[DCCP]: Initial implementation

Development to this point was done on a subversion repository at:

http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

This repository will be kept at this site for the foreseable future,
so that interested parties can see the history of this code,
attributions, etc.

If I ever decide to take this offline I'll provide the full history at
some other suitable place.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Arnaldo Carvalho de Melo 2005-08-09 20:14:34 -07:00 committed by David S. Miller
parent c4365c9235
commit 7c657876b6
22 changed files with 7746 additions and 0 deletions

432
include/linux/dccp.h Normal file
View file

@ -0,0 +1,432 @@
#ifndef _LINUX_DCCP_H
#define _LINUX_DCCP_H
#include <linux/in.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/workqueue.h>
#include <net/inet_connection_sock.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/tcp.h>
/* FIXME: this is utterly wrong */
struct sockaddr_dccp {
struct sockaddr_in in;
unsigned int service;
};
enum dccp_state {
DCCP_OPEN = TCP_ESTABLISHED,
DCCP_REQUESTING = TCP_SYN_SENT,
DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME:
This mapping is horrible, but TCP has
no matching state for DCCP_PARTOPEN,
as TCP_SYN_RECV is already used by
DCCP_RESPOND, why don't stop using TCP
mapping of states? OK, now we don't use
sk_stream_sendmsg anymore, so doesn't
seem to exist any reason for us to
do the TCP mapping here */
DCCP_LISTEN = TCP_LISTEN,
DCCP_RESPOND = TCP_SYN_RECV,
DCCP_CLOSING = TCP_CLOSING,
DCCP_TIME_WAIT = TCP_TIME_WAIT,
DCCP_CLOSED = TCP_CLOSE,
DCCP_MAX_STATES = TCP_MAX_STATES,
};
#define DCCP_STATE_MASK 0xf
#define DCCP_ACTION_FIN (1<<7)
enum {
DCCPF_OPEN = TCPF_ESTABLISHED,
DCCPF_REQUESTING = TCPF_SYN_SENT,
DCCPF_PARTOPEN = TCPF_FIN_WAIT1,
DCCPF_LISTEN = TCPF_LISTEN,
DCCPF_RESPOND = TCPF_SYN_RECV,
DCCPF_CLOSING = TCPF_CLOSING,
DCCPF_TIME_WAIT = TCPF_TIME_WAIT,
DCCPF_CLOSED = TCPF_CLOSE,
};
/**
* struct dccp_hdr - generic part of DCCP packet header
*
* @dccph_sport - Relevant port on the endpoint that sent this packet
* @dccph_dport - Relevant port on the other endpoint
* @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
* @dccph_ccval - Used by the HC-Sender CCID
* @dccph_cscov - Parts of the packet that are covered by the Checksum field
* @dccph_checksum - Internet checksum, depends on dccph_cscov
* @dccph_x - 0 = 24 bit sequence number, 1 = 48
* @dccph_type - packet type, see DCCP_PKT_ prefixed macros
* @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
*/
struct dccp_hdr {
__u16 dccph_sport,
dccph_dport;
__u8 dccph_doff;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 dccph_cscov:4,
dccph_ccval:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 dccph_ccval:4,
dccph_cscov:4;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
__u16 dccph_checksum;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u32 dccph_x:1,
dccph_type:4,
dccph_reserved:3,
dccph_seq:24;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u32 dccph_reserved:3,
dccph_type:4,
dccph_x:1,
dccph_seq:24;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
};
static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
{
return (struct dccp_hdr *)skb->h.raw;
}
/**
* struct dccp_hdr_ext - the low bits of a 48 bit seq packet
*
* @dccph_seq_low - low 24 bits of a 48 bit seq packet
*/
struct dccp_hdr_ext {
__u32 dccph_seq_low;
};
static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
{
return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
}
static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
}
static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 seq_nr = ntohl(dh->dccph_seq << 8);
#elif defined(__BIG_ENDIAN_BITFIELD)
__u64 seq_nr = ntohl(dh->dccph_seq);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
if (dh->dccph_x != 0)
seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);
return seq_nr;
}
/**
* struct dccp_hdr_request - Conection initiation request header
*
* @dccph_req_service - Service to which the client app wants to connect
* @dccph_req_options - list of options (must be a multiple of 32 bits
*/
struct dccp_hdr_request {
__u32 dccph_req_service;
};
static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
{
return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
/**
* struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
*
* @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR
* @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR
*/
struct dccp_hdr_ack_bits {
__u32 dccph_reserved1:8,
dccph_ack_nr_high:24;
__u32 dccph_ack_nr_low;
};
static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
{
return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
{
const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#elif defined(__BIG_ENDIAN_BITFIELD)
return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
}
/**
* struct dccp_hdr_response - Conection initiation response header
*
* @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR
* @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR
* @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
* @dccph_resp_options - list of options (must be a multiple of 32 bits
*/
struct dccp_hdr_response {
struct dccp_hdr_ack_bits dccph_resp_ack;
__u32 dccph_resp_service;
};
static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
{
return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
/**
* struct dccp_hdr_reset - Unconditionally shut down a connection
*
* @dccph_reset_service - Echoes the Service Code on a received DCCP-Request
* @dccph_reset_options - list of options (must be a multiple of 32 bits
*/
struct dccp_hdr_reset {
struct dccp_hdr_ack_bits dccph_reset_ack;
__u8 dccph_reset_code,
dccph_reset_data[3];
};
static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
{
return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
}
enum dccp_pkt_type {
DCCP_PKT_REQUEST = 0,
DCCP_PKT_RESPONSE,
DCCP_PKT_DATA,
DCCP_PKT_ACK,
DCCP_PKT_DATAACK,
DCCP_PKT_CLOSEREQ,
DCCP_PKT_CLOSE,
DCCP_PKT_RESET,
DCCP_PKT_SYNC,
DCCP_PKT_SYNCACK,
DCCP_PKT_INVALID,
};
#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID
static inline unsigned int dccp_packet_hdr_len(const __u8 type)
{
if (type == DCCP_PKT_DATA)
return 0;
if (type == DCCP_PKT_DATAACK ||
type == DCCP_PKT_ACK ||
type == DCCP_PKT_SYNC ||
type == DCCP_PKT_SYNCACK ||
type == DCCP_PKT_CLOSE ||
type == DCCP_PKT_CLOSEREQ)
return sizeof(struct dccp_hdr_ack_bits);
if (type == DCCP_PKT_REQUEST)
return sizeof(struct dccp_hdr_request);
if (type == DCCP_PKT_RESPONSE)
return sizeof(struct dccp_hdr_response);
return sizeof(struct dccp_hdr_reset);
}
static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
{
return dccp_basic_hdr_len(skb) +
dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type);
}
enum dccp_reset_codes {
DCCP_RESET_CODE_UNSPECIFIED = 0,
DCCP_RESET_CODE_CLOSED,
DCCP_RESET_CODE_ABORTED,
DCCP_RESET_CODE_NO_CONNECTION,
DCCP_RESET_CODE_PACKET_ERROR,
DCCP_RESET_CODE_OPTION_ERROR,
DCCP_RESET_CODE_MANDATORY_ERROR,
DCCP_RESET_CODE_CONNECTION_REFUSED,
DCCP_RESET_CODE_BAD_SERVICE_CODE,
DCCP_RESET_CODE_TOO_BUSY,
DCCP_RESET_CODE_BAD_INIT_COOKIE,
DCCP_RESET_CODE_AGGRESSION_PENALTY,
};
/* DCCP options */
enum {
DCCPO_PADDING = 0,
DCCPO_MANDATORY = 1,
DCCPO_MIN_RESERVED = 3,
DCCPO_MAX_RESERVED = 31,
DCCPO_NDP_COUNT = 37,
DCCPO_ACK_VECTOR_0 = 38,
DCCPO_ACK_VECTOR_1 = 39,
DCCPO_TIMESTAMP = 41,
DCCPO_TIMESTAMP_ECHO = 42,
DCCPO_ELAPSED_TIME = 43,
DCCPO_MAX = 45,
DCCPO_MIN_CCID_SPECIFIC = 128,
DCCPO_MAX_CCID_SPECIFIC = 255,
};
/* DCCP features */
enum {
DCCPF_RESERVED = 0,
DCCPF_SEQUENCE_WINDOW = 3,
DCCPF_SEND_ACK_VECTOR = 6,
DCCPF_SEND_NDP_COUNT = 7,
/* 10-127 reserved */
DCCPF_MIN_CCID_SPECIFIC = 128,
DCCPF_MAX_CCID_SPECIFIC = 255,
};
/* initial values for each feature */
#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
/* FIXME: for now we're using CCID 3 (TFRC) */
#define DCCPF_INITIAL_CCID 3
#define DCCPF_INITIAL_SEND_ACK_VECTOR 0
/* FIXME: for now we're default to 1 but it should really be 0 */
#define DCCPF_INITIAL_SEND_NDP_COUNT 1
#define DCCP_NDP_LIMIT 0xFFFFFF
/**
* struct dccp_options - option values for a DCCP connection
* @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
* @dccpo_ccid - Congestion Control Id (CCID) (section 10)
* @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
* @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
*/
struct dccp_options {
__u64 dccpo_sequence_window;
__u8 dccpo_ccid;
__u8 dccpo_send_ack_vector;
__u8 dccpo_send_ndp_count;
};
extern void __dccp_options_init(struct dccp_options *dccpo);
extern void dccp_options_init(struct dccp_options *dccpo);
extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
struct dccp_request_sock {
struct inet_request_sock dreq_inet_rsk;
__u64 dreq_iss;
__u64 dreq_isr;
__u32 dreq_service;
};
static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
{
return (struct dccp_request_sock *)req;
}
/* Read about the ECN nonce to see why it is 253 */
#define DCCP_MAX_ACK_VECTOR_LEN 253
struct dccp_options_received {
u32 dccpor_ndp:24,
dccpor_ack_vector_len:8;
u32 dccpor_ack_vector_idx:10;
/* 22 bits hole, try to pack */
u32 dccpor_timestamp;
u32 dccpor_timestamp_echo;
u32 dccpor_elapsed_time;
};
struct ccid;
enum dccp_role {
DCCP_ROLE_UNDEFINED,
DCCP_ROLE_LISTEN,
DCCP_ROLE_CLIENT,
DCCP_ROLE_SERVER,
};
/**
* struct dccp_sock - DCCP socket state
*
* @dccps_swl - sequence number window low
* @dccps_swh - sequence number window high
* @dccps_awl - acknowledgement number window low
* @dccps_awh - acknowledgement number window high
* @dccps_iss - initial sequence number sent
* @dccps_isr - initial sequence number received
* @dccps_osr - first OPEN sequence number received
* @dccps_gss - greatest sequence number sent
* @dccps_gsr - greatest valid sequence number received
* @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
* @dccps_timestamp_time - time of latest TIMESTAMP option
* @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
* @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
* @dccps_pmtu_cookie - Last pmtu seen by socket
* @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it
* @dccps_role - Role of this sock, one of %dccp_role
* @dccps_ndp_count - number of Non Data Packets since last data packet
* @dccps_hc_rx_ackpkts - receiver half connection acked packets
*/
struct dccp_sock {
/* inet_connection_sock has to be the first member of dccp_sock */
struct inet_connection_sock dccps_inet_connection;
__u64 dccps_swl;
__u64 dccps_swh;
__u64 dccps_awl;
__u64 dccps_awh;
__u64 dccps_iss;
__u64 dccps_isr;
__u64 dccps_osr;
__u64 dccps_gss;
__u64 dccps_gsr;
__u64 dccps_gar;
unsigned long dccps_service;
unsigned long dccps_timestamp_time;
__u32 dccps_timestamp_echo;
__u32 dccps_avg_packet_size;
unsigned long dccps_ndp_count;
__u16 dccps_ext_header_len;
__u32 dccps_pmtu_cookie;
__u32 dccps_mss_cache;
struct dccp_options dccps_options;
struct dccp_ackpkts *dccps_hc_rx_ackpkts;
void *dccps_hc_rx_ccid_private;
void *dccps_hc_tx_ccid_private;
struct ccid *dccps_hc_rx_ccid;
struct ccid *dccps_hc_tx_ccid;
struct dccp_options_received dccps_options_received;
enum dccp_role dccps_role:2;
};
static inline struct dccp_sock *dccp_sk(const struct sock *sk)
{
return (struct dccp_sock *)sk;
}
static inline const char *dccp_role(const struct sock *sk)
{
switch (dccp_sk(sk)->dccps_role) {
case DCCP_ROLE_UNDEFINED: return "undefined";
case DCCP_ROLE_LISTEN: return "listen";
case DCCP_ROLE_SERVER: return "server";
case DCCP_ROLE_CLIENT: return "client";
}
return NULL;
}
#endif /* _LINUX_DCCP_H */

View file

@ -32,6 +32,7 @@ enum {
IPPROTO_PUP = 12, /* PUP protocol */
IPPROTO_UDP = 17, /* User Datagram Protocol */
IPPROTO_IDP = 22, /* XNS IDP protocol */
IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
IPPROTO_RSVP = 46, /* RSVP protocol */
IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */

View file

@ -84,6 +84,7 @@ enum sock_type {
SOCK_RAW = 3,
SOCK_RDM = 4,
SOCK_SEQPACKET = 5,
SOCK_DCCP = 6,
SOCK_PACKET = 10,
};

View file

@ -271,6 +271,7 @@ struct ucred {
#define SOL_IRDA 266
#define SOL_NETBEUI 267
#define SOL_LLC 268
#define SOL_DCCP 269
/* IPX options */
#define IPX_TYPE 1

View file

@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig"
endif
source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/atm/Kconfig"
source "net/bridge/Kconfig"

View file

@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/
obj-$(CONFIG_VLAN_8021Q) += 8021q/
obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
ifeq ($(CONFIG_NET),y)

24
net/dccp/Kconfig Normal file
View file

@ -0,0 +1,24 @@
menu "DCCP Configuration (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
Datagram Congestion Control Protocol
From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
and on-line games
To compile this protocol support as a module, choose M here: the
module will be called dccp.
If in doubt, say N.
source "net/dccp/ccids/Kconfig"
endmenu

5
net/dccp/Makefile Normal file
View file

@ -0,0 +1,5 @@
obj-$(CONFIG_IP_DCCP) += dccp.o
dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o
obj-y += ccids/

139
net/dccp/ccid.c Normal file
View file

@ -0,0 +1,139 @@
/*
* net/dccp/ccid.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "ccid.h"
static struct ccid *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);
/*
* The strategy is: modifications ccids vector are short, do not sleep and
* veeery rare, but read access should be free of any exclusive locks.
*/
static void ccids_write_lock(void)
{
spin_lock(&ccids_lock);
while (atomic_read(&ccids_lockct) != 0) {
spin_unlock(&ccids_lock);
yield();
spin_lock(&ccids_lock);
}
}
static inline void ccids_write_unlock(void)
{
spin_unlock(&ccids_lock);
}
static inline void ccids_read_lock(void)
{
atomic_inc(&ccids_lockct);
spin_unlock_wait(&ccids_lock);
}
static inline void ccids_read_unlock(void)
{
atomic_dec(&ccids_lockct);
}
#else
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
int ccid_register(struct ccid *ccid)
{
int err;
if (ccid->ccid_init == NULL)
return -1;
ccids_write_lock();
err = -EEXIST;
if (ccids[ccid->ccid_id] == NULL) {
ccids[ccid->ccid_id] = ccid;
err = 0;
}
ccids_write_unlock();
if (err == 0)
pr_info("CCID: Registered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return err;
}
EXPORT_SYMBOL_GPL(ccid_register);
int ccid_unregister(struct ccid *ccid)
{
ccids_write_lock();
ccids[ccid->ccid_id] = NULL;
ccids_write_unlock();
pr_info("CCID: Unregistered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return 0;
}
EXPORT_SYMBOL_GPL(ccid_unregister);
struct ccid *ccid_init(unsigned char id, struct sock *sk)
{
struct ccid *ccid;
#ifdef CONFIG_KMOD
if (ccids[id] == NULL)
request_module("net-dccp-ccid-%d", id);
#endif
ccids_read_lock();
ccid = ccids[id];
if (ccid == NULL)
goto out;
if (!try_module_get(ccid->ccid_owner))
goto out_err;
if (ccid->ccid_init(sk) != 0)
goto out_module_put;
out:
ccids_read_unlock();
return ccid;
out_module_put:
module_put(ccid->ccid_owner);
out_err:
ccid = NULL;
goto out;
}
EXPORT_SYMBOL_GPL(ccid_init);
void ccid_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid == NULL)
return;
ccids_read_lock();
if (ccids[ccid->ccid_id] != NULL) {
if (ccid->ccid_exit != NULL)
ccid->ccid_exit(sk);
module_put(ccid->ccid_owner);
}
ccids_read_unlock();
}
EXPORT_SYMBOL_GPL(ccid_exit);

156
net/dccp/ccid.h Normal file
View file

@ -0,0 +1,156 @@
#ifndef _CCID_H
#define _CCID_H
/*
* net/dccp/ccid.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <net/sock.h>
#include <linux/dccp.h>
#include <linux/list.h>
#include <linux/module.h>
#define CCID_MAX 255
struct ccid {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
int (*ccid_init)(struct sock *sk);
void (*ccid_exit)(struct sock *sk);
int (*ccid_hc_rx_init)(struct sock *sk);
int (*ccid_hc_tx_init)(struct sock *sk);
void (*ccid_hc_rx_exit)(struct sock *sk);
void (*ccid_hc_tx_exit)(struct sock *sk);
void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
int (*ccid_hc_rx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
int (*ccid_hc_tx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
struct sk_buff *skb, int len,
long *delay);
void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
};
extern int ccid_register(struct ccid *ccid);
extern int ccid_unregister(struct ccid *ccid);
extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
extern void ccid_exit(struct ccid *ccid, struct sock *sk);
static inline void __ccid_get(struct ccid *ccid)
{
__module_get(ccid->ccid_owner);
}
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb, int len,
long *delay)
{
int rc = 0;
if (ccid->ccid_hc_tx_send_packet != NULL)
rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
return rc;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
int more, int len)
{
if (ccid->ccid_hc_tx_packet_sent != NULL)
ccid->ccid_hc_tx_packet_sent(sk, more, len);
}
static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
int rc = 0;
if (ccid->ccid_hc_rx_init != NULL)
rc = ccid->ccid_hc_rx_init(sk);
return rc;
}
static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
int rc = 0;
if (ccid->ccid_hc_tx_init != NULL)
rc = ccid->ccid_hc_tx_init(sk);
return rc;
}
static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid->ccid_hc_rx_exit != NULL)
ccid->ccid_hc_rx_exit(sk);
}
static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid->ccid_hc_tx_exit != NULL)
ccid->ccid_hc_tx_exit(sk);
}
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_rx_packet_recv != NULL)
ccid->ccid_hc_rx_packet_recv(sk, skb);
}
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_tx_packet_recv != NULL)
ccid->ccid_hc_tx_packet_recv(sk, skb);
}
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value)
{
int rc = 0;
if (ccid->ccid_hc_tx_parse_options != NULL)
rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
return rc;
}
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value)
{
int rc = 0;
if (ccid->ccid_hc_rx_parse_options != NULL)
rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
return rc;
}
static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_tx_insert_options != NULL)
ccid->ccid_hc_tx_insert_options(sk, skb);
}
static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
if (ccid->ccid_hc_rx_insert_options != NULL)
ccid->ccid_hc_rx_insert_options(sk, skb);
}
#endif /* _CCID_H */

25
net/dccp/ccids/Kconfig Normal file
View file

@ -0,0 +1,25 @@
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on IP_DCCP && EXPERIMENTAL
config IP_DCCP_CCID3
tristate "CCID3 (TFRC) (EXPERIMENTAL)"
depends on IP_DCCP
---help---
CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
where a flow is "reasonably fair" if its sending rate is generally
within a factor of two of the sending rate of a TCP flow under the
same conditions. However, TFRC has a much lower variation of
throughput over time compared with TCP, which makes CCID 3 more
suitable than CCID 2 for applications such streaming media where a
relatively smooth sending rate is of importance.
CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
congestion control algorithms were initially described in RFC 3448.
This text was extracted from draft-ietf-dccp-spec-11.txt.
If in doubt, say M.
endmenu

3
net/dccp/ccids/Makefile Normal file
View file

@ -0,0 +1,3 @@
obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
dccp_ccid3-y := ccid3.o

2164
net/dccp/ccids/ccid3.c Normal file

File diff suppressed because it is too large Load diff

137
net/dccp/ccids/ccid3.h Normal file
View file

@ -0,0 +1,137 @@
/*
* net/dccp/ccids/ccid3.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_CCID3_H_
#define _DCCP_CCID3_H_
#include <linux/types.h>
#include <linux/list.h>
#include <linux/timer.h>
struct ccid3_tx_hist_entry {
struct list_head ccid3htx_node;
u64 ccid3htx_seqno:48,
ccid3htx_win_count:8,
ccid3htx_sent:1;
struct timeval ccid3htx_tstamp;
};
struct ccid3_options_received {
u64 ccid3or_seqno:48,
ccid3or_loss_intervals_idx:16;
u16 ccid3or_loss_intervals_len;
u32 ccid3or_loss_event_rate;
u32 ccid3or_receive_rate;
};
/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
*
* @ccid3hctx_state - Sender state
* @ccid3hctx_x - Current sending rate
* @ccid3hctx_x_recv - Receive rate
* @ccid3hctx_x_calc - Calculated send (?) rate
* @ccid3hctx_s - Packet size
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
* @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
* @ccid3hctx_last_win_count - Last window counter sent
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
* @ccid3hctx_idle - FIXME
* @ccid3hctx_t_ld - Time last doubled during slow start
* @ccid3hctx_t_nom - Nominal send time of next packet
* @ccid3hctx_t_ipi - Interpacket (send) interval
* @ccid3hctx_delta - Send timer delta
* @ccid3hctx_hist - Packet history
*/
struct ccid3_hc_tx_sock {
u32 ccid3hctx_x;
u32 ccid3hctx_x_recv;
u32 ccid3hctx_x_calc;
u16 ccid3hctx_s;
u32 ccid3hctx_rtt;
u32 ccid3hctx_p;
u8 ccid3hctx_state;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
struct timeval ccid3hctx_t_ld;
struct timeval ccid3hctx_t_nom;
u32 ccid3hctx_t_ipi;
u32 ccid3hctx_delta;
struct list_head ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
struct ccid3_loss_interval_hist_entry {
struct list_head ccid3lih_node;
u64 ccid3lih_seqno:48,
ccid3lih_win_count:4;
u32 ccid3lih_interval;
};
struct ccid3_rx_hist_entry {
struct list_head ccid3hrx_node;
u64 ccid3hrx_seqno:48,
ccid3hrx_win_count:4,
ccid3hrx_type:4;
u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
struct timeval ccid3hrx_tstamp;
};
struct ccid3_hc_rx_sock {
u64 ccid3hcrx_seqno_last_counter:48,
ccid3hcrx_state:8,
ccid3hcrx_last_counter:4;
unsigned long ccid3hcrx_rtt;
u32 ccid3hcrx_p;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
struct list_head ccid3hcrx_hist;
struct list_head ccid3hcrx_loss_interval_hist;
u16 ccid3hcrx_s;
u32 ccid3hcrx_pinv;
u32 ccid3hcrx_elapsed_time;
u32 ccid3hcrx_x_recv;
};
#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
#endif /* _DCCP_CCID3_H_ */

422
net/dccp/dccp.h Normal file
View file

@ -0,0 +1,422 @@
#ifndef _DCCP_H
#define _DCCP_H
/*
* net/dccp/dccp.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/dccp.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#define DCCP_DEBUG
#ifdef DCCP_DEBUG
extern int dccp_debug;
#define dccp_pr_debug(format, a...) \
do { if (dccp_debug) \
printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
} while (0)
#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
#endif
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
extern int dccp_tw_count;
extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
/* FIXME: Right size this */
#define DCCP_MAX_OPT_LEN 128
#define DCCP_MAX_PACKET_HDR 32
#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
/* draft-ietf-dccp-spec-11.txt initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
extern struct proto dccp_v4_prot;
/* is seq1 < seq2 ? */
static inline const int before48(const u64 seq1, const u64 seq2)
{
return (const s64)((seq1 << 16) - (seq2 << 16)) < 0;
}
/* is seq1 > seq2 ? */
static inline const int after48(const u64 seq1, const u64 seq2)
{
return (const s64)((seq2 << 16) - (seq1 << 16)) < 0;
}
/* is seq2 <= seq1 <= seq3 ? */
static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3)
{
return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
static inline u64 max48(const u64 seq1, const u64 seq2)
{
return after48(seq1, seq2) ? seq1 : seq2;
}
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
DCCP_MIB_ESTABRESETS, /* EstabResets */
DCCP_MIB_CURRESTAB, /* CurrEstab */
DCCP_MIB_OUTSEGS, /* OutSegs */
DCCP_MIB_OUTRSTS,
DCCP_MIB_ABORTONTIMEOUT,
DCCP_MIB_TIMEOUTS,
DCCP_MIB_ABORTFAILED,
DCCP_MIB_PASSIVEOPENS,
DCCP_MIB_ATTEMPTFAILS,
DCCP_MIB_OUTDATAGRAMS,
DCCP_MIB_INERRS,
DCCP_MIB_OPTMANDATORYERROR,
DCCP_MIB_INVALIDOPT,
__DCCP_MIB_MAX
};
#define DCCP_MIB_MAX __DCCP_MIB_MAX
struct dccp_mib {
unsigned long mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val)
extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern int dccp_send_response(struct sock *sk);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_send_sync(struct sock *sk, u64 seq);
extern void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
extern const char *dccp_packet_name(const int type);
extern const char *dccp_state_name(const int state);
static inline void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
dccp_role(sk), sk,
dccp_state_name(oldstate), dccp_state_name(state));
WARN_ON(state == oldstate);
switch (state) {
case DCCP_OPEN:
if (oldstate != DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
break;
case DCCP_CLOSED:
if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
sk->sk_prot->unhash(sk);
if (inet_csk(sk)->icsk_bind_hash != NULL &&
!(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
inet_put_port(&dccp_hashinfo, sk);
/* fall through */
default:
if (oldstate == DCCP_OPEN)
DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
}
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
sk->sk_state = state;
}
static inline void dccp_done(struct sock *sk)
{
dccp_set_state(sk, DCCP_CLOSED);
dccp_clear_xmit_timers(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
else
inet_csk_destroy_sock(sk);
}
static inline void dccp_openreq_init(struct request_sock *req,
struct dccp_sock *dp,
struct sk_buff *skb)
{
/*
* FIXME: fill in the other req fields from the DCCP options
* received
*/
inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->acked = 0;
req->rcv_wnd = 0;
}
extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len,
struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern struct sock *dccp_create_openreq_child(struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb);
extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
extern void dccp_v4_err(struct sk_buff *skb, u32);
extern int dccp_v4_rcv(struct sk_buff *skb);
extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev);
extern int dccp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len);
extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req);
extern int dccp_connect(struct sock *sk);
extern int dccp_disconnect(struct sock *sk, int flags);
extern int dccp_getsockopt(struct sock *sk, int level, int optname,
char *optval, int *optlen);
extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size);
extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen);
extern void dccp_shutdown(struct sock *sk, int how);
extern int dccp_v4_checksum(struct sk_buff *skb);
extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk);
struct dccp_skb_cb {
__u8 dccpd_type;
__u8 dccpd_reset_code;
__u8 dccpd_service;
__u8 dccpd_ccval;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
int dccpd_opt_len;
};
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
static inline int dccp_non_data_packet(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
return type == DCCP_PKT_ACK ||
type == DCCP_PKT_CLOSE ||
type == DCCP_PKT_CLOSEREQ ||
type == DCCP_PKT_RESET ||
type == DCCP_PKT_SYNC ||
type == DCCP_PKT_SYNCACK;
}
static inline int dccp_packet_without_ack(const struct sk_buff *skb)
{
const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
if (value > DCCP_MAX_SEQNO)
value -= DCCP_MAX_SEQNO + 1;
*seqno = value;
}
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
}
static inline void dccp_inc_seqno(u64 *seqno)
{
if (++*seqno > DCCP_MAX_SEQNO)
*seqno = 0;
}
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));
#if defined(__LITTLE_ENDIAN_BITFIELD)
dh->dccph_seq = htonl((gss >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
dh->dccph_seq = htonl((gss >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
dhack->dccph_ack_nr_high = htonl((gsr >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
}
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
u64 tmp_gsr;
dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4));
dp->dccps_gsr = seq;
dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr));
dccp_set_seqno(&dp->dccps_swh,
dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4);
}
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
u64 tmp_gss;
dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1);
dp->dccps_awl = max48(tmp_gss, dp->dccps_iss);
dp->dccps_awh = dp->dccps_gss = seq;
}
extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern void dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
extern struct socket *dccp_ctl_socket;
#define DCCP_ACKPKTS_STATE_RECEIVED 0
#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
/** struct dccp_ackpkts - acknowledgeable packets
*
* This data structure is the one defined in the DCCP draft
* Appendix A.
*
* @dccpap_buf_head - circular buffer head
* @dccpap_buf_tail - circular buffer tail
* @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head)
* @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0
*
* Additionally, the HC-Receiver must keep some information about the
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
* @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno)
* @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
* @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno)
* @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
* @dccpap_buf_len - circular buffer length
* @dccpap_buf - circular buffer of acknowledgeable packets
*/
struct dccp_ackpkts {
unsigned int dccpap_buf_head;
unsigned int dccpap_buf_tail;
u64 dccpap_buf_ackno;
u64 dccpap_ack_seqno;
u64 dccpap_ack_ackno;
unsigned int dccpap_ack_ptr;
unsigned int dccpap_buf_vector_len;
unsigned int dccpap_ack_vector_len;
unsigned int dccpap_buf_len;
unsigned long dccpap_time;
u8 dccpap_buf_nonce;
u8 dccpap_ack_nonce;
u8 dccpap_buf[0];
};
extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority);
extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
struct sock *sk, u64 ackno);
#ifdef DCCP_DEBUG
extern void dccp_ackvector_print(const u64 ackno,
const unsigned char *vector, int len);
extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
#else
static inline void dccp_ackvector_print(const u64 ackno,
const unsigned char *vector,
int len) { }
static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
#endif
#endif /* _DCCP_H */

510
net/dccp/input.c Normal file
View file

@ -0,0 +1,510 @@
/*
* net/dccp/input.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include "ccid.h"
#include "dccp.h"
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
sk->sk_data_ready(sk, 0);
}
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
switch (sk->sk_state) {
case DCCP_PARTOPEN:
case DCCP_OPEN:
dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
dccp_fin(sk, skb);
dccp_set_state(sk, DCCP_CLOSED);
break;
}
}
static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
{
/*
* Step 7: Check for unexpected packet types
* If (S.is_server and P.type == CloseReq)
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
return;
}
switch (sk->sk_state) {
case DCCP_PARTOPEN:
case DCCP_OPEN:
dccp_set_state(sk, DCCP_CLOSING);
dccp_send_close(sk);
break;
}
}
static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
if (dp->dccps_options.dccpo_send_ack_vector)
dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
struct dccp_sock *dp = dccp_sk(sk);
u64 lswl = dp->dccps_swl;
u64 lawl = dp->dccps_awl;
/*
* Step 5: Prepare sequence numbers for Sync
* If P.type == Sync or P.type == SyncAck,
* If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
* / * P is valid, so update sequence number variables
* accordingly. After this update, P will pass the tests
* in Step 6. A SyncAck is generated if necessary in
* Step 15 * /
* Update S.GSR, S.SWL, S.SWH
* Otherwise,
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) &&
!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
else
return -1;
/*
* Step 6: Check sequence numbers
* Let LSWL = S.SWL and LAWL = S.AWL
* If P.type == CloseReq or P.type == Close or P.type == Reset,
* LSWL := S.GSR + 1, LAWL := S.GAR
* If LSWL <= P.seqno <= S.SWH
* and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
* Update S.GSR, S.SWL, S.SWH
* If P.type != Sync,
* Update S.GAR
* Otherwise,
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
dh->dccph_type == DCCP_PKT_CLOSE ||
dh->dccph_type == DCCP_PKT_RESET) {
lswl = dp->dccps_gsr;
dccp_inc_seqno(&lswl);
lawl = dp->dccps_gar;
}
if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
(DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) {
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
if (dh->dccph_type != DCCP_PKT_SYNC &&
DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
} else {
dccp_pr_debug("Step 6 failed, sending SYNC...\n");
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
return -1;
}
return 0;
}
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
if (dccp_check_seqno(sk, skb))
goto discard;
if (dccp_parse_options(sk, skb))
goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
/*
* FIXME: check ECN to see if we should use
* DCCP_ACKPKTS_STATE_ECN_MARKED
*/
if (dp->dccps_options.dccpo_send_ack_vector) {
struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED)) {
LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n"));
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
goto discard;
}
/*
* FIXME: this activation is probably wrong, have to study more
* TCP delack machinery and how it fits into DCCP draft, but
* for now it kinda "works" 8)
*/
if (!inet_csk_ack_scheduled(sk)) {
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX);
}
}
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
switch (dccp_hdr(skb)->dccph_type) {
case DCCP_PKT_DATAACK:
case DCCP_PKT_DATA:
/*
* FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option
* if it is.
*/
__skb_pull(skb, dh->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
sk->sk_data_ready(sk, 0);
return 0;
case DCCP_PKT_ACK:
goto discard;
case DCCP_PKT_RESET:
/*
* Step 9: Process Reset
* If P.type == Reset,
* Tear down connection
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
*/
dccp_fin(sk, skb);
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
return 0;
case DCCP_PKT_CLOSEREQ:
dccp_rcv_closereq(sk, skb);
goto discard;
case DCCP_PKT_CLOSE:
dccp_rcv_close(sk, skb);
return 0;
case DCCP_PKT_REQUEST:
/* Step 7
* or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state >= OPEN and P.type == Request
* and P.seqno >= S.OSR)
* or (S.state >= OPEN and P.type == Response
* and P.seqno >= S.OSR)
* or (S.state == RESPOND and P.type == Data),
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
if (dp->dccps_role != DCCP_ROLE_LISTEN)
goto send_sync;
goto check_seq;
case DCCP_PKT_RESPONSE:
if (dp->dccps_role != DCCP_ROLE_CLIENT)
goto send_sync;
check_seq:
if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
}
break;
}
DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
__kfree_skb(skb);
return 0;
}
static int dccp_rcv_request_sent_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
const unsigned len)
{
/*
* Step 4: Prepare sequence numbers in REQUEST
* If S.state == REQUEST,
* If (P.type == Response or P.type == Reset)
* and S.AWL <= P.ackno <= S.AWH,
* / * Set sequence number variables corresponding to the
* other endpoint, so P will pass the tests in Step 6 * /
* Set S.GSR, S.ISR, S.SWL, S.SWH
* / * Response processing continues in Step 10; Reset
* processing continues in Step 9 * /
*/
if (dh->dccph_type == DCCP_PKT_RESPONSE) {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
/* Stop the REQUEST timer */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
BUG_TRAP(sk->sk_send_head != NULL);
__kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) {
dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n",
dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh);
goto out_invalid_packet;
}
dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
/* FIXME: send appropriate RESET code */
goto out_invalid_packet;
}
dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
/*
* Step 10: Process REQUEST state (second part)
* If S.state == REQUEST,
* / * If we get here, P is a valid Response from the server (see
* Step 4), and we should move to PARTOPEN state. PARTOPEN
* means send an Ack, don't send Data packets, retransmit
* Acks periodically, and always include any Init Cookie from
* the Response * /
* S.state := PARTOPEN
* Set PARTOPEN timer
* Continue with S.state == PARTOPEN
* / * Step 12 will send the Ack completing the three-way
* handshake * /
*/
dccp_set_state(sk, DCCP_PARTOPEN);
/* Make sure socket is routed, for correct metrics. */
inet_sk_rebuild_header(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
sk_wake_async(sk, 0, POLL_OUT);
}
if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
icsk->icsk_accept_queue.rskq_defer_accept) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
* It may be deleted, but with this feature tcpdumps
* look so _wonderfully_ clever, that I was not able
* to stand against the temptation 8) --ANK
*/
/*
* OK, in DCCP we can as well do a similar trick, its
* even in the draft, but there is no need for us to
* schedule an ack here, as dccp_sendmsg does this for
* us, also stated in the draft. -acme
*/
__kfree_skb(skb);
return 0;
}
dccp_send_ack(sk);
return -1;
}
out_invalid_packet:
return 1; /* dccp_v4_do_rcv will send a reset, but...
FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
const unsigned len)
{
int queued = 0;
switch (dh->dccph_type) {
case DCCP_PKT_RESET:
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
break;
case DCCP_PKT_DATAACK:
case DCCP_PKT_ACK:
/*
* FIXME: we should be reseting the PARTOPEN (DELACK) timer here,
* but only if we haven't used the DELACK timer for something else,
* like sending a delayed ack for a TIMESTAMP echo, etc, for now
* were not clearing it, sending an extra ACK when there is nothing
* else to do in DELACK is not a big deal after all.
*/
/* Stop the PARTOPEN timer */
if (sk->sk_state == DCCP_PARTOPEN)
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_set_state(sk, DCCP_OPEN);
if (dh->dccph_type == DCCP_PKT_DATAACK) {
dccp_rcv_established(sk, skb, dh, len);
queued = 1; /* packet was queued (by dccp_rcv_established) */
}
break;
}
return queued;
}
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
const int old_state = sk->sk_state;
int queued = 0;
if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) {
if (dccp_check_seqno(sk, skb))
goto discard;
/*
* Step 8: Process options and mark acknowledgeable
*/
if (dccp_parse_options(sk, skb))
goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
/*
* FIXME: check ECN to see if we should use
* DCCP_ACKPKTS_STATE_ECN_MARKED
*/
if (dp->dccps_options.dccpo_send_ack_vector) {
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED))
goto discard;
/*
* FIXME: this activation is probably wrong, have to study more
* TCP delack machinery and how it fits into DCCP draft, but
* for now it kinda "works" 8)
*/
if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 &&
!inet_csk_ack_scheduled(sk)) {
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
}
}
}
/*
* Step 9: Process Reset
* If P.type == Reset,
* Tear down connection
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_RESET) {
/* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */
dccp_fin(sk, skb);
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
return 0;
/*
* Step 7: Check for unexpected packet types
* If (S.is_server and P.type == CloseReq)
* or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state == RESPOND and P.type == Data),
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
(dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
(dp->dccps_role == DCCP_ROLE_CLIENT &&
dh->dccph_type == DCCP_PKT_REQUEST) ||
(sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
goto discard;
}
switch (sk->sk_state) {
case DCCP_CLOSED:
return 1;
case DCCP_LISTEN:
if (dh->dccph_type == DCCP_PKT_ACK ||
dh->dccph_type == DCCP_PKT_DATAACK)
return 1;
if (dh->dccph_type == DCCP_PKT_RESET)
goto discard;
if (dh->dccph_type == DCCP_PKT_REQUEST) {
if (dccp_v4_conn_request(sk, skb) < 0)
return 1;
/* FIXME: do congestion control initialization */
goto discard;
}
goto discard;
case DCCP_REQUESTING:
/* FIXME: do congestion control initialization */
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
if (queued >= 0)
return queued;
__kfree_skb(skb);
return 0;
case DCCP_RESPOND:
case DCCP_PARTOPEN:
queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len);
break;
}
if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) {
switch (old_state) {
case DCCP_PARTOPEN:
sk->sk_state_change(sk);
sk_wake_async(sk, 0, POLL_OUT);
break;
}
}
if (!queued) {
discard:
__kfree_skb(skb);
}
return 0;
}

1289
net/dccp/ipv4.c Normal file

File diff suppressed because it is too large Load diff

199
net/dccp/minisocks.c Normal file
View file

@ -0,0 +1,199 @@
/*
* net/dccp/minisocks.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
#include "ccid.h"
#include "dccp.h"
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
/* FIXME: Implement */
dccp_pr_debug("Want to help? Start here\n");
dccp_set_state(sk, state);
}
/* This is for handling early-kills of TIME_WAIT sockets. */
void dccp_tw_deschedule(struct inet_timewait_sock *tw)
{
dccp_pr_debug("Want to help? Start here\n");
__inet_twsk_kill(tw, &dccp_hashinfo);
}
struct sock *dccp_create_openreq_child(struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb)
{
/*
* Step 3: Process LISTEN state
*
* // Generate a new socket and switch to that socket
* Set S := new socket for this port pair
*/
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
const struct dccp_request_sock *dreq = dccp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(sk);
struct dccp_sock *newdp = dccp_sk(newsk);
newdp->dccps_hc_rx_ackpkts = NULL;
newdp->dccps_role = DCCP_ROLE_SERVER;
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
if (newdp->dccps_options.dccpo_send_ack_vector) {
newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
GFP_ATOMIC);
/*
* XXX: We're using the same CCIDs set on the parent, i.e. sk_clone
* copied the master sock and left the CCID pointers for this child,
* that is why we do the __ccid_get calls.
*/
if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
goto out_free;
}
if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 ||
ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) {
dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
out_free:
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
sk_free(newsk);
return NULL;
}
__ccid_get(newdp->dccps_hc_rx_ccid);
__ccid_get(newdp->dccps_hc_tx_ccid);
/*
* Step 3: Process LISTEN state
*
* Choose S.ISS (initial seqno) or set from Init Cookie
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*/
/* See dccp_v4_conn_request */
newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
dccp_update_gsr(newsk, dreq->dreq_isr);
newdp->dccps_iss = dreq->dreq_iss;
dccp_update_gss(newsk, dreq->dreq_iss);
dccp_init_xmit_timers(newsk);
DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
}
return newsk;
}
/*
* Process an incoming packet for RESPOND sockets represented
* as an request_sock.
*/
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
struct sock *child = NULL;
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) {
struct dccp_request_sock *dreq = dccp_rsk(req);
dccp_pr_debug("Retransmitted REQUEST\n");
/* Send another RESPONSE packet */
dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq);
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
return NULL;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
goto drop;
/* Invalid ACK */
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n",
DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss);
goto drop;
}
child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
/* FIXME: deal with options */
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
out:
return child;
listen_overflow:
dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
}
/*
* Queue segment on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket.
*/
int dccp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb)
{
int ret = 0;
const int state = child->sk_state;
if (!sock_owned_by_user(child)) {
ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len);
/* Wakeup parent, send SIGIO */
if (state == DCCP_RESPOND && child->sk_state != state)
parent->sk_data_ready(parent, 0);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
* socket does not protect us more.
*/
sk_add_backlog(child, skb);
}
bh_unlock_sock(child);
sock_put(child);
return ret;
}

763
net/dccp/options.c Normal file
View file

@ -0,0 +1,763 @@
/*
* net/dccp/options.c
*
* An implementation of the DCCP protocol
* Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
* Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include "ccid.h"
#include "dccp.h"
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
struct sock *sk,
const u64 ackno,
const unsigned char len,
const unsigned char *vector);
/* stores the default values for new connection. may be changed with sysctl */
static const struct dccp_options dccpo_default_values = {
.dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
.dccpo_ccid = DCCPF_INITIAL_CCID,
.dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
.dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
};
void dccp_options_init(struct dccp_options *dccpo)
{
memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
u32 value = 0;
if (len > 3)
value += *bf++ << 24;
if (len > 2)
value += *bf++ << 16;
if (len > 1)
value += *bf++ << 8;
if (len > 0)
value += *bf;
return value;
}
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " :
"server rx opt: ";
#endif
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
unsigned char *value;
memset(opt_recv, 0, sizeof(*opt_recv));
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
if (len < 3)
goto out_invalid_option;
/*
* Remove the type and len fields, leaving
* just the value size
*/
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
switch (opt) {
case DCCPO_PADDING:
break;
case DCCPO_NDP_COUNT:
if (len > 3)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp);
break;
case DCCPO_ACK_VECTOR_0:
if (len > DCCP_MAX_ACK_VECTOR_LEN)
goto out_invalid_option;
if (pkt_type == DCCP_PKT_DATA)
continue;
opt_recv->dccpor_ack_vector_len = len;
opt_recv->dccpor_ack_vector_idx = value - options;
dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq);
dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
value, len);
dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq,
len, value);
break;
case DCCPO_TIMESTAMP:
if (len != 4)
goto out_invalid_option;
opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dp->dccps_timestamp_time = jiffies;
dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
debug_prefix, opt_recv->dccpor_timestamp,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
break;
case DCCPO_TIMESTAMP_ECHO:
if (len < 4 || len > 8)
goto out_invalid_option;
opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n",
debug_prefix, opt_recv->dccpor_timestamp_echo,
len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq,
tcp_time_stamp - opt_recv->dccpor_timestamp_echo);
opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4);
dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix,
opt_recv->dccpor_elapsed_time);
break;
case DCCPO_ELAPSED_TIME:
if (len > 4)
goto out_invalid_option;
if (pkt_type == DCCP_PKT_DATA)
continue;
opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len);
dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
opt_recv->dccpor_elapsed_time);
break;
/*
* From draft-ietf-dccp-spec-11.txt:
*
* Option numbers 128 through 191 are for options sent from the HC-
* Sender to the HC-Receiver; option numbers 192 through 255 are for
* options sent from the HC-Receiver to the HC-Sender.
*/
case 128 ... 191: {
const u16 idx = value - options;
if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0)
goto out_invalid_option;
}
break;
case 192 ... 255: {
const u16 idx = value - options;
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0)
goto out_invalid_option;
}
break;
default:
pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n",
sk, opt, len);
break;
}
}
return 0;
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
return -1;
}
static void dccp_encode_value_var(const u32 value, unsigned char *to,
const unsigned int len)
{
if (len > 3)
*to++ = (value & 0xFF000000) >> 24;
if (len > 2)
*to++ = (value & 0xFF0000) >> 16;
if (len > 1)
*to++ = (value & 0xFF00) >> 8;
if (len > 0)
*to++ = (value & 0xFF);
}
static inline int dccp_ndp_len(const int ndp)
{
return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
}
void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
const unsigned char option,
const void *value, const unsigned char len)
{
unsigned char *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
to = skb_push(skb, len + 2);
*to++ = option;
*to++ = len + 2;
memcpy(to, value, len);
}
EXPORT_SYMBOL_GPL(dccp_insert_option);
static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
int ndp = dp->dccps_ndp_count;
if (dccp_non_data_packet(skb))
++dp->dccps_ndp_count;
else
dp->dccps_ndp_count = 0;
if (ndp > 0) {
unsigned char *ptr;
const int ndp_len = dccp_ndp_len(ndp);
const int len = ndp_len + 2;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
return;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
ptr = skb_push(skb, len);
*ptr++ = DCCPO_NDP_COUNT;
*ptr++ = len;
dccp_encode_value_var(ndp, ptr, ndp_len);
}
}
static inline int dccp_elapsed_time_len(const u32 elapsed_time)
{
return elapsed_time == 0 ? 0 :
elapsed_time <= 0xFF ? 1 :
elapsed_time <= 0xFFFF ? 2 :
elapsed_time <= 0xFFFFFF ? 3 : 4;
}
void dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time)
{
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
const int len = 2 + elapsed_time_len;
unsigned char *to;
/* If elapsed_time == 0... */
if (elapsed_time_len == 2)
return;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n"));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_ELAPSED_TIME;
*to++ = len;
dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
debug_prefix, elapsed_time,
len, DCCP_SKB_CB(skb)->dccpd_seq);
}
EXPORT_SYMBOL(dccp_insert_option_elapsed_time);
static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
int len = ap->dccpap_buf_vector_len + 2;
const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10;
unsigned char *to, *from;
if (elapsed_time != 0)
dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n"));
return;
}
/*
* XXX: now we have just one ack vector sent record, so
* we have to wait for it to be cleared.
*
* Of course this is not acceptable, but this is just for
* basic testing now.
*/
if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
return;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_ACK_VECTOR_0;
*to++ = len;
len = ap->dccpap_buf_vector_len;
from = ap->dccpap_buf + ap->dccpap_buf_head;
/* Check if buf_head wraps */
if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head;
memcpy(to, from, tailsize);
to += tailsize;
len -= tailsize;
from = ap->dccpap_buf;
}
memcpy(to, from, len);
/*
* From draft-ietf-dccp-spec-11.txt:
*
* For each acknowledgement it sends, the HC-Receiver will add an
* acknowledgement record. ack_seqno will equal the HC-Receiver
* sequence number it used for the ack packet; ack_ptr will equal
* buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal
* buf_nonce.
*
* This implemention uses just one ack record for now.
*/
ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
ap->dccpap_ack_ptr = ap->dccpap_buf_head;
ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n",
debug_prefix, ap->dccpap_ack_vector_len,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
}
static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
const u32 now = htonl(tcp_time_stamp);
dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}
static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
#ifdef DCCP_DEBUG
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
"server TX opt: ";
#endif
u32 tstamp_echo;
const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10;
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
const int len = 6 + elapsed_time_len;
unsigned char *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n"));
return;
}
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_TIMESTAMP_ECHO;
*to++ = len;
tstamp_echo = htonl(dp->dccps_timestamp_echo);
memcpy(to, &tstamp_echo, 4);
to += 4;
dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
debug_prefix, dp->dccps_timestamp_echo,
len, DCCP_SKB_CB(skb)->dccpd_seq);
dp->dccps_timestamp_echo = 0;
dp->dccps_timestamp_time = 0;
}
void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
if (dp->dccps_options.dccpo_send_ndp_count)
dccp_insert_option_ndp(sk, skb);
if (!dccp_packet_without_ack(skb)) {
if (dp->dccps_options.dccpo_send_ack_vector &&
dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)
dccp_insert_option_ack_vector(sk, skb);
dccp_insert_option_timestamp(sk, skb);
if (dp->dccps_timestamp_echo != 0)
dccp_insert_option_timestamp_echo(sk, skb);
}
ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
/* The length of all options has to be a multiple of 4 */
int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
if (padding != 0) {
padding = 4 - padding;
memset(skb_push(skb, padding), 0, padding);
DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
}
}
}
struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority)
{
struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
if (ap != NULL) {
#ifdef DCCP_DEBUG
memset(ap->dccpap_buf, 0xFF, len);
#endif
ap->dccpap_buf_len = len;
ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1;
ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
ap->dccpap_ack_ptr = 0;
ap->dccpap_time = 0;
ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
}
return ap;
}
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
if (ap != NULL) {
#ifdef DCCP_DEBUG
memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
kfree(ap);
}
}
static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
const unsigned int index)
{
return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
}
static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
const unsigned int index)
{
return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
}
/*
* If several packets are missing, the HC-Receiver may prefer to enter multiple
* bytes with run length 0, rather than a single byte with a larger run length;
* this simplifies table updates if one of the missing packets arrives.
*/
static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
const unsigned int packets,
const unsigned char state)
{
unsigned int gap;
signed long new_head;
if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
return -ENOBUFS;
gap = packets - 1;
new_head = ap->dccpap_buf_head - packets;
if (new_head < 0) {
if (gap > 0) {
memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
gap + new_head + 1);
gap = -new_head;
}
new_head += ap->dccpap_buf_len;
}
ap->dccpap_buf_head = new_head;
if (gap > 0)
memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
ap->dccpap_buf[ap->dccpap_buf_head] = state;
ap->dccpap_buf_vector_len += packets;
return 0;
}
/*
* Implements the draft-ietf-dccp-spec-11.txt Appendix A
*/
int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
{
/*
* Check at the right places if the buffer is full, if it is, tell the
* caller to start dropping packets till the HC-Sender acks our ACK
* vectors, when we will free up space in dccpap_buf.
*
* We may well decide to do buffer compression, etc, but for now lets
* just drop.
*
* From Appendix A:
*
* Of course, the circular buffer may overflow, either when the HC-
* Sender is sending data at a very high rate, when the HC-Receiver's
* acknowledgements are not reaching the HC-Sender, or when the HC-
* Sender is forgetting to acknowledge those acks (so the HC-Receiver
* is unable to clean up old state). In this case, the HC-Receiver
* should either compress the buffer (by increasing run lengths when
* possible), transfer its state to a larger buffer, or, as a last
* resort, drop all received packets, without processing them
* whatsoever, until its buffer shrinks again.
*/
/* See if this is the first ackno being inserted */
if (ap->dccpap_buf_vector_len == 0) {
ap->dccpap_buf[ap->dccpap_buf_head] = state;
ap->dccpap_buf_vector_len = 1;
} else if (after48(ackno, ap->dccpap_buf_ackno)) {
const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno);
/*
* Look if the state of this packet is the same as the previous ackno
* and if so if we can bump the head len.
*/
if (delta == 1 &&
dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK)
ap->dccpap_buf[ap->dccpap_buf_head]++;
else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
return -ENOBUFS;
} else {
/*
* A.1.2. Old Packets
*
* When a packet with Sequence Number S arrives, and S <= buf_ackno,
* the HC-Receiver will scan the table for the byte corresponding to S.
* (Indexing structures could reduce the complexity of this scan.)
*/
u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
unsigned int index = ap->dccpap_buf_head;
while (1) {
const u8 len = dccp_ackpkts_len(ap, index);
const u8 state = dccp_ackpkts_state(ap, index);
/*
* valid packets not yet in dccpap_buf have a reserved entry, with
* a len equal to 0
*/
if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
len == 0 && delta == 0) { /* Found our reserved seat! */
dccp_pr_debug("Found %llu reserved seat!\n", ackno);
ap->dccpap_buf[index] = state;
goto out;
}
/* len == 0 means one packet */
if (delta < len + 1)
goto out_duplicate;
delta -= len + 1;
if (++index == ap->dccpap_buf_len)
index = 0;
}
}
ap->dccpap_buf_ackno = ackno;
ap->dccpap_time = jiffies;
out:
dccp_pr_debug("");
dccp_ackpkts_print(ap);
return 0;
out_duplicate:
/* Duplicate packet */
dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno);
return -EILSEQ;
}
#ifdef DCCP_DEBUG
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
if (!dccp_debug)
return;
printk("ACK vector len=%d, ackno=%llu |", len, ackno);
while (len--) {
const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
printk("%d,%d|", state, rl);
++vector;
}
printk("\n");
}
void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
{
dccp_ackvector_print(ap->dccpap_buf_ackno,
ap->dccpap_buf + ap->dccpap_buf_head,
ap->dccpap_buf_vector_len);
}
#endif
static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
{
/*
* As we're keeping track of the ack vector size
* (dccpap_buf_vector_len) and the sent ack vector size
* (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
* keep this code here as in the future we'll implement a vector of ack
* records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme
*/
#if 0
ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
ap->dccpap_buf_tail -= ap->dccpap_buf_len;
#endif
ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
}
void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
u64 ackno)
{
/* Check if we actually sent an ACK vector */
if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
return;
if (ackno == ap->dccpap_ack_seqno) {
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
"server rx ack: ";
#endif
dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
debug_prefix, 1,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
dccp_ackpkts_trow_away_ack_record(ap);
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
}
}
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
struct sock *sk, u64 ackno,
const unsigned char len,
const unsigned char *vector)
{
unsigned char i;
/* Check if we actually sent an ACK vector */
if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
return;
/*
* We're in the receiver half connection, so if the received an ACK vector
* ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested.
*
* Extra explanation with example:
*
* if we received an ACK vector with ackno 50, it can only be acking
* 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
*/
// dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno);
if (before48(ackno, ap->dccpap_ack_seqno)) {
// dccp_pr_debug_cat("yes\n");
return;
}
// dccp_pr_debug_cat("no\n");
i = len;
while (i--) {
const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
u64 ackno_end_rl;
dccp_set_seqno(&ackno_end_rl, ackno - rl);
// dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno);
if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
// dccp_pr_debug_cat("yes\n");
if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
#ifdef DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
"server rx ack: ";
#endif
dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
debug_prefix, len,
ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
dccp_ackpkts_trow_away_ack_record(ap);
}
/*
* If dccpap_ack_seqno was not received, no problem we'll
* send another ACK vector.
*/
ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
break;
}
// dccp_pr_debug_cat("no\n");
dccp_set_seqno(&ackno, ackno_end_rl - 1);
++vector;
}
}

406
net/dccp/output.c Normal file
View file

@ -0,0 +1,406 @@
/*
* net/dccp/output.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include "ccid.h"
#include "dccp.h"
static inline void dccp_event_ack_sent(struct sock *sk)
{
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
/*
* All SKB's seen here are completely headerless. It is our
* job to build the DCCP header, and pass the packet down to
* IP so it can do the same plus pass the packet off to the
* device.
*/
int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (likely(skb != NULL)) {
const struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
struct dccp_hdr *dh;
/* XXX For now we're using only 48 bits sequence numbers */
const int dccp_header_size = sizeof(*dh) +
sizeof(struct dccp_hdr_ext) +
dccp_packet_hdr_len(dcb->dccpd_type);
int err, set_ack = 1;
u64 ackno = dp->dccps_gsr;
/*
* FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing
* to do here...
*/
dccp_inc_seqno(&dp->dccps_gss);
dcb->dccpd_seq = dp->dccps_gss;
dccp_insert_options(sk, skb);
switch (dcb->dccpd_type) {
case DCCP_PKT_DATA:
set_ack = 0;
break;
case DCCP_PKT_SYNC:
case DCCP_PKT_SYNCACK:
ackno = dcb->dccpd_seq;
break;
}
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
/* Data packets are not cloned as they are never retransmitted */
if (skb_cloned(skb))
skb_set_owner_w(skb, sk);
/* Build DCCP header and checksum it. */
memset(dh, 0, dccp_header_size);
dh->dccph_type = dcb->dccpd_type;
dh->dccph_sport = inet->sport;
dh->dccph_dport = inet->dport;
dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
dh->dccph_ccval = dcb->dccpd_ccval;
/* XXX For now we're using only 48 bits sequence numbers */
dh->dccph_x = 1;
dp->dccps_awh = dp->dccps_gss;
dccp_hdr_set_seq(dh, dp->dccps_gss);
if (set_ack)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
switch (dcb->dccpd_type) {
case DCCP_PKT_REQUEST:
dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service;
break;
case DCCP_PKT_RESET:
dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code;
break;
}
dh->dccph_checksum = dccp_v4_checksum(skb);
if (dcb->dccpd_type == DCCP_PKT_ACK ||
dcb->dccpd_type == DCCP_PKT_DATAACK)
dccp_event_ack_sent(sk);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
err = ip_queue_xmit(skb, 0);
if (err <= 0)
return err;
/* NET_XMIT_CN is special. It does not guarantee,
* that this packet is lost. It tells that device
* is about to start to drop packets or already
* drops some packets of the same priority and
* invokes us to send less aggressively.
*/
return err == NET_XMIT_CN ? 0 : err;
}
return -ENOBUFS;
}
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
struct dccp_sock *dp = dccp_sk(sk);
int mss_now;
/*
* FIXME: we really should be using the af_specific thing to support IPv6.
* mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
*/
mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
/* Now subtract optional transport overhead */
mss_now -= dp->dccps_ext_header_len;
/*
* FIXME: this should come from the CCID infrastructure, where, say,
* TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
* put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
* TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
* make it a multiple of 4
*/
mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
/* And store cached results */
dp->dccps_pmtu_cookie = pmtu;
dp->dccps_mss_cache = mss_now;
return mss_now;
}
int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (inet_sk_rebuild_header(sk) != 0)
return -EHOSTUNREACH; /* Routing failure or similar. */
return dccp_transmit_skb(sk, (skb_cloned(skb) ?
pskb_copy(skb, GFP_ATOMIC):
skb_clone(skb, GFP_ATOMIC)));
}
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
struct dccp_hdr *dh;
const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
dccp_header_size, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
skb->dst = dst_clone(dst);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_rsk(req)->rmt_port;
dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
dh->dccph_checksum = dccp_v4_checksum(skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
const enum dccp_reset_codes code)
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
dccp_header_size, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
skb->dst = dst_clone(dst);
skb->csum = 0;
dccp_inc_seqno(&dp->dccps_gss);
DCCP_SKB_CB(skb)->dccpd_reset_code = code;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
memset(dh, 0, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_sk(sk)->dport;
dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESET;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dp->dccps_gss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
dh->dccph_checksum = dccp_v4_checksum(skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
/*
* Do all connect socket setups that can be done AF independent.
*/
static inline void dccp_connect_init(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
dccp_sync_mss(sk, dst_mtu(dst));
/*
* FIXME: set dp->{dccps_swh,dccps_swl}, with
* something like dccp_inc_seq
*/
icsk->icsk_retransmits = 0;
}
int dccp_connect(struct sock *sk)
{
struct sk_buff *skb;
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_connect_init(sk);
skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
if (unlikely(skb == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(skb, MAX_DCCP_HEADER);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
/* FIXME: set service to something meaningful, coming
* from userspace*/
DCCP_SKB_CB(skb)->dccpd_service = 0;
skb->csum = 0;
skb_set_owner_w(skb, sk);
BUG_TRAP(sk->sk_send_head == NULL);
sk->sk_send_head = skb;
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
/* Timer for repeating the REQUEST until an answer. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
return 0;
}
void dccp_send_ack(struct sock *sk)
{
/* If we have been reset, we may not send again. */
if (sk->sk_state != DCCP_CLOSED) {
struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
if (skb == NULL) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
return;
}
/* Reserve space for headers */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
}
}
EXPORT_SYMBOL_GPL(dccp_send_ack);
void dccp_send_delayed_ack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/*
* FIXME: tune this timer. elapsed time fixes the skew, so no problem
* with using 2s, and active senders also piggyback the ACK into a
* DATAACK packet, so this is really for quiescent senders.
*/
unsigned long timeout = jiffies + 2 * HZ;
/* Use new timeout only if there wasn't a older one earlier. */
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
/* If delack timer was blocked or is about to expire,
* send ACK now.
*
* FIXME: check the "about to expire" part
*/
if (icsk->icsk_ack.blocked) {
dccp_send_ack(sk);
return;
}
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
}
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
void dccp_send_sync(struct sock *sk, u64 seq)
{
/*
* We are not putting this on the write queue, so
* dccp_transmit_skb() will set the ownership to this
* sock.
*/
struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
if (skb == NULL)
/* FIXME: how to make sure the sync is sent? */
return;
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_DCCP_HEADER);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC;
DCCP_SKB_CB(skb)->dccpd_seq = seq;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
}
/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be
* allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances.
*/
void dccp_send_close(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
/* Socket is locked, keep trying until memory is available. */
for (;;) {
skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL);
if (skb != NULL)
break;
yield();
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
skb_set_owner_w(skb, sk);
dccp_transmit_skb(sk, skb);
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}

818
net/dccp/proto.c Normal file
View file

@ -0,0 +1,818 @@
/*
* net/dccp/proto.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>
#include <linux/dccp.h>
#include "ccid.h"
#include "dccp.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);
atomic_t dccp_orphan_count = ATOMIC_INIT(0);
static struct net_protocol dccp_protocol = {
.handler = dccp_v4_rcv,
.err_handler = dccp_v4_err,
};
const char *dccp_packet_name(const int type)
{
static const char *dccp_packet_names[] = {
[DCCP_PKT_REQUEST] = "REQUEST",
[DCCP_PKT_RESPONSE] = "RESPONSE",
[DCCP_PKT_DATA] = "DATA",
[DCCP_PKT_ACK] = "ACK",
[DCCP_PKT_DATAACK] = "DATAACK",
[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
[DCCP_PKT_CLOSE] = "CLOSE",
[DCCP_PKT_RESET] = "RESET",
[DCCP_PKT_SYNC] = "SYNC",
[DCCP_PKT_SYNCACK] = "SYNCACK",
};
if (type >= DCCP_NR_PKT_TYPES)
return "INVALID";
else
return dccp_packet_names[type];
}
EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
static char *dccp_state_names[] = {
[DCCP_OPEN] = "OPEN",
[DCCP_REQUESTING] = "REQUESTING",
[DCCP_PARTOPEN] = "PARTOPEN",
[DCCP_LISTEN] = "LISTEN",
[DCCP_RESPOND] = "RESPOND",
[DCCP_CLOSING] = "CLOSING",
[DCCP_TIME_WAIT] = "TIME_WAIT",
[DCCP_CLOSED] = "CLOSED",
};
if (state >= DCCP_MAX_STATES)
return "INVALID STATE!";
else
return dccp_state_names[state];
}
EXPORT_SYMBOL_GPL(dccp_state_name);
static inline int dccp_listen_start(struct sock *sk)
{
dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
int dccp_disconnect(struct sock *sk, int flags)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err = 0;
const int old_state = sk->sk_state;
if (old_state != DCCP_CLOSED)
dccp_set_state(sk, DCCP_CLOSED);
/* ABORT function of RFC793 */
if (old_state == DCCP_LISTEN) {
inet_csk_listen_stop(sk);
/* FIXME: do the active reset thing */
} else if (old_state == DCCP_REQUESTING)
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
if (sk->sk_send_head != NULL) {
__kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
}
inet->dport = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
icsk->icsk_backoff = 0;
inet_csk_delack_init(sk);
__sk_dst_reset(sk);
BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
sk->sk_error_report(sk);
return err;
}
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
dccp_pr_debug("entry\n");
return -ENOIOCTLCMD;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen)
{
dccp_pr_debug("entry\n");
if (level != SOL_DCCP)
return ip_setsockopt(sk, level, optname, optval, optlen);
return -EOPNOTSUPP;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
char *optval, int *optlen)
{
dccp_pr_debug("entry\n");
if (level != SOL_DCCP)
return ip_getsockopt(sk, level, optname, optval, optlen);
return -EOPNOTSUPP;
}
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
const struct dccp_sock *dp = dccp_sk(sk);
const int flags = msg->msg_flags;
const int noblock = flags & MSG_DONTWAIT;
struct sk_buff *skb;
int rc, size;
long timeo;
if (len > dp->dccps_mss_cache)
return -EMSGSIZE;
lock_sock(sk);
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
/*
* We have to use sk_stream_wait_connect here to set sk_write_pending,
* so that the trick in dccp_rcv_request_sent_state_process.
*/
/* Wait for a connection to finish. */
if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_err;
size = sk->sk_prot->max_header + len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (skb == NULL)
goto out_release;
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
if (rc == 0) {
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
long delay;
/*
* XXX: This is just to match the Waikato tree CA interaction
* points, after the CCID3 code is stable and I have a better
* understanding of behaviour I'll change this to look more like
* TCP.
*/
while (1) {
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk,
skb, len, &delay);
if (rc == 0)
break;
if (rc != -EAGAIN)
goto out_discard;
if (delay > timeo)
goto out_discard;
release_sock(sk);
delay = schedule_timeout(delay);
lock_sock(sk);
timeo -= delay;
if (signal_pending(current))
goto out_interrupted;
rc = -EPIPE;
if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN))
goto out_discard;
}
if (sk->sk_state == DCCP_PARTOPEN) {
/* See 8.1.5. Handshake Completion */
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
dcb->dccpd_type = DCCP_PKT_DATAACK;
/* FIXME: we really should have a dccps_ack_pending or use icsk */
} else if (inet_csk_ack_scheduled(sk) ||
(dp->dccps_options.dccpo_send_ack_vector &&
ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
dcb->dccpd_type = DCCP_PKT_DATAACK;
else
dcb->dccpd_type = DCCP_PKT_DATA;
dccp_transmit_skb(sk, skb);
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
} else {
out_discard:
kfree_skb(skb);
}
out_release:
release_sock(sk);
return rc ? : len;
out_err:
rc = sk_stream_error(sk, flags, rc);
goto out_release;
out_interrupted:
rc = sock_intr_errno(timeo);
goto out_discard;
}
EXPORT_SYMBOL(dccp_sendmsg);
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len)
{
const struct dccp_hdr *dh;
int copied = 0;
unsigned long used;
int err;
int target; /* Read at least this many bytes */
long timeo;
lock_sock(sk);
err = -ENOTCONN;
if (sk->sk_state == DCCP_LISTEN)
goto out;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
goto recv_urg;
/* FIXME */
#if 0
seq = &tp->copied_seq;
if (flags & MSG_PEEK) {
peek_seq = tp->copied_seq;
seq = &peek_seq;
}
#endif
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
do {
struct sk_buff *skb;
u32 offset;
/* FIXME */
#if 0
/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
if (tp->urg_data && tp->urg_seq == *seq) {
if (copied)
break;
if (signal_pending(current)) {
copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
break;
}
}
#endif
/* Next get a buffer. */
skb = skb_peek(&sk->sk_receive_queue);
do {
if (!skb)
break;
offset = 0;
dh = dccp_hdr(skb);
if (dh->dccph_type == DCCP_PKT_DATA ||
dh->dccph_type == DCCP_PKT_DATAACK)
goto found_ok_skb;
if (dh->dccph_type == DCCP_PKT_RESET ||
dh->dccph_type == DCCP_PKT_CLOSE) {
dccp_pr_debug("found fin ok!\n");
goto found_fin_ok;
}
dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type));
BUG_TRAP(flags & MSG_PEEK);
skb = skb->next;
} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
/* Well, if we have backlog, try to process it now yet. */
if (copied >= target && !sk->sk_backlog.tail)
break;
if (copied) {
if (sk->sk_err ||
sk->sk_state == DCCP_CLOSED ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
!timeo ||
signal_pending(current) ||
(flags & MSG_PEEK))
break;
} else {
if (sock_flag(sk, SOCK_DONE))
break;
if (sk->sk_err) {
copied = sock_error(sk);
break;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
if (sk->sk_state == DCCP_CLOSED) {
if (!sock_flag(sk, SOCK_DONE)) {
/* This occurs when user tries to read
* from never connected socket.
*/
copied = -ENOTCONN;
break;
}
break;
}
if (!timeo) {
copied = -EAGAIN;
break;
}
if (signal_pending(current)) {
copied = sock_intr_errno(timeo);
break;
}
}
/* FIXME: cleanup_rbuf(sk, copied); */
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
} else
sk_wait_data(sk, &timeo);
continue;
found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
used = len;
if (!(flags & MSG_TRUNC)) {
err = skb_copy_datagram_iovec(skb, offset,
msg->msg_iov, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
copied = -EFAULT;
break;
}
}
copied += used;
len -= used;
/* FIXME: tcp_rcv_space_adjust(sk); */
//skip_copy:
if (used + offset < skb->len)
continue;
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
continue;
found_fin_ok:
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
} while (len > 0);
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
/* Clean up data we have read: This will do ACK frames. */
/* FIXME: cleanup_rbuf(sk, copied); */
release_sock(sk);
return copied;
out:
release_sock(sk);
return err;
recv_urg:
/* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */
goto out;
}
static int inet_dccp_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
if (old_state != DCCP_LISTEN) {
/*
* FIXME: here it probably should be sk->sk_prot->listen_start
* see tcp_listen_start
*/
err = dccp_listen_start(sk);
if (err)
goto out;
}
sk->sk_max_ack_backlog = backlog;
err = 0;
out:
release_sock(sk);
return err;
}
static const unsigned char dccp_new_state[] = {
/* current state: new state: action: */
[0] = DCCP_CLOSED,
[DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_REQUESTING] = DCCP_CLOSED,
[DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_LISTEN] = DCCP_CLOSED,
[DCCP_RESPOND] = DCCP_CLOSED,
[DCCP_CLOSING] = DCCP_CLOSED,
[DCCP_TIME_WAIT] = DCCP_CLOSED,
[DCCP_CLOSED] = DCCP_CLOSED,
};
static int dccp_close_state(struct sock *sk)
{
const int next = dccp_new_state[sk->sk_state];
const int ns = next & DCCP_STATE_MASK;
if (ns != sk->sk_state)
dccp_set_state(sk, ns);
return next & DCCP_ACTION_FIN;
}
void dccp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if (sk->sk_state == DCCP_LISTEN) {
dccp_set_state(sk, DCCP_CLOSED);
/* Special case. */
inet_csk_listen_stop(sk);
goto adjudge_to_death;
}
/*
* We need to flush the recv. buffs. We do this only on the
* descriptor close, not protocol-sourced closes, because the
*reader process may not have drained the data yet!
*/
/* FIXME: check for unread data */
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
__kfree_skb(skb);
}
if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
} else if (dccp_close_state(sk)) {
dccp_send_close(sk);
}
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
release_sock(sk);
/*
* Now socket is owned by kernel and we acquire BH lock
* to finish close. No need to check for user refs.
*/
local_bh_disable();
bh_lock_sock(sk);
BUG_TRAP(!sock_owned_by_user(sk));
sock_hold(sk);
sock_orphan(sk);
if (sk->sk_state != DCCP_CLOSED)
dccp_set_state(sk, DCCP_CLOSED);
atomic_inc(&dccp_orphan_count);
if (sk->sk_state == DCCP_CLOSED)
inet_csk_destroy_sock(sk);
/* Otherwise, socket is reprieved until protocol close. */
bh_unlock_sock(sk);
local_bh_enable();
sock_put(sk);
}
void dccp_shutdown(struct sock *sk, int how)
{
dccp_pr_debug("entry\n");
}
struct proto_ops inet_dccp_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = sock_no_poll,
.ioctl = inet_ioctl,
.listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
extern struct net_proto_family inet_family_ops;
static struct inet_protosw dccp_v4_protosw = {
.type = SOCK_DCCP,
.protocol = IPPROTO_DCCP,
.prot = &dccp_v4_prot,
.ops = &inet_dccp_ops,
.capability = -1,
.no_check = 0,
.flags = 0,
};
/*
* This is the global socket data structure used for responding to
* the Out-of-the-blue (OOTB) packets. A control sock will be created
* for this socket at the initialization time.
*/
struct socket *dccp_ctl_socket;
static char dccp_ctl_socket_err_msg[] __initdata =
KERN_ERR "DCCP: Failed to create the control socket.\n";
static int __init dccp_ctl_sock_init(void)
{
int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
&dccp_ctl_socket);
if (rc < 0)
printk(dccp_ctl_socket_err_msg);
else {
dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
/* Unhash it so that IP input processing does not even
* see it, we do not wish this socket to see incoming
* packets.
*/
dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
}
return rc;
}
static void __exit dccp_ctl_sock_exit(void)
{
if (dccp_ctl_socket != NULL)
sock_release(dccp_ctl_socket);
}
static int __init init_dccp_v4_mibs(void)
{
int rc = -ENOMEM;
dccp_statistics[0] = alloc_percpu(struct dccp_mib);
if (dccp_statistics[0] == NULL)
goto out;
dccp_statistics[1] = alloc_percpu(struct dccp_mib);
if (dccp_statistics[1] == NULL)
goto out_free_one;
rc = 0;
out:
return rc;
out_free_one:
free_percpu(dccp_statistics[0]);
dccp_statistics[0] = NULL;
goto out;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
int rc = proto_register(&dccp_v4_prot, 1);
if (rc)
goto out;
dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_proto_unregister;
/*
* Size and allocate the main established and bind bucket
* hash tables.
*
* The methodology is similar to that of the buffer cache.
*/
if (num_physpages >= (128 * 1024))
goal = num_physpages >> (21 - PAGE_SHIFT);
else
goal = num_physpages >> (23 - PAGE_SHIFT);
if (thash_entries)
goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
;
do {
dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
sizeof(struct inet_ehash_bucket);
dccp_hashinfo.ehash_size >>= 1;
while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1))
dccp_hashinfo.ehash_size--;
dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
__get_free_pages(GFP_ATOMIC, ehash_order);
} while (!dccp_hashinfo.ehash && --ehash_order > 0);
if (!dccp_hashinfo.ehash) {
printk(KERN_CRIT "Failed to allocate DCCP "
"established hash table\n");
goto out_free_bind_bucket_cachep;
}
for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
rwlock_init(&dccp_hashinfo.ehash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
}
bhash_order = ehash_order;
do {
dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
sizeof(struct inet_bind_hashbucket);
if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0)
continue;
dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
__get_free_pages(GFP_ATOMIC, bhash_order);
} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
if (!dccp_hashinfo.bhash) {
printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
goto out_free_dccp_ehash;
}
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
}
if (init_dccp_v4_mibs())
goto out_free_dccp_bhash;
rc = -EAGAIN;
if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
goto out_free_dccp_v4_mibs;
inet_register_protosw(&dccp_v4_protosw);
rc = dccp_ctl_sock_init();
if (rc)
goto out_unregister_protosw;
out:
return rc;
out_unregister_protosw:
inet_unregister_protosw(&dccp_v4_protosw);
inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
free_percpu(dccp_statistics[0]);
free_percpu(dccp_statistics[1]);
dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
proto_unregister(&dccp_v4_prot);
goto out;
}
static const char dccp_del_proto_err_msg[] __exitdata =
KERN_ERR "can't remove dccp net_protocol\n";
static void __exit dccp_fini(void)
{
dccp_ctl_sock_exit();
inet_unregister_protosw(&dccp_v4_protosw);
if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
printk(dccp_del_proto_err_msg);
/* Free the control endpoint. */
sock_release(dccp_ctl_socket);
proto_unregister(&dccp_v4_prot);
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
}
module_init(dccp_init);
module_exit(dccp_fini);
/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");

249
net/dccp/timer.c Normal file
View file

@ -0,0 +1,249 @@
/*
* net/dccp/timer.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include "dccp.h"
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);
void dccp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
static void dccp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
/* A write timeout has occurred. Process the after effects. */
static int dccp_write_timeout(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
dst_negative_advice(&sk->sk_dst_cache);
retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
} else {
if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
hole detection. :-(
It is place to make it. It is not made. I do not want
to make it. It is disguisting. It does not work in any
case. Let me to cite the same draft, which requires for
us to implement this:
"The one security concern raised by this memo is that ICMP black holes
are often caused by over-zealous security administrators who block
all ICMP messages. It is vitally important that those who design and
deploy security systems understand the impact of strict filtering on
upper-layer protocols. The safest web site in the world is worthless
if most TCP implementations cannot transfer data from it. It would
be far nicer to have all of the black holes fixed rather than fixing
all of the TCP implementations."
Golden words :-).
*/
dst_negative_advice(&sk->sk_dst_cache);
}
retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
/*
* FIXME: see tcp_write_timout and tcp_out_of_resources
*/
}
if (icsk->icsk_retransmits >= retry_until) {
/* Has it gone just too far? */
dccp_write_err(sk);
return 1;
}
return 0;
}
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
static void dccp_delack_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct inet_connection_sock *icsk = inet_csk(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
goto out;
}
if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
goto out;
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
*/
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
dccp_send_ack(sk);
NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}
/*
* The DCCP retransmit timer.
*/
static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/*
* sk->sk_send_head has to have one skb with
* DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
* packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake
* (PARTOPEN timer), etc).
*/
BUG_TRAP(sk->sk_send_head != NULL);
/*
* More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
if (dccp_write_timeout(sk))
goto out;
/*
* We want to know the number of packets retransmitted, not the
* total number of retransmissions of clones of original packets.
*/
if (icsk->icsk_retransmits == 0)
DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
/*
* Retransmission failed because of local congestion,
* do not backoff.
*/
if (icsk->icsk_retransmits == 0)
icsk->icsk_retransmits = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
min(icsk->icsk_rto,
TCP_RESOURCE_PROBE_INTERVAL),
TCP_RTO_MAX);
goto out;
}
icsk->icsk_backoff++;
icsk->icsk_retransmits++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
__sk_dst_reset(sk);
out:;
}
static void dccp_write_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct inet_connection_sock *icsk = inet_csk(sk);
int event = 0;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later */
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
goto out;
}
if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
goto out;
}
event = icsk->icsk_pending;
icsk->icsk_pending = 0;
switch (event) {
case ICSK_TIME_RETRANS:
dccp_retransmit_timer(sk);
break;
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}
/*
* Timer for listening sockets
*/
static void dccp_response_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
}
static void dccp_keepalive_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
inet_csk_reset_keepalive_timer(sk, HZ / 20);
goto out;
}
if (sk->sk_state == DCCP_LISTEN) {
dccp_response_timer(sk);
goto out;
}
out:
bh_unlock_sock(sk);
sock_put(sk);
}