Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

Linus Torvalds 2005-11-09 19:32:25 -08:00
commit b01a55a865
69 changed files with 11838 additions and 486 deletions

include/linux/genetlink.h (new file)

@@ -0,0 +1,51 @@
#ifndef __LINUX_GENERIC_NETLINK_H
#define __LINUX_GENERIC_NETLINK_H
#include <linux/netlink.h>
#define GENL_NAMSIZ 16 /* length of family name */
#define GENL_MIN_ID NLMSG_MIN_TYPE
#define GENL_MAX_ID 1023
struct genlmsghdr {
__u8 cmd;
__u8 version;
__u16 reserved;
};
#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr))
/*
* List of reserved static generic netlink identifiers:
*/
#define GENL_ID_GENERATE 0
#define GENL_ID_CTRL NLMSG_MIN_TYPE
/**************************************************************************
* Controller
**************************************************************************/
enum {
CTRL_CMD_UNSPEC,
CTRL_CMD_NEWFAMILY,
CTRL_CMD_DELFAMILY,
CTRL_CMD_GETFAMILY,
CTRL_CMD_NEWOPS,
CTRL_CMD_DELOPS,
CTRL_CMD_GETOPS,
__CTRL_CMD_MAX,
};
#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
enum {
CTRL_ATTR_UNSPEC,
CTRL_ATTR_FAMILY_ID,
CTRL_ATTR_FAMILY_NAME,
__CTRL_ATTR_MAX,
};
#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
#endif /* __LINUX_GENERIC_NETLINK_H */
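As a usage illustration (not part of the diff above): resolving a family name to its numeric id goes through the controller with CTRL_CMD_GETFAMILY. The sketch below lays such a request out by hand in userspace, using only the constants from this header plus the NLMSG_*/NLA_* macros from <linux/netlink.h>; the helper name genl_build_getfamily() and its error handling are invented for the example.

#include <string.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>

/* Illustrative only: fill buf with a CTRL_CMD_GETFAMILY request asking the
 * generic netlink controller (GENL_ID_CTRL) to look up a family by name. */
static int genl_build_getfamily(char *buf, int buflen, const char *name)
{
    struct nlmsghdr *nlh = (struct nlmsghdr *) buf;
    struct genlmsghdr *ghdr;
    struct nlattr *nla;
    int name_len = strlen(name) + 1;    /* include the terminating NUL */

    if (buflen < NLMSG_SPACE(GENL_HDRLEN + NLA_HDRLEN + name_len))
        return -1;

    nlh->nlmsg_type = GENL_ID_CTRL;     /* the controller's fixed id */
    nlh->nlmsg_flags = NLM_F_REQUEST;
    nlh->nlmsg_seq = 0;
    nlh->nlmsg_pid = 0;

    ghdr = NLMSG_DATA(nlh);             /* genlmsghdr follows the nlmsghdr */
    ghdr->cmd = CTRL_CMD_GETFAMILY;
    ghdr->version = 1;                  /* illustrative version number */
    ghdr->reserved = 0;

    /* a single CTRL_ATTR_FAMILY_NAME attribute after the genl header */
    nla = (struct nlattr *) ((char *) ghdr + GENL_HDRLEN);
    nla->nla_type = CTRL_ATTR_FAMILY_NAME;
    nla->nla_len = NLA_HDRLEN + name_len;
    memcpy((char *) nla + NLA_HDRLEN, name, name_len);

    nlh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN + NLA_HDRLEN + name_len);
    return nlh->nlmsg_len;
}

The controller's reply would carry CTRL_ATTR_FAMILY_ID holding the id under which the family was registered (a fixed one, or one assigned for GENL_ID_GENERATE).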

@@ -0,0 +1,159 @@
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H
/* Connection state tracking for netfilter. This is separated from,
but required by, the NAT layer; it can also be used by an iptables
extension. */
enum ip_conntrack_info
{
/* Part of an established connection (either direction). */
IP_CT_ESTABLISHED,
/* Like NEW, but related to an existing connection, or ICMP error
(in either direction). */
IP_CT_RELATED,
/* Started a new connection to track (only
IP_CT_DIR_ORIGINAL); may be a retransmission. */
IP_CT_NEW,
/* >= this indicates reply direction */
IP_CT_IS_REPLY,
/* Number of distinct IP_CT types (no NEW in reply dirn). */
IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
};
/* Bitset representing status of connection. */
enum ip_conntrack_status {
/* It's an expected connection: bit 0 set. This bit never changed */
IPS_EXPECTED_BIT = 0,
IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
/* We've seen packets both ways: bit 1 set. Can be set, not unset. */
IPS_SEEN_REPLY_BIT = 1,
IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
/* Conntrack should never be early-expired. */
IPS_ASSURED_BIT = 2,
IPS_ASSURED = (1 << IPS_ASSURED_BIT),
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
/* Connection needs src nat in orig dir. This bit never changed. */
IPS_SRC_NAT_BIT = 4,
IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
/* Connection needs dst nat in orig dir. This bit never changed. */
IPS_DST_NAT_BIT = 5,
IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
/* Both together. */
IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
/* Connection needs TCP sequence adjusted. */
IPS_SEQ_ADJUST_BIT = 6,
IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
/* NAT initialization bits. */
IPS_SRC_NAT_DONE_BIT = 7,
IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
IPS_DST_NAT_DONE_BIT = 8,
IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
/* Both together */
IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
/* Connection is dying (removed from lists), can not be unset. */
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
};
/* Connection tracking event bits */
enum ip_conntrack_events
{
/* New conntrack */
IPCT_NEW_BIT = 0,
IPCT_NEW = (1 << IPCT_NEW_BIT),
/* Expected connection */
IPCT_RELATED_BIT = 1,
IPCT_RELATED = (1 << IPCT_RELATED_BIT),
/* Destroyed conntrack */
IPCT_DESTROY_BIT = 2,
IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
/* Timer has been refreshed */
IPCT_REFRESH_BIT = 3,
IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
/* Status has changed */
IPCT_STATUS_BIT = 4,
IPCT_STATUS = (1 << IPCT_STATUS_BIT),
/* Update of protocol info */
IPCT_PROTOINFO_BIT = 5,
IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
/* Volatile protocol info */
IPCT_PROTOINFO_VOLATILE_BIT = 6,
IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
/* New helper for conntrack */
IPCT_HELPER_BIT = 7,
IPCT_HELPER = (1 << IPCT_HELPER_BIT),
/* Update of helper info */
IPCT_HELPINFO_BIT = 8,
IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
/* Volatile helper info */
IPCT_HELPINFO_VOLATILE_BIT = 9,
IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
/* NAT info */
IPCT_NATINFO_BIT = 10,
IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
/* Counter highest bit has been set */
IPCT_COUNTER_FILLING_BIT = 11,
IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
};
enum ip_conntrack_expect_events {
IPEXP_NEW_BIT = 0,
IPEXP_NEW = (1 << IPEXP_NEW_BIT),
};
#ifdef __KERNEL__
struct ip_conntrack_counter
{
u_int32_t packets;
u_int32_t bytes;
};
struct ip_conntrack_stat
{
unsigned int searched;
unsigned int found;
unsigned int new;
unsigned int invalid;
unsigned int ignore;
unsigned int delete;
unsigned int delete_list;
unsigned int insert;
unsigned int insert_failed;
unsigned int drop;
unsigned int early_drop;
unsigned int error;
unsigned int expect_new;
unsigned int expect_create;
unsigned int expect_delete;
};
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_COMMON_H */

@@ -0,0 +1,44 @@
#ifndef _NF_CONNTRACK_FTP_H
#define _NF_CONNTRACK_FTP_H
/* FTP tracking. */
/* This enum is exposed to userspace */
enum ip_ct_ftp_type
{
/* PORT command from client */
IP_CT_FTP_PORT,
/* PASV response from server */
IP_CT_FTP_PASV,
/* EPRT command from client */
IP_CT_FTP_EPRT,
/* EPSV response from server */
IP_CT_FTP_EPSV,
};
#ifdef __KERNEL__
#define FTP_PORT 21
#define NUM_SEQ_TO_REMEMBER 2
/* This structure exists only once per master */
struct ip_ct_ftp_master {
/* Valid seq positions for cmd matching after newline */
u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
/* 0 means seq_match_aft_nl not set */
int seq_aft_nl_num[IP_CT_DIR_MAX];
};
struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack_expect *exp,
u32 *seq);
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_FTP_H */

@@ -0,0 +1,27 @@
#ifndef _NF_CONNTRACK_SCTP_H
#define _NF_CONNTRACK_SCTP_H
/* SCTP tracking. */
#include <linux/netfilter/nf_conntrack_tuple_common.h>
enum sctp_conntrack {
SCTP_CONNTRACK_NONE,
SCTP_CONNTRACK_CLOSED,
SCTP_CONNTRACK_COOKIE_WAIT,
SCTP_CONNTRACK_COOKIE_ECHOED,
SCTP_CONNTRACK_ESTABLISHED,
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_MAX
};
struct ip_ct_sctp
{
enum sctp_conntrack state;
u_int32_t vtag[IP_CT_DIR_MAX];
u_int32_t ttag[IP_CT_DIR_MAX];
};
#endif /* _NF_CONNTRACK_SCTP_H */

@@ -0,0 +1,56 @@
#ifndef _NF_CONNTRACK_TCP_H
#define _NF_CONNTRACK_TCP_H
/* TCP tracking. */
/* This is exposed to userspace (ctnetlink) */
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
TCP_CONNTRACK_MAX,
TCP_CONNTRACK_IGNORE
};
/* Window scaling is advertised by the sender */
#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
/* This sender sent FIN first */
#define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
#ifdef __KERNEL__
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
{
struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
u_int8_t state; /* state of the connection (enum tcp_conntrack) */
/* For detecting stale connections */
u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
u_int8_t retrans; /* Number of retransmitted packets */
u_int8_t last_index; /* Index of the last packet */
u_int32_t last_seq; /* Last sequence number seen in dir */
u_int32_t last_ack; /* Last sequence number seen in opposite dir */
u_int32_t last_end; /* Last seq + len */
};
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_TCP_H */

@@ -0,0 +1,13 @@
#ifndef _NF_CONNTRACK_TUPLE_COMMON_H
#define _NF_CONNTRACK_TUPLE_COMMON_H
enum ip_conntrack_dir
{
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
IP_CT_DIR_MAX
};
#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
#endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
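CTINFO2DIR() works because enum ip_conntrack_info in nf_conntrack_common.h places every reply-direction state at or above IP_CT_IS_REPLY. A minimal sketch of the usual pattern, indexing a per-direction array by the packet's conntrack info (the helper itself is illustrative, not part of the patch):

#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>

/* Illustrative: select the per-direction counter slot for a packet. */
static inline struct ip_conntrack_counter *
pkt_counter(struct ip_conntrack_counter counters[IP_CT_DIR_MAX],
            enum ip_conntrack_info ctinfo)
{
    /* IP_CT_NEW/IP_CT_RELATED/IP_CT_ESTABLISHED -> IP_CT_DIR_ORIGINAL,
     * the same states offset by IP_CT_IS_REPLY  -> IP_CT_DIR_REPLY     */
    return &counters[CTINFO2DIR(ctinfo)];
}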

@@ -1,132 +1,7 @@
#ifndef _IP_CONNTRACK_H
#define _IP_CONNTRACK_H
/* Connection state tracking for netfilter. This is separated from,
but required by, the NAT layer; it can also be used by an iptables
extension. */
enum ip_conntrack_info
{
/* Part of an established connection (either direction). */
IP_CT_ESTABLISHED,
/* Like NEW, but related to an existing connection, or ICMP error
(in either direction). */
IP_CT_RELATED,
/* Started a new connection to track (only
IP_CT_DIR_ORIGINAL); may be a retransmission. */
IP_CT_NEW,
/* >= this indicates reply direction */
IP_CT_IS_REPLY,
/* Number of distinct IP_CT types (no NEW in reply dirn). */
IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
};
/* Bitset representing status of connection. */
enum ip_conntrack_status {
/* It's an expected connection: bit 0 set. This bit never changed */
IPS_EXPECTED_BIT = 0,
IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
/* We've seen packets both ways: bit 1 set. Can be set, not unset. */
IPS_SEEN_REPLY_BIT = 1,
IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
/* Conntrack should never be early-expired. */
IPS_ASSURED_BIT = 2,
IPS_ASSURED = (1 << IPS_ASSURED_BIT),
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
/* Connection needs src nat in orig dir. This bit never changed. */
IPS_SRC_NAT_BIT = 4,
IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
/* Connection needs dst nat in orig dir. This bit never changed. */
IPS_DST_NAT_BIT = 5,
IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
/* Both together. */
IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
/* Connection needs TCP sequence adjusted. */
IPS_SEQ_ADJUST_BIT = 6,
IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
/* NAT initialization bits. */
IPS_SRC_NAT_DONE_BIT = 7,
IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
IPS_DST_NAT_DONE_BIT = 8,
IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
/* Both together */
IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
/* Connection is dying (removed from lists), can not be unset. */
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
};
/* Connection tracking event bits */
enum ip_conntrack_events
{
/* New conntrack */
IPCT_NEW_BIT = 0,
IPCT_NEW = (1 << IPCT_NEW_BIT),
/* Expected connection */
IPCT_RELATED_BIT = 1,
IPCT_RELATED = (1 << IPCT_RELATED_BIT),
/* Destroyed conntrack */
IPCT_DESTROY_BIT = 2,
IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
/* Timer has been refreshed */
IPCT_REFRESH_BIT = 3,
IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
/* Status has changed */
IPCT_STATUS_BIT = 4,
IPCT_STATUS = (1 << IPCT_STATUS_BIT),
/* Update of protocol info */
IPCT_PROTOINFO_BIT = 5,
IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
/* Volatile protocol info */
IPCT_PROTOINFO_VOLATILE_BIT = 6,
IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
/* New helper for conntrack */
IPCT_HELPER_BIT = 7,
IPCT_HELPER = (1 << IPCT_HELPER_BIT),
/* Update of helper info */
IPCT_HELPINFO_BIT = 8,
IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
/* Volatile helper info */
IPCT_HELPINFO_VOLATILE_BIT = 9,
IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
/* NAT info */
IPCT_NATINFO_BIT = 10,
IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
/* Counter highest bit has been set */
IPCT_COUNTER_FILLING_BIT = 11,
IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
};
enum ip_conntrack_expect_events {
IPEXP_NEW_BIT = 0,
IPEXP_NEW = (1 << IPEXP_NEW_BIT),
};
#include <linux/netfilter/nf_conntrack_common.h>
#ifdef __KERNEL__
#include <linux/config.h>
@@ -194,12 +69,6 @@ do { \
#define IP_NF_ASSERT(x)
#endif
struct ip_conntrack_counter
{
u_int32_t packets;
u_int32_t bytes;
};
struct ip_conntrack_helper;
struct ip_conntrack
@@ -426,25 +295,6 @@ static inline int is_dying(struct ip_conntrack *ct)
extern unsigned int ip_conntrack_htable_size;
struct ip_conntrack_stat
{
unsigned int searched;
unsigned int found;
unsigned int new;
unsigned int invalid;
unsigned int ignore;
unsigned int delete;
unsigned int delete_list;
unsigned int insert;
unsigned int insert_failed;
unsigned int drop;
unsigned int early_drop;
unsigned int error;
unsigned int expect_new;
unsigned int expect_create;
unsigned int expect_delete;
};
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS

@@ -1,43 +1,6 @@
#ifndef _IP_CONNTRACK_FTP_H
#define _IP_CONNTRACK_FTP_H
/* FTP tracking. */
#ifdef __KERNEL__
#include <linux/netfilter/nf_conntrack_ftp.h>
#define FTP_PORT 21
#endif /* __KERNEL__ */
enum ip_ct_ftp_type
{
/* PORT command from client */
IP_CT_FTP_PORT,
/* PASV response from server */
IP_CT_FTP_PASV,
/* EPRT command from client */
IP_CT_FTP_EPRT,
/* EPSV response from server */
IP_CT_FTP_EPSV,
};
#define NUM_SEQ_TO_REMEMBER 2
/* This structure exists only once per master */
struct ip_ct_ftp_master {
/* Valid seq positions for cmd matching after newline */
u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
/* 0 means seq_match_aft_nl not set */
int seq_aft_nl_num[IP_CT_DIR_MAX];
};
struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack_expect *exp,
u32 *seq);
#endif /* _IP_CONNTRACK_FTP_H */

@@ -1,11 +1,6 @@
#ifndef _IP_CONNTRACK_ICMP_H
#define _IP_CONNTRACK_ICMP_H
/* ICMP tracking. */
#include <asm/atomic.h>
struct ip_ct_icmp
{
/* Optimization: when number in == number out, forget immediately. */
atomic_t count;
};
#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
#endif /* _IP_CONNTRACK_ICMP_H */

@@ -1,25 +1,6 @@
#ifndef _IP_CONNTRACK_SCTP_H
#define _IP_CONNTRACK_SCTP_H
/* SCTP tracking. */
enum sctp_conntrack {
SCTP_CONNTRACK_NONE,
SCTP_CONNTRACK_CLOSED,
SCTP_CONNTRACK_COOKIE_WAIT,
SCTP_CONNTRACK_COOKIE_ECHOED,
SCTP_CONNTRACK_ESTABLISHED,
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_MAX
};
struct ip_ct_sctp
{
enum sctp_conntrack state;
u_int32_t vtag[IP_CT_DIR_MAX];
u_int32_t ttag[IP_CT_DIR_MAX];
};
#include <linux/netfilter/nf_conntrack_sctp.h>
#endif /* _IP_CONNTRACK_SCTP_H */

@@ -1,51 +1,6 @@
#ifndef _IP_CONNTRACK_TCP_H
#define _IP_CONNTRACK_TCP_H
/* TCP tracking. */
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
TCP_CONNTRACK_MAX,
TCP_CONNTRACK_IGNORE
};
/* Window scaling is advertised by the sender */
#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
/* This sender sent FIN first */
#define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
{
struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
u_int8_t state; /* state of the connection (enum tcp_conntrack) */
/* For detecting stale connections */
u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
u_int8_t retrans; /* Number of retransmitted packets */
u_int8_t last_index; /* Index of the last packet */
u_int32_t last_seq; /* Last sequence number seen in dir */
u_int32_t last_ack; /* Last sequence number seen in opposite dir */
u_int32_t last_end; /* Last seq + len */
};
#include <linux/netfilter/nf_conntrack_tcp.h>
#endif /* _IP_CONNTRACK_TCP_H */

@@ -2,6 +2,7 @@
#define _IP_CONNTRACK_TUPLE_H
#include <linux/types.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
/* A `tuple' is a structure containing the information to uniquely
identify a connection. ie. if two packets have the same tuple, they
@@ -88,13 +89,6 @@ struct ip_conntrack_tuple
(tuple)->dst.u.all = 0; \
} while (0)
enum ip_conntrack_dir
{
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
IP_CT_DIR_MAX
};
#ifdef __KERNEL__
#define DUMP_TUPLE(tp) \
@@ -103,8 +97,6 @@ DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n", \
NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all), \
NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
/* If we're the first tuple, it's the original dir. */
#define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)

@@ -59,6 +59,7 @@
enum nf_ip6_hook_priorities {
NF_IP6_PRI_FIRST = INT_MIN,
NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
NF_IP6_PRI_SELINUX_FIRST = -225,
NF_IP6_PRI_CONNTRACK = -200,
NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,

@@ -71,7 +71,8 @@ struct nlmsghdr
#define NLMSG_ALIGNTO 4
#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(sizeof(struct nlmsghdr)))
#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
@@ -86,6 +87,8 @@ struct nlmsghdr
#define NLMSG_DONE 0x3 /* End of a dump */
#define NLMSG_OVERRUN 0x4 /* Data lost */
#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
struct nlmsgerr
{
int error;
@@ -108,6 +111,25 @@ enum {
NETLINK_CONNECTED,
};
/*
* <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
* +---------------------+- - -+- - - - - - - - - -+- - -+
* | Header | Pad | Payload | Pad |
* | (struct nlattr) | ing | | ing |
* +---------------------+- - -+- - - - - - - - - -+- - -+
* <-------------- nlattr->nla_len -------------->
*/
struct nlattr
{
__u16 nla_len;
__u16 nla_type;
};
#define NLA_ALIGNTO 4
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
#ifdef __KERNEL__
#include <linux/capability.h>
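struct nlattr and the NLA_* macros above define the raw attribute wire format that the nla_*() helpers in the new <net/netlink.h> are built on. A hedged sketch of walking a packed attribute stream with nothing but these macros (walk_attrs() is illustrative, not an API added by this commit):

#include <linux/netlink.h>

/* Illustrative: iterate a buffer of packed netlink attributes. */
static void walk_attrs(const struct nlattr *head, int len)
{
    const struct nlattr *nla = head;

    while (len >= NLA_HDRLEN && nla->nla_len >= NLA_HDRLEN &&
           nla->nla_len <= len) {
        /* nla_len covers header + payload; payload starts NLA_HDRLEN in */
        const void *payload = (const char *) nla + NLA_HDRLEN;
        int payload_len = nla->nla_len - NLA_HDRLEN;

        (void) payload; (void) payload_len;   /* dispatch on nla->nla_type here */

        /* the next attribute begins at the NLA_ALIGN'ed end of this one */
        len -= NLA_ALIGN(nla->nla_len);
        nla = (const struct nlattr *) ((const char *) nla +
                                       NLA_ALIGN(nla->nla_len));
    }
}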

@@ -274,6 +274,9 @@ struct sk_buff {
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
__u8 ipvs_property:1;
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct sk_buff *nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
@@ -1313,10 +1316,26 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
if (nfct)
atomic_inc(&nfct->use);
}
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
{
if (skb)
atomic_inc(&skb->users);
}
static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
{
if (skb)
kfree_skb(skb);
}
#endif
static inline void nf_reset(struct sk_buff *skb)
{
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put_reasm(skb->nfct_reasm);
skb->nfct_reasm = NULL;
#endif
}
#ifdef CONFIG_BRIDGE_NETFILTER

@@ -205,6 +205,7 @@ enum
NET_ECONET=16,
NET_SCTP=17,
NET_LLC=18,
NET_NETFILTER=19,
};
/* /proc/sys/kernel/random */
@@ -270,6 +271,42 @@ enum
NET_UNIX_MAX_DGRAM_QLEN=3,
};
/* /proc/sys/net/netfilter */
enum
{
NET_NF_CONNTRACK_MAX=1,
NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2,
NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3,
NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4,
NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5,
NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6,
NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7,
NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8,
NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9,
NET_NF_CONNTRACK_UDP_TIMEOUT=10,
NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11,
NET_NF_CONNTRACK_ICMP_TIMEOUT=12,
NET_NF_CONNTRACK_GENERIC_TIMEOUT=13,
NET_NF_CONNTRACK_BUCKETS=14,
NET_NF_CONNTRACK_LOG_INVALID=15,
NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
NET_NF_CONNTRACK_TCP_LOOSE=17,
NET_NF_CONNTRACK_TCP_BE_LIBERAL=18,
NET_NF_CONNTRACK_TCP_MAX_RETRANS=19,
NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
NET_NF_CONNTRACK_COUNT=27,
NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28,
NET_NF_CONNTRACK_FRAG6_TIMEOUT=29,
NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30,
NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31,
};
/* /proc/sys/net/ipv4 */
enum
{

include/net/genetlink.h (new file)

@@ -0,0 +1,154 @@
#ifndef __NET_GENERIC_NETLINK_H
#define __NET_GENERIC_NETLINK_H
#include <linux/genetlink.h>
#include <net/netlink.h>
/**
* struct genl_family - generic netlink family
* @id: protocol family identifier
* @hdrsize: length of user specific header in bytes
* @name: name of family
* @version: protocol version
* @maxattr: maximum number of attributes supported
* @attrbuf: buffer to store parsed attributes
* @ops_list: list of all assigned operations
* @family_list: family list
*/
struct genl_family
{
unsigned int id;
unsigned int hdrsize;
char name[GENL_NAMSIZ];
unsigned int version;
unsigned int maxattr;
struct module * owner;
struct nlattr ** attrbuf; /* private */
struct list_head ops_list; /* private */
struct list_head family_list; /* private */
};
#define GENL_ADMIN_PERM 0x01
/**
* struct genl_info - receiving information
* @snd_seq: sending sequence number
* @snd_pid: netlink pid of sender
* @nlhdr: netlink message header
* @genlhdr: generic netlink message header
* @userhdr: user specific header
* @attrs: netlink attributes
*/
struct genl_info
{
u32 snd_seq;
u32 snd_pid;
struct nlmsghdr * nlhdr;
struct genlmsghdr * genlhdr;
void * userhdr;
struct nlattr ** attrs;
};
/**
* struct genl_ops - generic netlink operations
* @cmd: command identifier
* @flags: flags
* @policy: attribute validation policy
* @doit: standard command callback
* @dumpit: callback for dumpers
* @ops_list: operations list
*/
struct genl_ops
{
unsigned int cmd;
unsigned int flags;
struct nla_policy *policy;
int (*doit)(struct sk_buff *skb,
struct genl_info *info);
int (*dumpit)(struct sk_buff *skb,
struct netlink_callback *cb);
struct list_head ops_list;
};
extern int genl_register_family(struct genl_family *family);
extern int genl_unregister_family(struct genl_family *family);
extern int genl_register_ops(struct genl_family *, struct genl_ops *ops);
extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops);
extern struct sock *genl_sock;
/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
* @pid: netlink pid the message is addressed to
* @seq: sequence number (usually the one of the sender)
* @type: netlink message type
* @hdrlen: length of the user specific header
* @flags: netlink message flags
* @cmd: generic netlink command
* @version: version
*
* Returns pointer to user specific header
*/
static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
int type, int hdrlen, int flags,
u8 cmd, u8 version)
{
struct nlmsghdr *nlh;
struct genlmsghdr *hdr;
nlh = nlmsg_put(skb, pid, seq, type, GENL_HDRLEN + hdrlen, flags);
if (nlh == NULL)
return NULL;
hdr = nlmsg_data(nlh);
hdr->cmd = cmd;
hdr->version = version;
hdr->reserved = 0;
return (char *) hdr + GENL_HDRLEN;
}
/**
* genlmsg_end - Finalize a generic netlink message
* @skb: socket buffer the message is stored in
* @hdr: user specific header
*/
static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
{
return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
}
/**
* genlmsg_cancel - Cancel construction of a generic netlink message
* @skb: socket buffer the message is stored in
* @hdr: generic netlink message header
*/
static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
{
return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
}
/**
* genlmsg_multicast - multicast a netlink message
* @skb: netlink message as socket buffer
* @pid: own netlink pid to avoid sending to yourself
* @group: multicast group id
*/
static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
unsigned int group)
{
return nlmsg_multicast(genl_sock, skb, pid, group);
}
/**
* genlmsg_unicast - unicast a netlink message
* @skb: netlink message as socket buffer
* @pid: netlink pid of the destination socket
*/
static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid)
{
return nlmsg_unicast(genl_sock, skb, pid);
}
#endif /* __NET_GENERIC_NETLINK_H */
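For illustration only (none of this is in the patch): a minimal module registering one family with a single command could look roughly like the sketch below. Every DEMO_* identifier and the "demo" family name are invented, and it assumes attributes are parsed against @policy before @doit runs, as the struct documentation above indicates.

#include <linux/kernel.h>
#include <linux/module.h>
#include <net/genetlink.h>

/* Illustrative identifiers; nothing here is defined by the kernel. */
#define DEMO_CMD_ECHO    1
#define DEMO_ATTR_VALUE  1
#define DEMO_ATTR_MAX    1

static struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
    [DEMO_ATTR_VALUE] = { .type = NLA_U32 },
};

static struct genl_family demo_family = {
    .id      = GENL_ID_GENERATE,   /* let the controller assign an id */
    .name    = "demo",
    .version = 1,
    .hdrsize = 0,                  /* no user specific header */
    .maxattr = DEMO_ATTR_MAX,
    .owner   = THIS_MODULE,
};

static int demo_echo(struct sk_buff *skb, struct genl_info *info)
{
    if (info->attrs[DEMO_ATTR_VALUE])
        printk(KERN_INFO "demo: value %u from pid %u\n",
               nla_get_u32(info->attrs[DEMO_ATTR_VALUE]), info->snd_pid);
    return 0;
}

static struct genl_ops demo_echo_ops = {
    .cmd    = DEMO_CMD_ECHO,
    .policy = demo_policy,
    .doit   = demo_echo,
};

static int __init demo_init(void)
{
    int err;

    err = genl_register_family(&demo_family);
    if (err)
        return err;
    err = genl_register_ops(&demo_family, &demo_echo_ops);
    if (err)
        genl_unregister_family(&demo_family);
    return err;
}

static void __exit demo_exit(void)
{
    genl_unregister_ops(&demo_family, &demo_echo_ops);
    genl_unregister_family(&demo_family);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");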

@@ -0,0 +1,11 @@
#ifndef _NF_CONNTRACK_ICMP_H
#define _NF_CONNTRACK_ICMP_H
/* ICMP tracking. */
#include <asm/atomic.h>
struct ip_ct_icmp
{
/* Optimization: when number in == number out, forget immediately. */
atomic_t count;
};
#endif /* _NF_CONNTRACK_ICMP_H */

@@ -0,0 +1,43 @@
/*
* IPv4 support for nf_conntrack.
*
* 23 Mar 2004: Yasuyuki Kozakai @ USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - move L3 protocol dependent part from include/linux/netfilter_ipv4/
* ip_conntrack.h
*/
#ifndef _NF_CONNTRACK_IPV4_H
#define _NF_CONNTRACK_IPV4_H
#ifdef CONFIG_IP_NF_NAT_NEEDED
#include <linux/netfilter_ipv4/ip_nat.h>
/* per conntrack: nat application helper private data */
union ip_conntrack_nat_help {
/* insert nat helper private data here */
};
struct nf_conntrack_ipv4_nat {
struct ip_nat_info info;
union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
int masq_index;
#endif
};
#endif /* CONFIG_IP_NF_NAT_NEEDED */
struct nf_conntrack_ipv4 {
#ifdef CONFIG_IP_NF_NAT_NEEDED
struct nf_conntrack_ipv4_nat *nat;
#endif
};
/* Returns new sk_buff, or NULL */
struct sk_buff *
nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb);
/* call to create an explicit dependency on nf_conntrack_l3proto_ipv4. */
extern void need_ip_conntrack(void);
#endif /*_NF_CONNTRACK_IPV4_H*/

@@ -0,0 +1,27 @@
/*
* ICMPv6 tracking.
*
* 21 Apr 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - separated from nf_conntrack_icmp.h
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_icmp.h
*/
#ifndef _NF_CONNTRACK_ICMPV6_H
#define _NF_CONNTRACK_ICMPV6_H
#include <asm/atomic.h>
#ifndef ICMPV6_NI_QUERY
#define ICMPV6_NI_QUERY 139
#endif
#ifndef ICMPV6_NI_REPLY
#define ICMPV6_NI_REPLY 140
#endif
struct nf_ct_icmpv6
{
/* Optimization: when number in == number out, forget immediately. */
atomic_t count;
};
#endif /* _NF_CONNTRACK_ICMPV6_H */

@@ -0,0 +1,354 @@
/*
* Connection state tracking for netfilter. This is separated from,
* but required by, the (future) NAT layer; it can also be used by an iptables
* extension.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalize L3 protocol dependent part.
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack.h
*/
#ifndef _NF_CONNTRACK_H
#define _NF_CONNTRACK_H
#include <linux/netfilter/nf_conntrack_common.h>
#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <asm/atomic.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_sctp.h>
#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_conntrack_tuple.h>
/* per conntrack: protocol private data */
union nf_conntrack_proto {
/* insert conntrack proto private data here */
struct ip_ct_sctp sctp;
struct ip_ct_tcp tcp;
struct ip_ct_icmp icmp;
struct nf_ct_icmpv6 icmpv6;
};
union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
/* Add protocol helper include file here */
#include <linux/netfilter/nf_conntrack_ftp.h>
/* per conntrack: application helper private data */
union nf_conntrack_help {
/* insert conntrack helper private data (master) here */
struct ip_ct_ftp_master ct_ftp_info;
};
#include <linux/types.h>
#include <linux/skbuff.h>
#ifdef CONFIG_NETFILTER_DEBUG
#define NF_CT_ASSERT(x) \
do { \
if (!(x)) \
/* Wooah! I'm tripping my conntrack in a frenzy of \
netplay... */ \
printk("NF_CT_ASSERT: %s:%i(%s)\n", \
__FILE__, __LINE__, __FUNCTION__); \
} while(0)
#else
#define NF_CT_ASSERT(x)
#endif
struct nf_conntrack_helper;
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
struct nf_conn
{
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */
struct nf_conntrack ct_general;
/* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */
struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
/* Have we seen traffic both ways yet? (bitset) */
unsigned long status;
/* Timer function; drops refcnt when it goes off. */
struct timer_list timeout;
#ifdef CONFIG_NF_CT_ACCT
/* Accounting Information (same cache line as other written members) */
struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
#endif
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;
/* Current number of expected connections */
unsigned int expecting;
/* Helper, if any */
struct nf_conntrack_helper *helper;
/* features - nat, helper, ... used by allocating system */
u_int32_t features;
/* Storage reserved for other modules: */
union nf_conntrack_proto proto;
#if defined(CONFIG_NF_CONNTRACK_MARK)
u_int32_t mark;
#endif
/* These members are dynamically allocated. */
union nf_conntrack_help *help;
/* Layer 3 dependent members. (ex: NAT) */
union {
struct nf_conntrack_ipv4 *ipv4;
} l3proto;
void *data[0];
};
struct nf_conntrack_expect
{
/* Internal linked list (global expectation list) */
struct list_head list;
/* We expect this tuple, with the following mask */
struct nf_conntrack_tuple tuple, mask;
/* Function to call after setup and insertion */
void (*expectfn)(struct nf_conn *new,
struct nf_conntrack_expect *this);
/* The conntrack of the master connection */
struct nf_conn *master;
/* Timer function; deletes the expectation. */
struct timer_list timeout;
/* Usage count. */
atomic_t use;
/* Flags */
unsigned int flags;
#ifdef CONFIG_NF_NAT_NEEDED
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
union nf_conntrack_manip_proto saved_proto;
/* Direction relative to the master connection. */
enum ip_conntrack_dir dir;
#endif
};
#define NF_CT_EXPECT_PERMANENT 0x1
static inline struct nf_conn *
nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
{
return container_of(hash, struct nf_conn,
tuplehash[hash->tuple.dst.dir]);
}
/* get master conntrack via master expectation */
#define master_ct(conntr) (conntr->master)
/* Alter reply tuple (maybe alter helper). */
extern void
nf_conntrack_alter_reply(struct nf_conn *conntrack,
const struct nf_conntrack_tuple *newreply);
/* Is this tuple taken? (ignoring any belonging to the given
conntrack). */
extern int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack);
/* Return conntrack_info and tuple hash for given skb. */
static inline struct nf_conn *
nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
*ctinfo = skb->nfctinfo;
return (struct nf_conn *)skb->nfct;
}
/* decrement reference count on a conntrack */
static inline void nf_ct_put(struct nf_conn *ct)
{
NF_CT_ASSERT(ct);
nf_conntrack_put(&ct->ct_general);
}
/* call to create an explicit dependency on nf_conntrack. */
extern void need_nf_conntrack(void);
extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
extern void __nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
unsigned long extra_jiffies,
int do_acct);
/* Refresh conntrack for this many jiffies and do accounting */
static inline void nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
unsigned long extra_jiffies)
{
__nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
}
/* Refresh conntrack for this many jiffies */
static inline void nf_ct_refresh(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned long extra_jiffies)
{
__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
}
/* These are for NAT. Icky. */
/* Update TCP window tracking data when NAT mangles the packet */
extern void nf_conntrack_tcp_update(struct sk_buff *skb,
unsigned int dataoff,
struct nf_conn *conntrack,
int dir);
/* Call me when a conntrack is destroyed. */
extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
/* Fake conntrack entry for untracked connections */
extern struct nf_conn nf_conntrack_untracked;
extern int nf_ct_no_defrag;
/* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
extern void nf_conntrack_free(struct nf_conn *ct);
extern struct nf_conn *
nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl);
/* It's confirmed if it is, or has been in the hash table. */
static inline int nf_ct_is_confirmed(struct nf_conn *ct)
{
return test_bit(IPS_CONFIRMED_BIT, &ct->status);
}
static inline int nf_ct_is_dying(struct nf_conn *ct)
{
return test_bit(IPS_DYING_BIT, &ct->status);
}
extern unsigned int nf_conntrack_htable_size;
#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#include <linux/notifier.h>
#include <linux/interrupt.h>
struct nf_conntrack_ecache {
struct nf_conn *ct;
unsigned int events;
};
DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x)
extern struct notifier_block *nf_conntrack_chain;
extern struct notifier_block *nf_conntrack_expect_chain;
static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&nf_conntrack_chain, nb);
}
static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
{
return notifier_chain_unregister(&nf_conntrack_chain, nb);
}
static inline int
nf_conntrack_expect_register_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&nf_conntrack_expect_chain, nb);
}
static inline int
nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
{
return notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
}
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event,
const struct sk_buff *skb)
{
struct nf_conn *ct = (struct nf_conn *)skb->nfct;
struct nf_conntrack_ecache *ecache;
local_bh_disable();
ecache = &__get_cpu_var(nf_conntrack_ecache);
if (ct != ecache->ct)
__nf_ct_event_cache_init(ct);
ecache->events |= event;
local_bh_enable();
}
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct)
{
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
notifier_call_chain(&nf_conntrack_chain, event, ct);
}
static inline void
nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp)
{
notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
}
#else /* CONFIG_NF_CONNTRACK_EVENTS */
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
const struct sk_buff *skb) {}
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct) {}
static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
static inline void
nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
/* no helper, no nat */
#define NF_CT_F_BASIC 0
/* for helper */
#define NF_CT_F_HELP 1
/* for nat. */
#define NF_CT_F_NAT 2
#define NF_CT_F_NUM 4
extern int
nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size,
int (*init_conntrack)(struct nf_conn *, u_int32_t));
extern void
nf_conntrack_unregister_cache(u_int32_t features);
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_H */
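A short consumer-side sketch of the accessors above (demo_refresh() is illustrative): fetch the conntrack attached to an skb with nf_ct_get(), skip the untracked placeholder, and extend the timeout of confirmed connections with nf_ct_refresh_acct().

#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack.h>

/* Illustrative: extend the timeout of a confirmed, tracked connection. */
static void demo_refresh(struct sk_buff *skb, unsigned long extra_jiffies)
{
    enum ip_conntrack_info ctinfo;
    struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

    if (ct == NULL || ct == &nf_conntrack_untracked)
        return;                 /* nothing attached, or the untracked entry */

    if (nf_ct_is_confirmed(ct))
        nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
}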

@@ -0,0 +1,108 @@
#ifndef _NF_CONNTRACK_COMPAT_H
#define _NF_CONNTRACK_COMPAT_H
#ifdef __KERNEL__
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
u_int32_t *ctinfo)
{
struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
if (ct)
return &ct->mark;
else
return NULL;
}
#endif /* CONFIG_IP_NF_CONNTRACK_MARK */
#ifdef CONFIG_IP_NF_CT_ACCT
static inline struct ip_conntrack_counter *
nf_ct_get_counters(const struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
if (ct)
return ct->counters;
else
return NULL;
}
#endif /* CONFIG_IP_NF_CT_ACCT */
static inline int nf_ct_is_untracked(const struct sk_buff *skb)
{
return (skb->nfct == &ip_conntrack_untracked.ct_general);
}
static inline void nf_ct_untrack(struct sk_buff *skb)
{
skb->nfct = &ip_conntrack_untracked.ct_general;
}
static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
enum ip_conntrack_info *ctinfo)
{
struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
return (ct != NULL);
}
#else /* CONFIG_IP_NF_CONNTRACK */
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#ifdef CONFIG_NF_CONNTRACK_MARK
static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
u_int32_t *ctinfo)
{
struct nf_conn *ct = nf_ct_get(skb, ctinfo);
if (ct)
return &ct->mark;
else
return NULL;
}
#endif /* CONFIG_NF_CONNTRACK_MARK */
#ifdef CONFIG_NF_CT_ACCT
static inline struct ip_conntrack_counter *
nf_ct_get_counters(const struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct)
return ct->counters;
else
return NULL;
}
#endif /* CONFIG_NF_CT_ACCT */
static inline int nf_ct_is_untracked(const struct sk_buff *skb)
{
return (skb->nfct == &nf_conntrack_untracked.ct_general);
}
static inline void nf_ct_untrack(struct sk_buff *skb)
{
skb->nfct = &nf_conntrack_untracked.ct_general;
}
static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
enum ip_conntrack_info *ctinfo)
{
struct nf_conn *ct = nf_ct_get(skb, ctinfo);
return (ct != NULL);
}
#endif /* CONFIG_IP_NF_CONNTRACK */
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_COMPAT_H */

@@ -0,0 +1,76 @@
/*
* This header is used to share core functionality between the
* standalone connection tracking module, and the compatibility layer's use
* of connection tracking.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalize L3 protocol dependent part.
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_core.h
*/
#ifndef _NF_CONNTRACK_CORE_H
#define _NF_CONNTRACK_CORE_H
#include <linux/netfilter.h>
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
of connection tracking. */
extern unsigned int nf_conntrack_in(int pf,
unsigned int hooknum,
struct sk_buff **pskb);
extern int nf_conntrack_init(void);
extern void nf_conntrack_cleanup(void);
struct nf_conntrack_l3proto;
extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf);
/* Like above, but you already have conntrack read lock. */
extern struct nf_conntrack_l3proto *__nf_ct_find_l3proto(u_int16_t l3proto);
struct nf_conntrack_protocol;
extern int
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_protocol *protocol);
extern int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_protocol *protocol);
/* Find a connection corresponding to a tuple. */
extern struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack);
extern int __nf_conntrack_confirm(struct sk_buff **pskb);
/* Confirm a connection: returns NF_DROP if packet must be dropped. */
static inline int nf_conntrack_confirm(struct sk_buff **pskb)
{
struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
int ret = NF_ACCEPT;
if (ct) {
if (!nf_ct_is_confirmed(ct))
ret = __nf_conntrack_confirm(pskb);
nf_ct_deliver_cached_events(ct);
}
return ret;
}
extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb);
extern struct list_head *nf_conntrack_hash;
extern struct list_head nf_conntrack_expect_list;
extern rwlock_t nf_conntrack_lock;
#endif /* _NF_CONNTRACK_CORE_H */

@@ -0,0 +1,51 @@
/*
* connection tracking helpers.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalize L3 protocol dependent part.
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_helper.h
*/
#ifndef _NF_CONNTRACK_HELPER_H
#define _NF_CONNTRACK_HELPER_H
#include <net/netfilter/nf_conntrack.h>
struct module;
struct nf_conntrack_helper
{
struct list_head list; /* Internal use. */
const char *name; /* name of the module */
struct module *me; /* pointer to self */
unsigned int max_expected; /* Maximum number of concurrent
* expected connections */
unsigned int timeout; /* timeout for expecteds */
/* Mask of things we will help (compared against server response) */
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple mask;
/* Function to call when data passes; return verdict, or -1 to
invalidate. */
int (*help)(struct sk_buff **pskb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info conntrackinfo);
};
extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
/* Allocate space for an expectation: this is mandatory before calling
nf_conntrack_expect_related. You will have to call put afterwards. */
extern struct nf_conntrack_expect *
nf_conntrack_expect_alloc(struct nf_conn *master);
extern void nf_conntrack_expect_put(struct nf_conntrack_expect *exp);
/* Add an expected connection: can have more than one per connection */
extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp);
extern void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp);
#endif /*_NF_CONNTRACK_HELPER_H*/
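As an illustration (not part of this patch), registering a helper for a made-up TCP service on port 1234 might look roughly like this; every "demo" name and number is invented, and the tuple/mask pair expresses "original-direction TCP traffic to port 1234".

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <asm/byteorder.h>
#include <net/netfilter/nf_conntrack_helper.h>

/* Illustrative help callback: inspect the payload, return a verdict. */
static int demo_help(struct sk_buff **pskb, unsigned int protoff,
                     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
    return NF_ACCEPT;
}

static struct nf_conntrack_helper demo_helper = {
    .name                 = "demo",
    .me                   = THIS_MODULE,
    .max_expected         = 1,
    .timeout              = 5 * 60,              /* expectation timeout */
    .tuple.src.l3num      = AF_INET,
    .tuple.src.u.tcp.port = __constant_htons(1234),
    .tuple.dst.protonum   = IPPROTO_TCP,
    .mask.src.l3num       = 0xFFFF,
    .mask.src.u.tcp.port  = 0xFFFF,
    .mask.dst.protonum    = 0xFF,
    .help                 = demo_help,
};

A module would call nf_conntrack_helper_register(&demo_helper) from its init path and nf_conntrack_helper_unregister(&demo_helper) on exit; expectations created from the help callback go through nf_conntrack_expect_alloc() and nf_conntrack_expect_related() declared above.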

@@ -0,0 +1,93 @@
/*
* Copyright (C)2003,2004 USAGI/WIDE Project
*
* Header for use in defining a given L3 protocol for connection tracking.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_protocol.h
*/
#ifndef _NF_CONNTRACK_L3PROTO_H
#define _NF_CONNTRACK_L3PROTO_H
#include <linux/seq_file.h>
#include <net/netfilter/nf_conntrack.h>
struct nf_conntrack_l3proto
{
/* Next pointer. */
struct list_head list;
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
/* Protocol name */
const char *name;
/*
* Try to fill in the third arg: nhoff is offset of l3 proto
* hdr. Return true if possible.
*/
int (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple);
/*
* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
*/
int (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
/* Print out the per-protocol part of the tuple. */
int (*print_tuple)(struct seq_file *s,
const struct nf_conntrack_tuple *);
/* Print out the private part of the conntrack. */
int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
/* Returns verdict for packet, or -1 for invalid. */
int (*packet)(struct nf_conn *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info ctinfo);
/*
* Called when a new connection for this protocol found;
* returns TRUE if it's OK. If so, packet() called next.
*/
int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *conntrack);
/*
* Called before tracking.
* *dataoff: offset of protocol header (TCP, UDP,...) in *pskb
* *protonum: protocol number
*/
int (*prepare)(struct sk_buff **pskb, unsigned int hooknum,
unsigned int *dataoff, u_int8_t *protonum);
u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple);
/* Module (if any) which this is connected to. */
struct module *me;
};
extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
static inline struct nf_conntrack_l3proto *
nf_ct_find_l3proto(u_int16_t l3proto)
{
return nf_ct_l3protos[l3proto];
}
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
extern struct nf_conntrack_l3proto nf_conntrack_generic_l3proto;
#endif /*_NF_CONNTRACK_L3PROTO_H*/

@@ -0,0 +1,105 @@
/*
* Header for use in defining a given protocol for connection tracking.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalized L3 protocol dependent part.
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_protocol.h
*/
#ifndef _NF_CONNTRACK_PROTOCOL_H
#define _NF_CONNTRACK_PROTOCOL_H
#include <net/netfilter/nf_conntrack.h>
struct seq_file;
struct nf_conntrack_protocol
{
/* Next pointer. */
struct list_head list;
/* L3 Protocol number. */
u_int16_t l3proto;
/* Protocol number. */
u_int8_t proto;
/* Protocol name */
const char *name;
/* Try to fill in the third arg: dataoff is offset past network protocol
hdr. Return true if possible. */
int (*pkt_to_tuple)(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple);
/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
* Some packets can't be inverted: return 0 in that case.
*/
int (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
/* Print out the per-protocol part of the tuple. Return like seq_* */
int (*print_tuple)(struct seq_file *s,
const struct nf_conntrack_tuple *);
/* Print out the private part of the conntrack. */
int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
/* Returns verdict for packet, or -1 for invalid. */
int (*packet)(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum);
/* Called when a new connection for this protocol found;
* returns TRUE if it's OK. If so, packet() called next. */
int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb,
unsigned int dataoff);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *conntrack);
int (*error)(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
int pf, unsigned int hooknum);
/* Module (if any) which this is connected to. */
struct module *me;
};
/* Existing built-in protocols */
extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
#define MAX_NF_CT_PROTO 256
extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
extern struct nf_conntrack_protocol *
nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol);
/* Protocol registration. */
extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto);
extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto);
/* Log invalid packets */
extern unsigned int nf_ct_log_invalid;
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(proto) \
(nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
#else
#define LOG_INVALID(proto) \
((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
#define LOG_INVALID(proto) 0
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/

@@ -0,0 +1,190 @@
/*
* Definitions and Declarations for tuple.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalize L3 protocol dependent part.
*
* Derived from include/linux/netfilter_ipv4/ip_conntrack_tuple.h
*/
#ifndef _NF_CONNTRACK_TUPLE_H
#define _NF_CONNTRACK_TUPLE_H
#include <linux/netfilter/nf_conntrack_tuple_common.h>
/* A `tuple' is a structure containing the information to uniquely
identify a connection. ie. if two packets have the same tuple, they
are in the same connection; if not, they are not.
We divide the structure along "manipulatable" and
"non-manipulatable" lines, for the benefit of the NAT code.
*/
#define NF_CT_TUPLE_L3SIZE 4
/* The l3 protocol-specific manipulable parts of the tuple: always in
network order! */
union nf_conntrack_man_l3proto {
u_int32_t all[NF_CT_TUPLE_L3SIZE];
u_int32_t ip;
u_int32_t ip6[4];
};
/* The protocol-specific manipulable parts of the tuple: always in
network order! */
union nf_conntrack_man_proto
{
/* Add other protocols here. */
u_int16_t all;
struct {
u_int16_t port;
} tcp;
struct {
u_int16_t port;
} udp;
struct {
u_int16_t id;
} icmp;
struct {
u_int16_t port;
} sctp;
};
/* The manipulable part of the tuple. */
struct nf_conntrack_man
{
union nf_conntrack_man_l3proto u3;
union nf_conntrack_man_proto u;
/* Layer 3 protocol */
u_int16_t l3num;
};
/* This contains the information to distinguish a connection. */
struct nf_conntrack_tuple
{
struct nf_conntrack_man src;
/* These are the parts of the tuple which are fixed. */
struct {
union {
u_int32_t all[NF_CT_TUPLE_L3SIZE];
u_int32_t ip;
u_int32_t ip6[4];
} u3;
union {
/* Add other protocols here. */
u_int16_t all;
struct {
u_int16_t port;
} tcp;
struct {
u_int16_t port;
} udp;
struct {
u_int8_t type, code;
} icmp;
struct {
u_int16_t port;
} sctp;
} u;
/* The protocol. */
u_int8_t protonum;
/* The direction (for tuplehash) */
u_int8_t dir;
} dst;
};
/* This is optimized opposed to a memset of the whole structure. Everything we
* really care about is the source/destination unions */
#define NF_CT_TUPLE_U_BLANK(tuple) \
do { \
(tuple)->src.u.all = 0; \
(tuple)->dst.u.all = 0; \
memset(&(tuple)->src.u3, 0, sizeof((tuple)->src.u3)); \
memset(&(tuple)->dst.u3, 0, sizeof((tuple)->dst.u3)); \
} while (0)
#ifdef __KERNEL__
#define NF_CT_DUMP_TUPLE(tp) \
DEBUGP("tuple %p: %u %u %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu -> %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu\n", \
(tp), (tp)->src.l3num, (tp)->dst.protonum, \
NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \
NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all))
/* If we're the first tuple, it's the original dir. */
#define NF_CT_DIRECTION(h) \
((enum ip_conntrack_dir)(h)->tuple.dst.dir)
/* Connections have two entries in the hash table: one for each way */
struct nf_conntrack_tuple_hash
{
struct list_head list;
struct nf_conntrack_tuple tuple;
};
#endif /* __KERNEL__ */
static inline int nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1,
const struct nf_conntrack_tuple *t2)
{
return (t1->src.u3.all[0] == t2->src.u3.all[0] &&
t1->src.u3.all[1] == t2->src.u3.all[1] &&
t1->src.u3.all[2] == t2->src.u3.all[2] &&
t1->src.u3.all[3] == t2->src.u3.all[3] &&
t1->src.u.all == t2->src.u.all &&
t1->src.l3num == t2->src.l3num &&
t1->dst.protonum == t2->dst.protonum);
}
static inline int nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1,
const struct nf_conntrack_tuple *t2)
{
return (t1->dst.u3.all[0] == t2->dst.u3.all[0] &&
t1->dst.u3.all[1] == t2->dst.u3.all[1] &&
t1->dst.u3.all[2] == t2->dst.u3.all[2] &&
t1->dst.u3.all[3] == t2->dst.u3.all[3] &&
t1->dst.u.all == t2->dst.u.all &&
t1->src.l3num == t2->src.l3num &&
t1->dst.protonum == t2->dst.protonum);
}
static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1,
const struct nf_conntrack_tuple *t2)
{
return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2);
}
static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *mask)
{
int count = 0;
for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
if ((t->src.u3.all[count] ^ tuple->src.u3.all[count]) &
mask->src.u3.all[count])
return 0;
}
for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
if ((t->dst.u3.all[count] ^ tuple->dst.u3.all[count]) &
mask->dst.u3.all[count])
return 0;
}
if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all ||
(t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all ||
(t->src.l3num ^ tuple->src.l3num) & mask->src.l3num ||
(t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum)
return 0;
return 1;
}
#endif /* _NF_CONNTRACK_TUPLE_H */

include/net/netlink.h (new file)

@@ -0,0 +1,883 @@
#ifndef __NET_NETLINK_H
#define __NET_NETLINK_H
#include <linux/types.h>
#include <linux/netlink.h>
/* ========================================================================
* Netlink Messages and Attributes Interface (As Seen On TV)
* ------------------------------------------------------------------------
* Messages Interface
* ------------------------------------------------------------------------
*
* Message Format:
* <--- nlmsg_total_size(payload) --->
* <-- nlmsg_msg_size(payload) ->
* +----------+- - -+-------------+- - -+-------- - -
* | nlmsghdr | Pad | Payload | Pad | nlmsghdr
* +----------+- - -+-------------+- - -+-------- - -
* nlmsg_data(nlh)---^ ^
* nlmsg_next(nlh)-----------------------+
*
* Payload Format:
* <---------------------- nlmsg_len(nlh) --------------------->
* <------ hdrlen ------> <- nlmsg_attrlen(nlh, hdrlen) ->
* +----------------------+- - -+--------------------------------+
* | Family Header | Pad | Attributes |
* +----------------------+- - -+--------------------------------+
* nlmsg_attrdata(nlh, hdrlen)---^
*
* Data Structures:
* struct nlmsghdr netlink message header
*
* Message Construction:
* nlmsg_new() create a new netlink message
* nlmsg_put() add a netlink message to an skb
* nlmsg_put_answer() callback based nlmsg_put()
* nlmsg_end() finalize netlink message
* nlmsg_cancel() cancel message construction
* nlmsg_free() free a netlink message
*
* Message Sending:
* nlmsg_multicast() multicast message to several groups
* nlmsg_unicast() unicast a message to a single socket
*
* Message Length Calculations:
* nlmsg_msg_size(payload) length of message w/o padding
* nlmsg_total_size(payload) length of message w/ padding
* nlmsg_padlen(payload) length of padding at tail
*
* Message Payload Access:
* nlmsg_data(nlh) head of message payload
* nlmsg_len(nlh) length of message payload
* nlmsg_attrdata(nlh, hdrlen) head of attributes data
* nlmsg_attrlen(nlh, hdrlen) length of attributes data
*
* Message Parsing:
* nlmsg_ok(nlh, remaining) does nlh fit into remaining bytes?
* nlmsg_next(nlh, remaining) get next netlink message
* nlmsg_parse() parse attributes of a message
* nlmsg_find_attr() find an attribute in a message
* nlmsg_for_each_msg() loop over all messages
* nlmsg_validate() validate netlink message incl. attrs
* nlmsg_for_each_attr() loop over all attributes
*
* ------------------------------------------------------------------------
* Attributes Interface
* ------------------------------------------------------------------------
*
* Attribute Format:
* <------- nla_total_size(payload) ------->
* <---- nla_attr_size(payload) ----->
* +----------+- - -+- - - - - - - - - +- - -+-------- - -
* | Header | Pad | Payload | Pad | Header
* +----------+- - -+- - - - - - - - - +- - -+-------- - -
* <- nla_len(nla) -> ^
* nla_data(nla)----^ |
* nla_next(nla)-----------------------------'
*
* Data Structures:
 * struct nlattr netlink attribute header
*
* Attribute Construction:
* nla_reserve(skb, type, len) reserve skb tailroom for an attribute
* nla_put(skb, type, len, data) add attribute to skb
*
* Attribute Construction for Basic Types:
* nla_put_u8(skb, type, value) add u8 attribute to skb
* nla_put_u16(skb, type, value) add u16 attribute to skb
* nla_put_u32(skb, type, value) add u32 attribute to skb
* nla_put_u64(skb, type, value) add u64 attribute to skb
* nla_put_string(skb, type, str) add string attribute to skb
* nla_put_flag(skb, type) add flag attribute to skb
* nla_put_msecs(skb, type, jiffies) add msecs attribute to skb
*
* Exceptions Based Attribute Construction:
* NLA_PUT(skb, type, len, data) add attribute to skb
* NLA_PUT_U8(skb, type, value) add u8 attribute to skb
* NLA_PUT_U16(skb, type, value) add u16 attribute to skb
* NLA_PUT_U32(skb, type, value) add u32 attribute to skb
* NLA_PUT_U64(skb, type, value) add u64 attribute to skb
* NLA_PUT_STRING(skb, type, str) add string attribute to skb
* NLA_PUT_FLAG(skb, type) add flag attribute to skb
* NLA_PUT_MSECS(skb, type, jiffies) add msecs attribute to skb
*
* The meaning of these functions is equal to their lower case
* variants but they jump to the label nla_put_failure in case
* of a failure.
*
* Nested Attributes Construction:
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
* nla_total_size(payload) length of attribute w/ padding
* nla_padlen(payload) length of padding
*
* Attribute Payload Access:
* nla_data(nla) head of attribute payload
* nla_len(nla) length of attribute payload
*
* Attribute Payload Access for Basic Types:
* nla_get_u8(nla) get payload for a u8 attribute
* nla_get_u16(nla) get payload for a u16 attribute
* nla_get_u32(nla) get payload for a u32 attribute
* nla_get_u64(nla) get payload for a u64 attribute
* nla_get_flag(nla) return 1 if flag is true
* nla_get_msecs(nla) get payload for a msecs attribute
*
* Attribute Misc:
* nla_memcpy(dest, nla, count) copy attribute into memory
* nla_memcmp(nla, data, size) compare attribute with memory area
* nla_strlcpy(dst, nla, size) copy attribute to a sized string
* nla_strcmp(nla, str) compare attribute with string
*
* Attribute Parsing:
* nla_ok(nla, remaining) does nla fit into remaining bytes?
* nla_next(nla, remaining) get next netlink attribute
* nla_validate() validate a stream of attributes
* nla_find() find attribute in stream of attributes
* nla_parse() parse and validate stream of attrs
 * nla_parse_nested() parse nested attributes
* nla_for_each_attr() loop over all attributes
*=========================================================================
*/
/**
* Standard attribute types to specify validation policy
*/
enum {
NLA_UNSPEC,
NLA_U8,
NLA_U16,
NLA_U32,
NLA_U64,
NLA_STRING,
NLA_FLAG,
NLA_MSECS,
NLA_NESTED,
__NLA_TYPE_MAX,
};
#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
/**
* struct nla_policy - attribute validation policy
* @type: Type of attribute or NLA_UNSPEC
* @minlen: Minimal length of payload required to be available
*
* Policies are defined as arrays of this struct, the array must be
* accessible by attribute type up to the highest identifier to be expected.
*
* Example:
* static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING },
* [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
* };
*/
struct nla_policy {
u16 type;
u16 minlen;
};
extern void netlink_run_queue(struct sock *sk, unsigned int *qlen,
int (*cb)(struct sk_buff *,
struct nlmsghdr *, int *));
extern void netlink_queue_skip(struct nlmsghdr *nlh,
struct sk_buff *skb);
extern int nla_validate(struct nlattr *head, int len, int maxtype,
struct nla_policy *policy);
extern int nla_parse(struct nlattr *tb[], int maxtype,
struct nlattr *head, int len,
struct nla_policy *policy);
extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype);
extern size_t nla_strlcpy(char *dst, const struct nlattr *nla,
size_t dstsize);
extern int nla_memcpy(void *dest, struct nlattr *src, int count);
extern int nla_memcmp(const struct nlattr *nla, const void *data,
size_t size);
extern int nla_strcmp(const struct nlattr *nla, const char *str);
extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype,
int attrlen);
extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype,
int attrlen);
extern void __nla_put(struct sk_buff *skb, int attrtype,
int attrlen, const void *data);
extern int nla_put(struct sk_buff *skb, int attrtype,
int attrlen, const void *data);
/**************************************************************************
* Netlink Messages
**************************************************************************/
/**
* nlmsg_msg_size - length of netlink message not including padding
* @payload: length of message payload
*/
static inline int nlmsg_msg_size(int payload)
{
return NLMSG_HDRLEN + payload;
}
/**
* nlmsg_total_size - length of netlink message including padding
* @payload: length of message payload
*/
static inline int nlmsg_total_size(int payload)
{
return NLMSG_ALIGN(nlmsg_msg_size(payload));
}
/**
* nlmsg_padlen - length of padding at the message's tail
* @payload: length of message payload
*/
static inline int nlmsg_padlen(int payload)
{
return nlmsg_total_size(payload) - nlmsg_msg_size(payload);
}
/**
* nlmsg_data - head of message payload
 * @nlh: netlink message header
*/
static inline void *nlmsg_data(const struct nlmsghdr *nlh)
{
return (unsigned char *) nlh + NLMSG_HDRLEN;
}
/**
* nlmsg_len - length of message payload
* @nlh: netlink message header
*/
static inline int nlmsg_len(const struct nlmsghdr *nlh)
{
return nlh->nlmsg_len - NLMSG_HDRLEN;
}
/**
* nlmsg_attrdata - head of attributes data
* @nlh: netlink message header
* @hdrlen: length of family specific header
*/
static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh,
int hdrlen)
{
unsigned char *data = nlmsg_data(nlh);
return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen));
}
/**
* nlmsg_attrlen - length of attributes data
* @nlh: netlink message header
* @hdrlen: length of family specific header
*/
static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen)
{
return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen);
}
/**
* nlmsg_ok - check if the netlink message fits into the remaining bytes
* @nlh: netlink message header
* @remaining: number of bytes remaining in message stream
*/
static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
{
return (remaining >= sizeof(struct nlmsghdr) &&
nlh->nlmsg_len >= sizeof(struct nlmsghdr) &&
nlh->nlmsg_len <= remaining);
}
/**
* nlmsg_next - next netlink message in message stream
* @nlh: netlink message header
* @remaining: number of bytes remaining in message stream
*
* Returns the next netlink message in the message stream and
* decrements remaining by the size of the current message.
*/
static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining)
{
int totlen = NLMSG_ALIGN(nlh->nlmsg_len);
*remaining -= totlen;
return (struct nlmsghdr *) ((unsigned char *) nlh + totlen);
}
/**
* nlmsg_parse - parse attributes of a netlink message
* @nlh: netlink message header
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
*
* See nla_parse()
*/
static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen,
struct nlattr *tb[], int maxtype,
struct nla_policy *policy)
{
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;
return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
nlmsg_attrlen(nlh, hdrlen), policy);
}
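#if 0
/* Usage sketch (illustrative only): ATTR_MAX, ATTR_FOO and my_policy are
 * the hypothetical names from the nla_policy example above, and the
 * family is assumed to have no family specific header. */
static int example_doit(struct nlmsghdr *nlh)
{
	struct nlattr *tb[ATTR_MAX + 1];
	int err;

	err = nlmsg_parse(nlh, 0, tb, ATTR_MAX, my_policy);
	if (err < 0)
		return err;

	if (tb[ATTR_FOO])
		/* present and already validated against NLA_U16 */
		printk(KERN_DEBUG "foo=%u\n", nla_get_u16(tb[ATTR_FOO]));

	return 0;
}
#endif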
/**
* nlmsg_find_attr - find a specific attribute in a netlink message
* @nlh: netlink message header
 * @hdrlen: length of family specific header
* @attrtype: type of attribute to look for
*
* Returns the first attribute which matches the specified type.
*/
static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
int hdrlen, int attrtype)
{
return nla_find(nlmsg_attrdata(nlh, hdrlen),
nlmsg_attrlen(nlh, hdrlen), attrtype);
}
/**
* nlmsg_validate - validate a netlink message including attributes
 * @nlh: netlink message header
 * @hdrlen: length of family specific header
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
*/
static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype,
struct nla_policy *policy)
{
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;
return nla_validate(nlmsg_attrdata(nlh, hdrlen),
nlmsg_attrlen(nlh, hdrlen), maxtype, policy);
}
/**
* nlmsg_for_each_attr - iterate over a stream of attributes
* @pos: loop counter, set to current attribute
* @nlh: netlink message header
 * @hdrlen: length of family specific header
* @rem: initialized to len, holds bytes currently remaining in stream
*/
#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \
nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \
nlmsg_attrlen(nlh, hdrlen), rem)
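#if 0
/* Sketch: walking every attribute of a message; hdrlen is the length of
 * the (hypothetical) family specific header. */
static void example_walk(struct nlmsghdr *nlh, int hdrlen)
{
	struct nlattr *nla;
	int rem;

	nlmsg_for_each_attr(nla, nlh, hdrlen, rem)
		printk(KERN_DEBUG "attribute type=%u len=%u\n",
		       nla->nla_type, nla->nla_len);
}
#endif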
#if 0
/* FIXME: Enable once all users have been converted */
/**
* __nlmsg_put - Add a new netlink message to an skb
* @skb: socket buffer to store message in
* @pid: netlink process id
* @seq: sequence number of message
* @type: message type
* @payload: length of message payload
* @flags: message flags
*
 * The caller is responsible for ensuring that the skb provides enough
* tailroom for both the netlink header and payload.
*/
static inline struct nlmsghdr *__nlmsg_put(struct sk_buff *skb, u32 pid,
u32 seq, int type, int payload,
int flags)
{
struct nlmsghdr *nlh;
nlh = (struct nlmsghdr *) skb_put(skb, nlmsg_total_size(payload));
nlh->nlmsg_type = type;
nlh->nlmsg_len = nlmsg_msg_size(payload);
nlh->nlmsg_flags = flags;
nlh->nlmsg_pid = pid;
nlh->nlmsg_seq = seq;
memset((unsigned char *) nlmsg_data(nlh) + payload, 0,
nlmsg_padlen(payload));
return nlh;
}
#endif
/**
* nlmsg_put - Add a new netlink message to an skb
* @skb: socket buffer to store message in
* @pid: netlink process id
* @seq: sequence number of message
* @type: message type
* @payload: length of message payload
* @flags: message flags
*
* Returns NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
int type, int payload, int flags)
{
if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload)))
return NULL;
return __nlmsg_put(skb, pid, seq, type, payload, flags);
}
/**
* nlmsg_put_answer - Add a new callback based netlink message to an skb
* @skb: socket buffer to store message in
* @cb: netlink callback
* @type: message type
* @payload: length of message payload
* @flags: message flags
*
* Returns NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
struct netlink_callback *cb,
int type, int payload,
int flags)
{
return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
type, payload, flags);
}
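#if 0
/* Sketch: a dump callback built on nlmsg_put_answer(); MY_MSG_TYPE and
 * the per object fill logic are placeholders. */
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put_answer(skb, cb, MY_MSG_TYPE, 0, NLM_F_MULTI);
	if (nlh == NULL)
		return skb->len;	/* skb is full, dump resumes later */

	/* ... append attributes describing one object here ... */

	nlmsg_end(skb, nlh);
	return skb->len;
}
#endif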
/**
* nlmsg_new - Allocate a new netlink message
* @size: maximum size of message
*
 * Use NLMSG_GOODSIZE if size isn't known and you need a good default size.
*/
static inline struct sk_buff *nlmsg_new(int size)
{
return alloc_skb(size, GFP_KERNEL);
}
/**
* nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
*
 * Corrects the netlink message header to include the appended
* attributes. Only necessary if attributes have been added to
* the message.
*
* Returns the total data length of the skb.
*/
static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
{
nlh->nlmsg_len = skb->tail - (unsigned char *) nlh;
return skb->len;
}
/**
* nlmsg_cancel - Cancel construction of a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
*
* Removes the complete netlink message including all
* attributes from the socket buffer again. Returns -1.
*/
static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
{
skb_trim(skb, (unsigned char *) nlh - skb->data);
return -1;
}
/**
* nlmsg_free - free a netlink message
* @skb: socket buffer of netlink message
*/
static inline void nlmsg_free(struct sk_buff *skb)
{
kfree_skb(skb);
}
/**
* nlmsg_multicast - multicast a netlink message
* @sk: netlink socket to spread messages to
* @skb: netlink message as socket buffer
* @pid: own netlink pid to avoid sending to yourself
* @group: multicast group id
*/
static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
u32 pid, unsigned int group)
{
int err;
NETLINK_CB(skb).dst_group = group;
err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL);
if (err > 0)
err = 0;
return err;
}
/**
* nlmsg_unicast - unicast a netlink message
* @sk: netlink socket to spread message to
* @skb: netlink message as socket buffer
* @pid: netlink pid of the destination socket
*/
static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid)
{
int err;
err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
if (err > 0)
err = 0;
return err;
}
/**
* nlmsg_for_each_msg - iterate over a stream of messages
* @pos: loop counter, set to current message
* @head: head of message stream
* @len: length of message stream
* @rem: initialized to len, holds bytes currently remaining in stream
*/
#define nlmsg_for_each_msg(pos, head, len, rem) \
for (pos = head, rem = len; \
nlmsg_ok(pos, rem); \
pos = nlmsg_next(pos, &(rem)))
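#if 0
/* Sketch: building a message carrying one u32 attribute and unicasting
 * it; MY_MSG_TYPE and MY_ATTR are placeholders for family specific
 * values. */
static int example_send(struct sock *sk, u32 pid, u32 seq)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	u32 value = 42;

	skb = nlmsg_new(NLMSG_GOODSIZE);
	if (skb == NULL)
		return -ENOMEM;

	nlh = nlmsg_put(skb, pid, seq, MY_MSG_TYPE, 0, 0);
	if (nlh == NULL || nla_put(skb, MY_ATTR, sizeof(value), &value) < 0) {
		nlmsg_free(skb);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return nlmsg_unicast(sk, skb, pid);
}
#endif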
/**************************************************************************
* Netlink Attributes
**************************************************************************/
/**
* nla_attr_size - length of attribute not including padding
* @payload: length of payload
*/
static inline int nla_attr_size(int payload)
{
return NLA_HDRLEN + payload;
}
/**
* nla_total_size - total length of attribute including padding
* @payload: length of payload
*/
static inline int nla_total_size(int payload)
{
return NLA_ALIGN(nla_attr_size(payload));
}
/**
* nla_padlen - length of padding at the tail of attribute
* @payload: length of payload
*/
static inline int nla_padlen(int payload)
{
return nla_total_size(payload) - nla_attr_size(payload);
}
/**
* nla_data - head of payload
* @nla: netlink attribute
*/
static inline void *nla_data(const struct nlattr *nla)
{
return (char *) nla + NLA_HDRLEN;
}
/**
* nla_len - length of payload
* @nla: netlink attribute
*/
static inline int nla_len(const struct nlattr *nla)
{
return nla->nla_len - NLA_HDRLEN;
}
/**
* nla_ok - check if the netlink attribute fits into the remaining bytes
* @nla: netlink attribute
* @remaining: number of bytes remaining in attribute stream
*/
static inline int nla_ok(const struct nlattr *nla, int remaining)
{
return remaining >= sizeof(*nla) &&
nla->nla_len >= sizeof(*nla) &&
nla->nla_len <= remaining;
}
/**
 * nla_next - next netlink attribute in attribute stream
* @nla: netlink attribute
* @remaining: number of bytes remaining in attribute stream
*
* Returns the next netlink attribute in the attribute stream and
* decrements remaining by the size of the current attribute.
*/
static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
{
int totlen = NLA_ALIGN(nla->nla_len);
*remaining -= totlen;
return (struct nlattr *) ((char *) nla + totlen);
}
/**
* nla_parse_nested - parse nested attributes
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
* @nla: attribute containing the nested attributes
* @policy: validation policy
*
* See nla_parse()
*/
static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
struct nlattr *nla,
struct nla_policy *policy)
{
return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
}
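#if 0
/* Sketch: extracting the attributes nested inside a container attribute;
 * MY_NESTED_MAX and my_nested_policy are placeholders. */
static int example_parse_nested(struct nlattr *container)
{
	struct nlattr *tb[MY_NESTED_MAX + 1];

	return nla_parse_nested(tb, MY_NESTED_MAX, container,
				my_nested_policy);
}
#endif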
/**
 * nla_put_u8 - Add a u8 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @value: numeric value
*/
static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
{
return nla_put(skb, attrtype, sizeof(u8), &value);
}
/**
* nla_put_u16 - Add a u16 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @value: numeric value
*/
static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
{
return nla_put(skb, attrtype, sizeof(u16), &value);
}
/**
* nla_put_u32 - Add a u32 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @value: numeric value
*/
static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
{
return nla_put(skb, attrtype, sizeof(u32), &value);
}
/**
 * nla_put_u64 - Add a u64 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @value: numeric value
*/
static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value)
{
return nla_put(skb, attrtype, sizeof(u64), &value);
}
/**
* nla_put_string - Add a string netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @str: NUL terminated string
*/
static inline int nla_put_string(struct sk_buff *skb, int attrtype,
const char *str)
{
return nla_put(skb, attrtype, strlen(str) + 1, str);
}
/**
* nla_put_flag - Add a flag netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
*/
static inline int nla_put_flag(struct sk_buff *skb, int attrtype)
{
return nla_put(skb, attrtype, 0, NULL);
}
/**
* nla_put_msecs - Add a msecs netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @jiffies: number of msecs in jiffies
*/
static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
unsigned long jiffies)
{
u64 tmp = jiffies_to_msecs(jiffies);
return nla_put(skb, attrtype, sizeof(u64), &tmp);
}
#define NLA_PUT(skb, attrtype, attrlen, data) \
do { \
if (nla_put(skb, attrtype, attrlen, data) < 0) \
goto nla_put_failure; \
} while(0)
#define NLA_PUT_TYPE(skb, type, attrtype, value) \
do { \
type __tmp = value; \
NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \
} while(0)
#define NLA_PUT_U8(skb, attrtype, value) \
NLA_PUT_TYPE(skb, u8, attrtype, value)
#define NLA_PUT_U16(skb, attrtype, value) \
NLA_PUT_TYPE(skb, u16, attrtype, value)
#define NLA_PUT_U32(skb, attrtype, value) \
NLA_PUT_TYPE(skb, u32, attrtype, value)
#define NLA_PUT_U64(skb, attrtype, value) \
NLA_PUT_TYPE(skb, u64, attrtype, value)
#define NLA_PUT_STRING(skb, attrtype, value) \
NLA_PUT(skb, attrtype, strlen(value) + 1, value)
#define NLA_PUT_FLAG(skb, attrtype, value) \
NLA_PUT(skb, attrtype, 0, NULL)
#define NLA_PUT_MSECS(skb, attrtype, jiffies) \
NLA_PUT_U64(skb, attrtype, jiffies_to_msecs(jiffies))
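#if 0
/* Sketch: filling a message with the exception based macros; on failure
 * they jump to the nla_put_failure label.  nlh is assumed to have been
 * started with nlmsg_put(), MY_ATTR_* are placeholders. */
static int example_fill(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	NLA_PUT_U32(skb, MY_ATTR_ID, 1);
	NLA_PUT_STRING(skb, MY_ATTR_NAME, "example");

	return nlmsg_end(skb, nlh);

nla_put_failure:
	return nlmsg_cancel(skb, nlh);
}
#endif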
/**
* nla_get_u32 - return payload of u32 attribute
* @nla: u32 netlink attribute
*/
static inline u32 nla_get_u32(struct nlattr *nla)
{
return *(u32 *) nla_data(nla);
}
/**
* nla_get_u16 - return payload of u16 attribute
* @nla: u16 netlink attribute
*/
static inline u16 nla_get_u16(struct nlattr *nla)
{
return *(u16 *) nla_data(nla);
}
/**
* nla_get_u8 - return payload of u8 attribute
* @nla: u8 netlink attribute
*/
static inline u8 nla_get_u8(struct nlattr *nla)
{
return *(u8 *) nla_data(nla);
}
/**
* nla_get_u64 - return payload of u64 attribute
* @nla: u64 netlink attribute
*/
static inline u64 nla_get_u64(struct nlattr *nla)
{
u64 tmp;
nla_memcpy(&tmp, nla, sizeof(tmp));
return tmp;
}
/**
* nla_get_flag - return payload of flag attribute
* @nla: flag netlink attribute
*/
static inline int nla_get_flag(struct nlattr *nla)
{
return !!nla;
}
/**
* nla_get_msecs - return payload of msecs attribute
* @nla: msecs netlink attribute
*
* Returns the number of milliseconds in jiffies.
*/
static inline unsigned long nla_get_msecs(struct nlattr *nla)
{
u64 msecs = nla_get_u64(nla);
return msecs_to_jiffies((unsigned long) msecs);
}
/**
* nla_nest_start - Start a new level of nested attributes
* @skb: socket buffer to add attributes to
* @attrtype: attribute type of container
*
* Returns the container attribute
*/
static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
{
struct nlattr *start = (struct nlattr *) skb->tail;
if (nla_put(skb, attrtype, 0, NULL) < 0)
return NULL;
return start;
}
/**
* nla_nest_end - Finalize nesting of attributes
 * @skb: socket buffer the attributes are stored in
* @start: container attribute
*
 * Corrects the container attribute header to include all
 * appended attributes.
*
* Returns the total data length of the skb.
*/
static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
{
start->nla_len = skb->tail - (unsigned char *) start;
return skb->len;
}
/**
* nla_nest_cancel - Cancel nesting of attributes
* @skb: socket buffer the message is stored in
* @start: container attribute
*
 * Removes the container attribute including all nested
 * attributes. Returns -1.
*/
static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
{
if (start)
skb_trim(skb, (unsigned char *) start - skb->data);
return -1;
}
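#if 0
/* Sketch: emitting a container attribute holding nested attributes;
 * MY_ATTR_NEST and MY_ATTR_VALUE are placeholders. */
static int example_nest(struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, MY_ATTR_NEST);
	if (nest == NULL)
		goto nla_put_failure;

	NLA_PUT_U32(skb, MY_ATTR_VALUE, 42);

	return nla_nest_end(skb, nest);

nla_put_failure:
	return nla_nest_cancel(skb, nest);	/* safe with nest == NULL */
}
#endif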
/**
* nla_for_each_attr - iterate over a stream of attributes
* @pos: loop counter, set to current attribute
* @head: head of attribute stream
* @len: length of attribute stream
* @rem: initialized to len, holds bytes currently remaining in stream
*/
#define nla_for_each_attr(pos, head, len, rem) \
for (pos = head, rem = len; \
nla_ok(pos, rem); \
pos = nla_next(pos, &(rem)))
#endif

View file

@ -49,6 +49,7 @@
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/netlink.h>
DECLARE_MUTEX(rtnl_sem);
@ -462,11 +463,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
}
static int rtnetlink_done(struct netlink_callback *cb)
{
return 0;
}
/* Protected by RTNL semaphore. */
static struct rtattr **rta_buf;
static int rtattr_max;
@ -524,8 +520,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
u32 rlen;
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
@ -533,14 +527,11 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit,
rtnetlink_done)) != 0) {
link->dumpit, NULL)) != 0) {
return -1;
}
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
skb_pull(skb, rlen);
netlink_queue_skip(nlh, skb);
return -1;
}
@ -579,75 +570,13 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
return -1;
}
/*
* Process one packet of messages.
* Malformed skbs with wrong lengths of messages are discarded silently.
*/
static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr * nlh;
while (skb->len >= NLMSG_SPACE(0)) {
u32 rlen;
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if (rtnetlink_rcv_msg(skb, nlh, &err)) {
/* Not error, but we must interrupt processing here:
* Note, that in this case we do not pull message
* from skb, it will be processed later.
*/
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags&NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
return 0;
}
/*
* rtnetlink input queue processing routine:
* - process as much as there was in the queue upon entry.
* - feed skbs to rtnetlink_rcv_skb, until it refuse a message,
* that will occur, when a dump started.
*/
static void rtnetlink_rcv(struct sock *sk, int len)
{
unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
unsigned int qlen = 0;
do {
struct sk_buff *skb;
rtnl_lock();
if (qlen > skb_queue_len(&sk->sk_receive_queue))
qlen = skb_queue_len(&sk->sk_receive_queue);
for (; qlen; qlen--) {
skb = skb_dequeue(&sk->sk_receive_queue);
if (rtnetlink_rcv_skb(skb)) {
if (skb->len)
skb_queue_head(&sk->sk_receive_queue,
skb);
else {
kfree_skb(skb);
qlen--;
}
break;
}
kfree_skb(skb);
}
netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
up(&rtnl_sem);
netdev_run_todo();

View file

@ -336,6 +336,9 @@ void __kfree_skb(struct sk_buff *skb)
}
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
@ -414,9 +417,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
C(nfct_reasm);
nf_conntrack_get_reasm(skb->nfct_reasm);
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
C(nfct_reasm);
nf_conntrack_get_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
@ -474,6 +485,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
new->nfct_reasm = old->nfct_reasm;
nf_conntrack_get_reasm(old->nfct_reasm);
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif

View file

@ -724,12 +724,6 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
static int inet_diag_dump_done(struct netlink_callback *cb)
{
return 0;
}
static __inline__ int
inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
goto err_inval;
}
return netlink_dump_start(idiagnl, skb, nlh,
inet_diag_dump,
inet_diag_dump_done);
inet_diag_dump, NULL);
} else {
return inet_diag_get_exact(skb, nlh);
}

View file

@ -5,6 +5,20 @@
menu "IP: Netfilter Configuration"
depends on INET && NETFILTER
config NF_CONNTRACK_IPV4
tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is the IPv4 support for Layer 3 independent connection tracking.
Layer 3 independent connection tracking is an experimental scheme
which generalizes ip_conntrack to support other Layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
# connection tracking, helpers and protocols
config IP_NF_CONNTRACK
tristate "Connection tracking (required for masq/NAT)"
@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE
tristate "Packet type match support"
depends on IP_NF_IPTABLES
help
Packet type matching allows you to match a packet by
its "class", eg. BROADCAST, MULTICAST, ...
Typical usage:
iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS
config IP_NF_MATCH_HELPER
tristate "Helper match support"
depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
depends on IP_NF_IPTABLES
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
Helper matching allows you to match packets in dynamic connections
tracked by a conntrack-helper, ie. ip_conntrack_ftp
@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER
config IP_NF_MATCH_STATE
tristate "Connection state match support"
depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
depends on IP_NF_IPTABLES
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
Connection state matching allows you to match packets based on their
relationship to a tracked connection (ie. previous packets). This
@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE
config IP_NF_MATCH_CONNTRACK
tristate "Connection tracking match support"
depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
depends on IP_NF_IPTABLES
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
This is a general conntrack match module, a superset of the state match.
@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT
config IP_NF_MATCH_CONNMARK
tristate 'Connection mark match support'
depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
depends on IP_NF_IPTABLES
depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'.
@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK
config IP_NF_MATCH_CONNBYTES
tristate 'Connection byte/packet counter match support'
depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
depends on IP_NF_IPTABLES
depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
help
This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection.
@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CONNMARK
tristate 'CONNMARK target support'
depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
depends on IP_NF_MANGLE
depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but
@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
depends on IP_NF_IPTABLES && EXPERIMENTAL
depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
@ -782,7 +803,7 @@ config IP_NF_RAW
config IP_NF_TARGET_NOTRACK
tristate 'NOTRACK target support'
depends on IP_NF_RAW
depends on IP_NF_CONNTRACK
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
The NOTRACK target allows a select rule to specify
which packets *not* to enter the conntrack/NAT

View file

@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
# objects for l3 independent conntrack
nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o

View file

@ -1376,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
ip_conntrack_expect_put(exp);
}
}
write_unlock(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
} else {
/* This basically means we have to flush everything*/
write_lock_bh(&ip_conntrack_lock);

View file

@ -29,7 +29,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
#define CLUSTERIP_VERSION "0.8"
@ -316,14 +316,14 @@ target(struct sk_buff **pskb,
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
u_int32_t hash;
u_int32_t *mark, hash;
/* don't need to clusterip_config_get() here, since refcount
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
if (!ct) {
mark = nf_ct_get_mark((*pskb), &ctinfo);
if (mark == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
* to outgoing connections of other nodes will be
@ -346,7 +346,7 @@ target(struct sk_buff **pskb,
switch (ctinfo) {
case IP_CT_NEW:
ct->mark = hash;
*mark = hash;
break;
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
@ -363,7 +363,7 @@ target(struct sk_buff **pskb,
#ifdef DEBUG_CLUSTERP
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
DEBUGP("not responsible\n");
return NF_DROP;

View file

@ -29,7 +29,7 @@ MODULE_LICENSE("GPL");
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
static unsigned int
target(struct sk_buff **pskb,
@ -43,24 +43,24 @@ target(struct sk_buff **pskb,
u_int32_t diff;
u_int32_t nfmark;
u_int32_t newmark;
u_int32_t ctinfo;
u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
if (ct) {
if (ctmark) {
switch(markinfo->mode) {
case IPT_CONNMARK_SET:
newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
if (newmark != ct->mark)
ct->mark = newmark;
newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
if (newmark != *ctmark)
*ctmark = newmark;
break;
case IPT_CONNMARK_SAVE:
newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
if (ct->mark != newmark)
ct->mark = newmark;
newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
if (*ctmark != newmark)
*ctmark = newmark;
break;
case IPT_CONNMARK_RESTORE:
nfmark = (*pskb)->nfmark;
diff = (ct->mark ^ nfmark) & markinfo->mask;
diff = (*ctmark ^ nfmark) & markinfo->mask;
if (diff != 0)
(*pskb)->nfmark = nfmark ^ diff;
break;

View file

@ -5,7 +5,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
static unsigned int
target(struct sk_buff **pskb,
@ -23,7 +23,7 @@ target(struct sk_buff **pskb,
If there is a real ct entry corresponding to this packet,
it'll hang around till timing out. We don't deal with it
for performance reasons. JK */
(*pskb)->nfct = &ip_conntrack_untracked.ct_general;
nf_ct_untrack(*pskb);
(*pskb)->nfctinfo = IP_CT_NEW;
nf_conntrack_get((*pskb)->nfct);

View file

@ -10,7 +10,7 @@
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_connbytes.h>
@ -46,60 +46,59 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_connbytes_info *sinfo = matchinfo;
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct;
u_int64_t what = 0; /* initialize to make gcc happy */
const struct ip_conntrack_counter *counters;
if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
if (!(counters = nf_ct_get_counters(skb)))
return 0; /* no match */
switch (sinfo->what) {
case IPT_CONNBYTES_PKTS:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
what = counters[IP_CT_DIR_ORIGINAL].packets;
break;
case IPT_CONNBYTES_DIR_REPLY:
what = ct->counters[IP_CT_DIR_REPLY].packets;
what = counters[IP_CT_DIR_REPLY].packets;
break;
case IPT_CONNBYTES_DIR_BOTH:
what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
what += ct->counters[IP_CT_DIR_REPLY].packets;
what = counters[IP_CT_DIR_ORIGINAL].packets;
what += counters[IP_CT_DIR_REPLY].packets;
break;
}
break;
case IPT_CONNBYTES_BYTES:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
what = counters[IP_CT_DIR_ORIGINAL].bytes;
break;
case IPT_CONNBYTES_DIR_REPLY:
what = ct->counters[IP_CT_DIR_REPLY].bytes;
what = counters[IP_CT_DIR_REPLY].bytes;
break;
case IPT_CONNBYTES_DIR_BOTH:
what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
what += ct->counters[IP_CT_DIR_REPLY].bytes;
what = counters[IP_CT_DIR_ORIGINAL].bytes;
what += counters[IP_CT_DIR_REPLY].bytes;
break;
}
break;
case IPT_CONNBYTES_AVGPKT:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
ct->counters[IP_CT_DIR_ORIGINAL].packets);
what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
counters[IP_CT_DIR_ORIGINAL].packets);
break;
case IPT_CONNBYTES_DIR_REPLY:
what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
ct->counters[IP_CT_DIR_REPLY].packets);
what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
counters[IP_CT_DIR_REPLY].packets);
break;
case IPT_CONNBYTES_DIR_BOTH:
{
u_int64_t bytes;
u_int64_t pkts;
bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
ct->counters[IP_CT_DIR_REPLY].bytes;
pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
ct->counters[IP_CT_DIR_REPLY].packets;
bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
counters[IP_CT_DIR_REPLY].bytes;
pkts = counters[IP_CT_DIR_ORIGINAL].packets+
counters[IP_CT_DIR_REPLY].packets;
/* FIXME_THEORETICAL: what to do if sum
* overflows ? */

View file

@ -28,7 +28,7 @@ MODULE_LICENSE("GPL");
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_connmark.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
static int
match(const struct sk_buff *skb,
@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_connmark_info *info = matchinfo;
enum ip_conntrack_info ctinfo;
struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
if (!ct)
u_int32_t ctinfo;
const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
if (!ctmark)
return 0;
return ((ct->mark & info->mask) == info->mark) ^ info->invert;
return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
}
static int

View file

@ -10,7 +10,14 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
#else
#include <net/netfilter/nf_conntrack.h>
#endif
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_conntrack.h>
@ -18,6 +25,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables connection tracking match module");
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@ -102,6 +111,93 @@ match(const struct sk_buff *skb,
return 1;
}
#else /* CONFIG_IP_NF_CONNTRACK */
static int
match(const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const void *matchinfo,
int offset,
int *hotdrop)
{
const struct ipt_conntrack_info *sinfo = matchinfo;
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int statebit;
ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
if (ct == &nf_conntrack_untracked)
statebit = IPT_CONNTRACK_STATE_UNTRACKED;
else if (ct)
statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
else
statebit = IPT_CONNTRACK_STATE_INVALID;
if(sinfo->flags & IPT_CONNTRACK_STATE) {
if (ct) {
if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
statebit |= IPT_CONNTRACK_STATE_SNAT;
if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
statebit |= IPT_CONNTRACK_STATE_DNAT;
}
if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_PROTO) {
if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_STATUS) {
if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
return 0;
}
if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
unsigned long expires;
if(!ct)
return 0;
expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
return 0;
}
return 1;
}
#endif /* CONFIG_IP_NF_CONNTRACK */
static int check(const char *tablename,
const struct ipt_ip *ip,
void *matchinfo,

View file

@ -13,9 +13,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#else
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#endif
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_helper.h>
@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module");
#define DEBUGP(format, args...)
#endif
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@ -73,6 +80,53 @@ match(const struct sk_buff *skb,
return ret;
}
#else /* CONFIG_IP_NF_CONNTRACK */
static int
match(const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const void *matchinfo,
int offset,
int *hotdrop)
{
const struct ipt_helper_info *info = matchinfo;
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
int ret = info->invert;
ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
if (!ct) {
DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
return ret;
}
if (!ct->master) {
DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
return ret;
}
read_lock_bh(&nf_conntrack_lock);
if (!ct->master->helper) {
DEBUGP("ipt_helper: master ct %p has no helper\n",
ct->master);
goto out_unlock;
}
DEBUGP("master's name = %s , info->name = %s\n",
ct->master->helper->name, info->name);
if (info->name[0] == '\0')
ret ^= 1;
else
ret ^= !strncmp(ct->master->helper->name, info->name,
strlen(ct->master->helper->name));
out_unlock:
read_unlock_bh(&nf_conntrack_lock);
return ret;
}
#endif
static int check(const char *tablename,
const struct ipt_ip *ip,
void *matchinfo,

View file

@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_state.h>
@ -30,9 +30,9 @@ match(const struct sk_buff *skb,
enum ip_conntrack_info ctinfo;
unsigned int statebit;
if (skb->nfct == &ip_conntrack_untracked.ct_general)
if (nf_ct_is_untracked(skb))
statebit = IPT_STATE_UNTRACKED;
else if (!ip_conntrack_get(skb, &ctinfo))
else if (!nf_ct_get_ctinfo(skb, &ctinfo))
statebit = IPT_STATE_INVALID;
else
statebit = IPT_STATE_BIT(ctinfo);

View file

@ -0,0 +1,571 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - move L3 protocol dependent part to this file.
* 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - add get_features() to support various size of conntrack
* structures.
*
* Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
u_int32_t _addrs[2], *ap;
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
return 0;
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
return 1;
}
static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip;
return 1;
}
static int ipv4_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
NIPQUAD(tuple->src.u3.ip),
NIPQUAD(tuple->dst.u3.ip));
}
static int ipv4_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
/* Returns new sk_buff, or NULL */
static struct sk_buff *
nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
skb_orphan(skb);
local_bh_disable();
skb = ip_defrag(skb, user);
local_bh_enable();
if (skb)
ip_send_check(skb->nh.iph);
return skb;
}
static int
ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
/* Never happen */
if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
if (net_ratelimit()) {
printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
(*pskb)->nh.iph->protocol, hooknum);
}
return -NF_DROP;
}
*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
*protonum = (*pskb)->nh.iph->protocol;
return NF_ACCEPT;
}
int nat_module_is_loaded = 0;
static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
{
if (nat_module_is_loaded)
return NF_CT_F_NAT;
return NF_CT_F_BASIC;
}
static unsigned int ipv4_confirm(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(pskb);
}
static unsigned int ipv4_conntrack_help(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
unsigned int ret;
ret = ct->helper->help(pskb,
(*pskb)->nh.raw - (*pskb)->data
+ (*pskb)->nh.iph->ihl*4,
ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
}
return NF_ACCEPT;
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if ((*pskb)->nfct)
return NF_ACCEPT;
#endif
/* Gather fragments. */
if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
*pskb = nf_ct_ipv4_gather_frags(*pskb,
hooknum == NF_IP_PRE_ROUTING ?
IP_DEFRAG_CONNTRACK_IN :
IP_DEFRAG_CONNTRACK_OUT);
if (!*pskb)
return NF_STOLEN;
}
return NF_ACCEPT;
}
static unsigned int ipv4_refrag(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct rtable *rt = (struct rtable *)(*pskb)->dst;
/* We've seen it coming out the other side: confirm */
if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
return NF_DROP;
/* Local packets are never produced too large for their
interface. We defragment them at LOCAL_OUT, however,
so we have to refragment them here. */
if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
!skb_shinfo(*pskb)->tso_size) {
/* No hook can be after us, so this should be OK. */
ip_fragment(*pskb, okfn);
return NF_STOLEN;
}
return NF_ACCEPT;
}
static unsigned int ipv4_conntrack_in(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return nf_conntrack_in(PF_INET, hooknum, pskb);
}
static unsigned int ipv4_conntrack_local(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
}
return nf_conntrack_in(PF_INET, hooknum, pskb);
}
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
};
static struct nf_hook_ops ipv4_conntrack_in_ops = {
.hook = ipv4_conntrack_in,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
};
static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
};
static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
.hook = ipv4_conntrack_local,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
};
/* helpers */
static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
.hook = ipv4_conntrack_help,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
};
static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
.hook = ipv4_conntrack_help,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
};
/* Refragmenter; last chance. */
static struct nf_hook_ops ipv4_conntrack_out_ops = {
.hook = ipv4_refrag,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
};
static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
};
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmp.c */
extern unsigned long nf_ct_icmp_timeout;
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT,
.procname = "nf_conntrack_icmp_timeout",
.data = &nf_ct_icmp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_netfilter_table[] = {
{
.ctl_name = NET_NETFILTER,
.procname = "netfilter",
.mode = 0555,
.child = nf_ct_sysctl_table,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = nf_ct_netfilter_table,
},
{ .ctl_name = 0 }
};
#endif
/* Fast function for those who don't want to parse /proc (and I don't
blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
struct inet_sock *inet = inet_sk(sk);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
NF_CT_TUPLE_U_BLANK(&tuple);
tuple.src.u3.ip = inet->rcv_saddr;
tuple.src.u.tcp.port = inet->sport;
tuple.dst.u3.ip = inet->daddr;
tuple.dst.u.tcp.port = inet->dport;
tuple.src.l3num = PF_INET;
tuple.dst.protonum = IPPROTO_TCP;
/* We only do TCP at the moment: is there a better way? */
if (strcmp(sk->sk_prot->name, "TCP")) {
DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
return -ENOPROTOOPT;
}
if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
*len, sizeof(struct sockaddr_in));
return -EINVAL;
}
h = nf_conntrack_find_get(&tuple, NULL);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
sin.sin_family = AF_INET;
sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u.tcp.port;
sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u3.ip;
DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
nf_ct_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
return 0;
}
DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST+1,
.get = &getorigdst,
};
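/*
 * Userspace sketch (assumed usage): a transparent proxy retrieves the
 * pre-NAT destination of an accepted TCP connection through this socket
 * option, e.g.:
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
 *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
 *		       ntohs(dst.sin_port));
 */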
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
.name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
.print_tuple = ipv4_print_tuple,
.print_conntrack = ipv4_print_conntrack,
.prepare = ipv4_prepare,
.get_features = ipv4_get_features,
.me = THIS_MODULE,
};
extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
static int init_or_cleanup(int init)
{
int ret = 0;
if (!init) goto cleanup;
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
printk(KERN_ERR "Unable to register netfilter socket option\n");
goto cleanup_nothing;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register tcp.\n");
goto cleanup_sockopt;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register udp.\n");
goto cleanup_tcp;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register icmp.\n");
goto cleanup_udp;
}
ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register ipv4\n");
goto cleanup_icmp;
}
ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
goto cleanup_ipv4;
}
ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
goto cleanup_defragops;
}
ret = nf_register_hook(&ipv4_conntrack_in_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
goto cleanup_defraglocalops;
}
ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register local out hook.\n");
goto cleanup_inops;
}
ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register local helper hook.\n");
goto cleanup_inandlocalops;
}
ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
goto cleanup_helperinops;
}
ret = nf_register_hook(&ipv4_conntrack_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
goto cleanup_helperoutops;
}
ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register local in hook.\n");
goto cleanup_inoutandlocalops;
}
#ifdef CONFIG_SYSCTL
nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
if (nf_ct_ipv4_sysctl_header == NULL) {
printk("nf_conntrack: can't register to sysctl.\n");
ret = -ENOMEM;
goto cleanup_localinops;
}
#endif
/* For use by REJECT target */
ip_ct_attach = __nf_conntrack_attach;
return ret;
cleanup:
synchronize_net();
ip_ct_attach = NULL;
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
cleanup_localinops:
#endif
nf_unregister_hook(&ipv4_conntrack_local_in_ops);
cleanup_inoutandlocalops:
nf_unregister_hook(&ipv4_conntrack_out_ops);
cleanup_helperoutops:
nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
cleanup_helperinops:
nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
cleanup_inandlocalops:
nf_unregister_hook(&ipv4_conntrack_local_out_ops);
cleanup_inops:
nf_unregister_hook(&ipv4_conntrack_in_ops);
cleanup_defraglocalops:
nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
cleanup_defragops:
nf_unregister_hook(&ipv4_conntrack_defrag_ops);
cleanup_ipv4:
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
cleanup_icmp:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
cleanup_udp:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
cleanup_tcp:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
cleanup_nothing:
return ret;
}
MODULE_LICENSE("GPL");
static int __init init(void)
{
need_nf_conntrack();
return init_or_cleanup(1);
}
static void __exit fini(void)
{
init_or_cleanup(0);
}
module_init(init);
module_exit(fini);
void need_ip_conntrack(void)
{
}
EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);

View file

@ -0,0 +1,301 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - enable working with Layer 3 protocol independent connection tracking.
*
* Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/in.h>
#include <linux/icmp.h>
#include <linux/seq_file.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
unsigned long nf_ct_icmp_timeout = 30*HZ;
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
static int icmp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
struct icmphdr _hdr, *hp;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
return 0;
tuple->dst.u.icmp.type = hp->type;
tuple->src.u.icmp.id = hp->un.echo.id;
tuple->dst.u.icmp.code = hp->code;
return 1;
}
static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
/* Add 1; spaces filled with 0. */
static u_int8_t invmap[]
= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
if (orig->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[orig->dst.u.icmp.type])
return 0;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
return 1;
}
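/* A note on the "+ 1" bias above (illustration only): ICMP_ECHOREPLY is
 * type 0, so a plain table of inverse types could not distinguish "maps to
 * type 0" from "no inverse known".  Storing the inverse plus one keeps 0
 * free to mean "untracked type".  A minimal stand-alone sketch of the same
 * idiom follows; demo_invmap is a made-up name for the example. */
#if 0	/* illustrative example only */
#include <stdio.h>
#include <linux/icmp.h>

static const unsigned char demo_invmap[] = {
	[ICMP_ECHO]      = ICMP_ECHOREPLY + 1,	/* type 8 -> 0, stored as 1 */
	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,	/* type 0 -> 8, stored as 9 */
};

int main(void)
{
	unsigned int type = ICMP_ECHO;

	if (type < sizeof(demo_invmap) && demo_invmap[type])
		printf("inverse of type %u is %u\n", type, demo_invmap[type] - 1);
	else
		printf("type %u has no tracked inverse\n", type);
	return 0;
}
#endif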
/* Print out the per-protocol part of the tuple. */
static int icmp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return seq_printf(s, "type=%u code=%u id=%u ",
tuple->dst.u.icmp.type,
tuple->dst.u.icmp.code,
ntohs(tuple->src.u.icmp.id));
}
/* Print out the private part of the conntrack. */
static int icmp_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
/* Returns verdict for packet, or -1 for invalid. */
static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum)
{
/* Try to delete connection immediately after all replies:
won't actually vanish as we still have skb, and del_timer
means this will only run once even if count hits zero twice
(theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
if (atomic_dec_and_test(&ct->proto.icmp.count)
&& del_timer(&ct->timeout))
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
}
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int icmp_new(struct nf_conn *conntrack,
const struct sk_buff *skb, unsigned int dataoff)
{
static u_int8_t valid_new[]
= { [ICMP_ECHO] = 1,
[ICMP_TIMESTAMP] = 1,
[ICMP_INFO_REQUEST] = 1,
[ICMP_ADDRESS] = 1 };
if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
|| !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
DEBUGP("icmp: can't create new conn with type %u\n",
conntrack->tuplehash[0].tuple.dst.u.icmp.type);
NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
return 0;
}
atomic_set(&conntrack->proto.icmp.count, 0);
return 1;
}
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
icmp_error_message(struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct nf_conntrack_tuple innertuple, origtuple;
struct {
struct icmphdr icmp;
struct iphdr ip;
} _in, *inside;
struct nf_conntrack_protocol *innerproto;
struct nf_conntrack_tuple_hash *h;
int dataoff;
NF_CT_ASSERT(skb->nfct == NULL);
/* Not enough header? */
inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
if (inside == NULL)
return -NF_ACCEPT;
/* Ignore ICMP's containing fragments (shouldn't happen) */
if (inside->ip.frag_off & htons(IP_OFFSET)) {
DEBUGP("icmp_error_message: fragment of proto %u\n",
inside->ip.protocol);
return -NF_ACCEPT;
}
innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
inside->ip.protocol, &origtuple,
&nf_conntrack_l3proto_ipv4, innerproto)) {
DEBUGP("icmp_error_message: ! get_tuple p=%u",
inside->ip.protocol);
return -NF_ACCEPT;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&innertuple, &origtuple,
&nf_conntrack_l3proto_ipv4, innerproto)) {
DEBUGP("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
*ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(&innertuple, NULL);
if (!h) {
/* Locally generated ICMPs will match inverted if they
haven't been SNAT'ed yet */
/* FIXME: NAT code has to handle half-done double NAT --RR */
if (hooknum == NF_IP_LOCAL_OUT)
h = nf_conntrack_find_get(&origtuple, NULL);
if (!h) {
DEBUGP("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
/* Reverse direction from that found */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
} else {
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
}
/* Update skb to refer to this connection */
skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
return -NF_ACCEPT;
}
/* Small and modified version of icmp_rcv */
static int
icmp_error(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
{
struct icmphdr _ih, *icmph;
/* Not enough header? */
icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (hooknum != NF_IP_PRE_ROUTING)
goto checksum_skipped;
switch (skb->ip_summed) {
case CHECKSUM_HW:
if (!(u16)csum_fold(skb->csum))
break;
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
case CHECKSUM_NONE:
if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
NULL,
"nf_ct_icmp: bad ICMP checksum ");
return -NF_ACCEPT;
}
default:
break;
}
checksum_skipped:
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
* RFC 1122: 3.2.2 Unknown ICMP message types MUST be silently
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
/* Need to track icmp error message? */
if (icmph->type != ICMP_DEST_UNREACH
&& icmph->type != ICMP_SOURCE_QUENCH
&& icmph->type != ICMP_TIME_EXCEEDED
&& icmph->type != ICMP_PARAMETERPROB
&& icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
return icmp_error_message(skb, ctinfo, hooknum);
}
struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
{
.list = { NULL, NULL },
.l3proto = PF_INET,
.proto = IPPROTO_ICMP,
.name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.print_tuple = icmp_print_tuple,
.print_conntrack = icmp_print_conntrack,
.packet = icmp_packet,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
.me = NULL
};
EXPORT_SYMBOL(nf_conntrack_protocol_icmp);

View file

@ -176,6 +176,11 @@ static inline int ip6_input_finish(struct sk_buff *skb)
if (ipprot->flags & INET6_PROTO_FINAL) {
struct ipv6hdr *hdr;
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded
indefinitely. */
nf_reset(skb);
skb_postpull_rcsum(skb, skb->nh.raw,
skb->h.raw - skb->nh.raw);
hdr = skb->nh.ipv6h;

View file

@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
#ifdef CONFIG_NETFILTER
to->nfmark = from->nfmark;
/* Connection association is same as pre-frag packet */
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
nf_conntrack_get(to->nfct);
to->nfctinfo = from->nfctinfo;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put_reasm(to->nfct_reasm);
to->nfct_reasm = from->nfct_reasm;
nf_conntrack_get_reasm(to->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(to->nf_bridge);
to->nf_bridge = from->nf_bridge;

View file

@ -278,5 +278,19 @@ config IP6_NF_RAW
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
config NF_CONNTRACK_IPV6
tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
This is IPv6 support for Layer 3 independent connection tracking.
Layer 3 independent connection tracking is an experimental scheme
which generalizes ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
endmenu

View file

@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o

View file

@ -56,9 +56,9 @@ checkentry(const char *tablename,
return 1;
}
static struct ip6t_target ip6t_mark_reg = {
.name = "MARK",
.target = target,
.checkentry = checkentry,
.me = THIS_MODULE
};

View file

@ -0,0 +1,556 @@
/*
* Copyright (C)2004 USAGI/WIDE Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - support Layer 3 protocol independent connection tracking.
* Based on the original ip_conntrack code which had the following
* copyright information:
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - add get_features() to support various size of conntrack
* structures.
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ipv6.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
u_int32_t _addrs[8], *ap;
ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
sizeof(_addrs), _addrs);
if (ap == NULL)
return 0;
memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
return 1;
}
static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
return 1;
}
static int ipv6_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
}
static int ipv6_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
/*
* Based on ipv6_skip_exthdr() in net/ipv6/exthdrs.c
*
* This function parses (probably truncated) exthdr set "hdr"
* of length "len". "nexthdrp" initially points to some place,
* where type of the first header can be found.
*
* It skips all well-known exthdrs and returns the offset of the start
* of the unparsable area, i.e. the first header with unknown type.
* On success, *nexthdrp is updated with the type/protocol of that header.
*
* NOTES: - it may return an offset pointing beyond the end of the packet,
* if the last recognized header is truncated in the middle.
* - if the packet is truncated, so that all parsed headers are skipped,
* it returns -1.
* - if the packet is fragmented, it returns the offset of the fragment
* header.
* - ESP is unparsable for now and treated like a
* normal payload protocol.
* - Note also special handling of AUTH header. Thanks to IPsec wizards.
*/
int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp,
int len)
{
u8 nexthdr = *nexthdrp;
while (ipv6_ext_hdr(nexthdr)) {
struct ipv6_opt_hdr hdr;
int hdrlen;
if (len < (int)sizeof(struct ipv6_opt_hdr))
return -1;
if (nexthdr == NEXTHDR_NONE)
break;
if (nexthdr == NEXTHDR_FRAGMENT)
break;
if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
BUG();
if (nexthdr == NEXTHDR_AUTH)
hdrlen = (hdr.hdrlen+2)<<2;
else
hdrlen = ipv6_optlen(&hdr);
nexthdr = hdr.nexthdr;
len -= hdrlen;
start += hdrlen;
}
*nexthdrp = nexthdr;
return start;
}
static int
ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
unsigned int extoff;
unsigned char pnum;
int protoff;
extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
pnum = (*pskb)->nh.ipv6h->nexthdr;
protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
(*pskb)->len - extoff);
/*
* (protoff == (*pskb)->len) means that the packet has no data
* except IPv6 and extension headers, but it is tracked anyway. - YK
*/
if ((protoff < 0) || (protoff > (*pskb)->len)) {
DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
NF_CT_STAT_INC(error);
NF_CT_STAT_INC(invalid);
return -NF_ACCEPT;
}
*dataoff = protoff;
*protonum = pnum;
return NF_ACCEPT;
}
static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
{
return NF_CT_F_BASIC;
}
static unsigned int ipv6_confirm(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
unsigned int ret;
int protoff;	/* may be -1 from nf_ct_ipv6_skip_exthdr() */
unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
- (*pskb)->data;
unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
(*pskb)->len - extoff);
if (protoff < 0 || protoff > (*pskb)->len ||
pnum == NEXTHDR_FRAGMENT) {
DEBUGP("proto header not found\n");
return NF_ACCEPT;
}
ret = ct->helper->help(pskb, protoff, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
}
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(pskb);
}
extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
struct net_device *in,
struct net_device *out,
int (*okfn)(struct sk_buff *));
static unsigned int ipv6_defrag(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm;
/* Previously seen (loopback)? */
if ((*pskb)->nfct)
return NF_ACCEPT;
reasm = nf_ct_frag6_gather(*pskb);
/* queued */
if (reasm == NULL)
return NF_STOLEN;
/* error occurred or not fragmented */
if (reasm == *pskb)
return NF_ACCEPT;
nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
(struct net_device *)out, okfn);
return NF_STOLEN;
}
static unsigned int ipv6_conntrack_in(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = (*pskb)->nfct_reasm;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
/* Reassembled packet isn't parsed yet ? */
if (!reasm->nfct) {
unsigned int ret;
ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
if (ret != NF_ACCEPT)
return ret;
}
nf_conntrack_get(reasm->nfct);
(*pskb)->nfct = reasm->nfct;
return NF_ACCEPT;
}
return nf_conntrack_in(PF_INET6, hooknum, pskb);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct ipv6hdr)) {
if (net_ratelimit())
printk("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
}
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
static struct nf_hook_ops ipv6_conntrack_defrag_ops = {
.hook = ipv6_defrag,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
};
static struct nf_hook_ops ipv6_conntrack_in_ops = {
.hook = ipv6_conntrack_in,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
};
static struct nf_hook_ops ipv6_conntrack_local_out_ops = {
.hook = ipv6_conntrack_local,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
};
static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = {
.hook = ipv6_defrag,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
};
/* Refragmenter; last chance. */
static struct nf_hook_ops ipv6_conntrack_out_ops = {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
};
static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_IP6_LOCAL_IN,
.priority = NF_IP6_PRI_LAST-1,
};
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmpv6.c */
extern unsigned long nf_ct_icmpv6_timeout;
/* From nf_conntrack_reasm.c */
extern int nf_ct_frag6_timeout;
extern int nf_ct_frag6_low_thresh;
extern int nf_ct_frag6_high_thresh;
static struct ctl_table_header *nf_ct_ipv6_sysctl_header;
static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
.procname = "nf_conntrack_icmpv6_timeout",
.data = &nf_ct_icmpv6_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT,
.procname = "nf_conntrack_frag6_timeout",
.data = &nf_ct_frag6_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
.procname = "nf_conntrack_frag6_low_thresh",
.data = &nf_ct_frag6_low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
.procname = "nf_conntrack_frag6_high_thresh",
.data = &nf_ct_frag6_high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_netfilter_table[] = {
{
.ctl_name = NET_NETFILTER,
.procname = "netfilter",
.mode = 0555,
.child = nf_ct_sysctl_table,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = nf_ct_netfilter_table,
},
{ .ctl_name = 0 }
};
#endif
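/* The nested ctl_table above ends up under /proc/sys/net/netfilter/.  A
 * user-space sketch of reading one of the knobs; the path is derived from
 * the procname fields above, and proc_dointvec_jiffies means the timeout
 * file is read and written in seconds. */
#if 0	/* user-space example only */
#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/proc/sys/net/netfilter/nf_conntrack_icmpv6_timeout",
			"r");

	if (f == NULL)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("icmpv6 conntrack timeout (s): %s", buf);
	fclose(f);
	return 0;
}
#endif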
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.name = "ipv6",
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
.print_tuple = ipv6_print_tuple,
.print_conntrack = ipv6_print_conntrack,
.prepare = ipv6_prepare,
.get_features = ipv6_get_features,
.me = THIS_MODULE,
};
extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
extern int nf_ct_frag6_init(void);
extern void nf_ct_frag6_cleanup(void);
static int init_or_cleanup(int init)
{
int ret = 0;
if (!init) goto cleanup;
ret = nf_ct_frag6_init();
if (ret < 0) {
printk("nf_conntrack_ipv6: can't initialize frag6.\n");
goto cleanup_nothing;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register tcp.\n");
goto cleanup_frag6;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register udp.\n");
goto cleanup_tcp;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register icmpv6.\n");
goto cleanup_udp;
}
ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register ipv6\n");
goto cleanup_icmpv6;
}
ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register pre-routing defrag "
"hook.\n");
goto cleanup_ipv6;
}
ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register local_out defrag "
"hook.\n");
goto cleanup_defragops;
}
ret = nf_register_hook(&ipv6_conntrack_in_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
goto cleanup_defraglocalops;
}
ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register local out hook.\n");
goto cleanup_inops;
}
ret = nf_register_hook(&ipv6_conntrack_out_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
goto cleanup_inandlocalops;
}
ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register local in hook.\n");
goto cleanup_inoutandlocalops;
}
#ifdef CONFIG_SYSCTL
nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
if (nf_ct_ipv6_sysctl_header == NULL) {
printk("nf_conntrack: can't register to sysctl.\n");
ret = -ENOMEM;
goto cleanup_localinops;
}
#endif
return ret;
cleanup:
synchronize_net();
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
cleanup_localinops:
#endif
nf_unregister_hook(&ipv6_conntrack_local_in_ops);
cleanup_inoutandlocalops:
nf_unregister_hook(&ipv6_conntrack_out_ops);
cleanup_inandlocalops:
nf_unregister_hook(&ipv6_conntrack_local_out_ops);
cleanup_inops:
nf_unregister_hook(&ipv6_conntrack_in_ops);
cleanup_defraglocalops:
nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
cleanup_defragops:
nf_unregister_hook(&ipv6_conntrack_defrag_ops);
cleanup_ipv6:
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
cleanup_icmpv6:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
cleanup_udp:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
cleanup_tcp:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
cleanup_frag6:
nf_ct_frag6_cleanup();
cleanup_nothing:
return ret;
}
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
static int __init init(void)
{
need_nf_conntrack();
return init_or_cleanup(1);
}
static void __exit fini(void)
{
init_or_cleanup(0);
}
module_init(init);
module_exit(fini);
void need_ip6_conntrack(void)
{
}
EXPORT_SYMBOL(need_ip6_conntrack);

View file

@ -0,0 +1,272 @@
/*
* Copyright (C)2003,2004 USAGI/WIDE Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - ICMPv6 tracking support. Derived from the original ip_conntrack code
* net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
* copyright information:
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <linux/ipv6.h>
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <linux/seq_file.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
unsigned long nf_ct_icmpv6_timeout = 30*HZ;
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
struct icmp6hdr _hdr, *hp;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
return 0;
tuple->dst.u.icmp.type = hp->icmp6_type;
tuple->src.u.icmp.id = hp->icmp6_identifier;
tuple->dst.u.icmp.code = hp->icmp6_code;
return 1;
}
static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
/* Add 1; spaces filled with 0. */
static u_int8_t invmap[] = {
[ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
[ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
[ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
[ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
};
__u8 type = orig->dst.u.icmp.type - 128;
if (type >= sizeof(invmap) || !invmap[type])
return 0;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
tuple->dst.u.icmp.type = invmap[type] - 1;
tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
return 1;
}
/* Print out the per-protocol part of the tuple. */
static int icmpv6_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return seq_printf(s, "type=%u code=%u id=%u ",
tuple->dst.u.icmp.type,
tuple->dst.u.icmp.code,
ntohs(tuple->src.u.icmp.id));
}
/* Print out the private part of the conntrack. */
static int icmpv6_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
/* Returns verdict for packet, or -1 for invalid. */
static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum)
{
/* Try to delete connection immediately after all replies:
won't actually vanish as we still have skb, and del_timer
means this will only run once even if count hits zero twice
(theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
if (atomic_dec_and_test(&ct->proto.icmp.count)
&& del_timer(&ct->timeout))
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
}
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int icmpv6_new(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff)
{
static u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
[ICMPV6_NI_QUERY - 128] = 1
};
if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
|| !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
/* Can't create a new ICMPv6 `conn' with this. */
DEBUGP("icmp: can't create new conn with type %u\n",
conntrack->tuplehash[0].tuple.dst.u.icmp.type);
NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
return 0;
}
atomic_set(&conntrack->proto.icmp.count, 0);
return 1;
}
extern int
nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
static int
icmpv6_error_message(struct sk_buff *skb,
unsigned int icmp6off,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct nf_conntrack_tuple intuple, origtuple;
struct nf_conntrack_tuple_hash *h;
struct icmp6hdr _hdr, *hp;
unsigned int inip6off;
struct nf_conntrack_protocol *inproto;
u_int8_t inprotonum;
unsigned int inprotoff;
NF_CT_ASSERT(skb->nfct == NULL);
hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
if (hp == NULL) {
DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
return -NF_ACCEPT;
}
inip6off = icmp6off + sizeof(_hdr);
if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
&inprotonum, sizeof(inprotonum)) != 0) {
DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
return -NF_ACCEPT;
}
inprotoff = nf_ct_ipv6_skip_exthdr(skb,
inip6off + sizeof(struct ipv6hdr),
&inprotonum,
skb->len - inip6off
- sizeof(struct ipv6hdr));
if ((inprotoff < 0) || (inprotoff > skb->len) ||
(inprotonum == NEXTHDR_FRAGMENT)) {
DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
return -NF_ACCEPT;
}
inproto = nf_ct_find_proto(PF_INET6, inprotonum);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
&origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
DEBUGP("icmpv6_error: Can't get tuple\n");
return -NF_ACCEPT;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!nf_ct_invert_tuple(&intuple, &origtuple,
&nf_conntrack_l3proto_ipv6, inproto)) {
DEBUGP("icmpv6_error: Can't invert tuple\n");
return -NF_ACCEPT;
}
*ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(&intuple, NULL);
if (!h) {
DEBUGP("icmpv6_error: no match\n");
return -NF_ACCEPT;
} else {
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
}
/* Update skb to refer to this connection */
skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
return -NF_ACCEPT;
}
static int
icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
{
struct icmp6hdr _ih, *icmp6h;
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
if (LOG_INVALID(IPPROTO_ICMPV6))
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
}
if (hooknum != NF_IP6_PRE_ROUTING)
goto skipped;
/* Ignore it if the checksum's bogus. */
if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
skb->len - dataoff, IPPROTO_ICMPV6,
skb_checksum(skb, dataoff,
skb->len - dataoff, 0))) {
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed\n");
return -NF_ACCEPT;
}
skipped:
/* ICMPv6 informational messages (type >= 128) are not errors */
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
}
struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
{
.l3proto = PF_INET6,
.proto = IPPROTO_ICMPV6,
.name = "icmpv6",
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
.print_tuple = icmpv6_print_tuple,
.print_conntrack = icmpv6_print_conntrack,
.packet = icmpv6_packet,
.new = icmpv6_new,
.error = icmpv6_error,
};
EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);

View file

@ -0,0 +1,885 @@
/*
* IPv6 fragment reassembly for connection tracking
*
* Copyright (C)2004 USAGI/WIDE Project
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*
* Based on: net/ipv6/reassembly.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
int nf_ct_frag6_high_thresh = 256*1024;
int nf_ct_frag6_low_thresh = 192*1024;
int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
int offset;
struct sk_buff *orig;
};
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
struct nf_ct_frag6_queue
{
struct nf_ct_frag6_queue *next;
struct list_head lru_list; /* lru list member */
__u32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* expire timer */
struct sk_buff *fragments;
int len;
int meat;
struct timeval stamp;
unsigned int csum;
__u8 last_in; /* has first/last segment arrived? */
#define COMPLETE 4
#define FIRST_IN 2
#define LAST_IN 1
__u16 nhoffset;
struct nf_ct_frag6_queue **pprev;
};
/* Hash table. */
#define FRAG6Q_HASHSZ 64
static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
static u32 nf_ct_frag6_hash_rnd;
static LIST_HEAD(nf_ct_frag6_lru_list);
int nf_ct_frag6_nqueues = 0;
static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
{
if (fq->next)
fq->next->pprev = fq->pprev;
*fq->pprev = fq->next;
list_del(&fq->lru_list);
nf_ct_frag6_nqueues--;
}
static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
{
write_lock(&nf_ct_frag6_lock);
__fq_unlink(fq);
write_unlock(&nf_ct_frag6_lock);
}
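/* The queue hash above uses the classic "pprev" idiom: each entry stores
 * the address of the pointer that points at it, so __fq_unlink() can remove
 * an entry in O(1) without walking the chain.  A stand-alone sketch of the
 * same pattern (mirroring __fq_unlink() above and the relinking done later
 * in nf_ct_frag6_intern()); all names here are local to the example. */
#if 0	/* illustrative example only */
#include <stdio.h>

struct demo_node {
	struct demo_node *next;
	struct demo_node **pprev;	/* address of the pointer pointing at us */
	int val;
};

static void demo_unlink(struct demo_node *n)
{
	if (n->next)
		n->next->pprev = n->pprev;
	*n->pprev = n->next;
}

static void demo_insert_head(struct demo_node **head, struct demo_node *n)
{
	if ((n->next = *head) != NULL)
		n->next->pprev = &n->next;
	*head = n;
	n->pprev = head;
}

int main(void)
{
	struct demo_node a = { .val = 1 }, b = { .val = 2 };
	struct demo_node *head = NULL, *p;

	demo_insert_head(&head, &a);
	demo_insert_head(&head, &b);	/* list: b -> a */
	demo_unlink(&a);		/* list: b */

	for (p = head; p; p = p->next)
		printf("%d\n", p->val);	/* prints 2 */
	return 0;
}
#endif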
static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
{
u32 a, b, c;
a = saddr->s6_addr32[0];
b = saddr->s6_addr32[1];
c = saddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += nf_ct_frag6_hash_rnd;
__jhash_mix(a, b, c);
a += saddr->s6_addr32[3];
b += daddr->s6_addr32[0];
c += daddr->s6_addr32[1];
__jhash_mix(a, b, c);
a += daddr->s6_addr32[2];
b += daddr->s6_addr32[3];
c += id;
__jhash_mix(a, b, c);
return c & (FRAG6Q_HASHSZ - 1);
}
static struct timer_list nf_ct_frag6_secret_timer;
int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
{
unsigned long now = jiffies;
int i;
write_lock(&nf_ct_frag6_lock);
get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
for (i = 0; i < FRAG6Q_HASHSZ; i++) {
struct nf_ct_frag6_queue *q;
q = nf_ct_frag6_hash[i];
while (q) {
struct nf_ct_frag6_queue *next = q->next;
unsigned int hval = ip6qhashfn(q->id,
&q->saddr,
&q->daddr);
if (hval != i) {
/* Unlink. */
if (q->next)
q->next->pprev = q->pprev;
*q->pprev = q->next;
/* Relink to new hash chain. */
if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
q->next->pprev = &q->next;
nf_ct_frag6_hash[hval] = q;
q->pprev = &nf_ct_frag6_hash[hval];
}
q = next;
}
}
write_unlock(&nf_ct_frag6_lock);
mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
}
atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb)
{
atomic_sub(skb->truesize, &nf_ct_frag6_mem);
if (NFCT_FRAG6_CB(skb)->orig)
kfree_skb(NFCT_FRAG6_CB(skb)->orig);
kfree_skb(skb);
}
static inline void frag_free_queue(struct nf_ct_frag6_queue *fq)
{
atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
kfree(fq);
}
static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
{
struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
if (!fq)
return NULL;
atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
return fq;
}
/* Destruction primitives. */
/* Complete destruction of fq. */
static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq)
{
struct sk_buff *fp;
BUG_TRAP(fq->last_in&COMPLETE);
BUG_TRAP(del_timer(&fq->timer) == 0);
/* Release all fragment data. */
fp = fq->fragments;
while (fp) {
struct sk_buff *xp = fp->next;
frag_kfree_skb(fp);
fp = xp;
}
frag_free_queue(fq);
}
static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
{
if (atomic_dec_and_test(&fq->refcnt))
nf_ct_frag6_destroy(fq);
}
/* Kill fq entry. It is not destroyed immediately,
* because caller (and someone more) holds reference count.
*/
static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
{
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);
if (!(fq->last_in & COMPLETE)) {
fq_unlink(fq);
atomic_dec(&fq->refcnt);
fq->last_in |= COMPLETE;
}
}
static void nf_ct_frag6_evictor(void)
{
struct nf_ct_frag6_queue *fq;
struct list_head *tmp;
for (;;) {
if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh)
return;
read_lock(&nf_ct_frag6_lock);
if (list_empty(&nf_ct_frag6_lru_list)) {
read_unlock(&nf_ct_frag6_lock);
return;
}
tmp = nf_ct_frag6_lru_list.next;
fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
atomic_inc(&fq->refcnt);
read_unlock(&nf_ct_frag6_lock);
spin_lock(&fq->lock);
if (!(fq->last_in&COMPLETE))
fq_kill(fq);
spin_unlock(&fq->lock);
fq_put(fq);
}
}
static void nf_ct_frag6_expire(unsigned long data)
{
struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
spin_lock(&fq->lock);
if (fq->last_in & COMPLETE)
goto out;
fq_kill(fq);
out:
spin_unlock(&fq->lock);
fq_put(fq);
}
/* Creation primitives. */
static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
struct nf_ct_frag6_queue *fq_in)
{
struct nf_ct_frag6_queue *fq;
write_lock(&nf_ct_frag6_lock);
#ifdef CONFIG_SMP
for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
if (fq->id == fq_in->id &&
!ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
!ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
atomic_inc(&fq->refcnt);
write_unlock(&nf_ct_frag6_lock);
fq_in->last_in |= COMPLETE;
fq_put(fq_in);
return fq;
}
}
#endif
fq = fq_in;
if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
atomic_inc(&fq->refcnt);
atomic_inc(&fq->refcnt);
if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
fq->next->pprev = &fq->next;
nf_ct_frag6_hash[hash] = fq;
fq->pprev = &nf_ct_frag6_hash[hash];
INIT_LIST_HEAD(&fq->lru_list);
list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
nf_ct_frag6_nqueues++;
write_unlock(&nf_ct_frag6_lock);
return fq;
}
static struct nf_ct_frag6_queue *
nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
{
struct nf_ct_frag6_queue *fq;
if ((fq = frag_alloc_queue()) == NULL) {
DEBUGP("Can't alloc new queue\n");
goto oom;
}
memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
fq->id = id;
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
init_timer(&fq->timer);
fq->timer.function = nf_ct_frag6_expire;
fq->timer.data = (long) fq;
fq->lock = SPIN_LOCK_UNLOCKED;
atomic_set(&fq->refcnt, 1);
return nf_ct_frag6_intern(hash, fq);
oom:
return NULL;
}
static __inline__ struct nf_ct_frag6_queue *
fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
{
struct nf_ct_frag6_queue *fq;
unsigned int hash = ip6qhashfn(id, src, dst);
read_lock(&nf_ct_frag6_lock);
for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
if (fq->id == id &&
!ipv6_addr_cmp(src, &fq->saddr) &&
!ipv6_addr_cmp(dst, &fq->daddr)) {
atomic_inc(&fq->refcnt);
read_unlock(&nf_ct_frag6_lock);
return fq;
}
}
read_unlock(&nf_ct_frag6_lock);
return nf_ct_frag6_create(hash, id, src, dst);
}
static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
int offset, end;
if (fq->last_in & COMPLETE) {
DEBUGP("Allready completed\n");
goto err;
}
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
DEBUGP("offset is too large.\n");
return -1;
}
if (skb->ip_summed == CHECKSUM_HW)
skb->csum = csum_sub(skb->csum,
csum_partial(skb->nh.raw,
(u8*)(fhdr + 1) - skb->nh.raw,
0));
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
if (end < fq->len ||
((fq->last_in & LAST_IN) && end != fq->len)) {
DEBUGP("already received last fragment\n");
goto err;
}
fq->last_in |= LAST_IN;
fq->len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
*/
if (end & 0x7) {
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
return -1;
}
if (end > fq->len) {
/* Some bits beyond end -> corruption. */
if (fq->last_in & LAST_IN) {
DEBUGP("last packet already reached.\n");
goto err;
}
fq->len = end;
}
}
if (end == offset)
goto err;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
DEBUGP("queue: message is too short.\n");
goto err;
}
if (end-offset < skb->len) {
if (pskb_trim(skb, end - offset)) {
DEBUGP("Can't trim\n");
goto err;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->ip_summed = CHECKSUM_NONE;
}
/* Find out which fragments are in front and at the back of us
* in the chain of fragments so far. We must know where to put
* this fragment, right?
*/
prev = NULL;
for (next = fq->fragments; next != NULL; next = next->next) {
if (NFCT_FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
}
/* We found where to put this one. Check for overlap with
* preceding fragment, and, if needed, align things so that
* any overlaps are eliminated.
*/
if (prev) {
int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;
if (i > 0) {
offset += i;
if (end <= offset) {
DEBUGP("overlap\n");
goto err;
}
if (!pskb_pull(skb, i)) {
DEBUGP("Can't pull\n");
goto err;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->ip_summed = CHECKSUM_NONE;
}
}
/* Look for overlap with succeeding segments.
* If we can merge fragments, do it.
*/
while (next && NFCT_FRAG6_CB(next)->offset < end) {
/* overlap is 'i' bytes */
int i = end - NFCT_FRAG6_CB(next)->offset;
if (i < next->len) {
/* Eat head of the next overlapped fragment
* and leave the loop. The next ones cannot overlap.
*/
DEBUGP("Eat head of the overlapped parts.: %d", i);
if (!pskb_pull(next, i))
goto err;
/* next fragment */
NFCT_FRAG6_CB(next)->offset += i;
fq->meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
} else {
struct sk_buff *free_it = next;
/* Old fragment is completely overridden by the
* new one; drop it.
*/
next = next->next;
if (prev)
prev->next = next;
else
fq->fragments = next;
fq->meat -= free_it->len;
frag_kfree_skb(free_it);
}
}
NFCT_FRAG6_CB(skb)->offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
if (prev)
prev->next = skb;
else
fq->fragments = skb;
skb->dev = NULL;
skb_get_timestamp(skb, &fq->stamp);
fq->meat += skb->len;
atomic_add(skb->truesize, &nf_ct_frag6_mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
fq->last_in |= FIRST_IN;
}
write_lock(&nf_ct_frag6_lock);
list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
write_unlock(&nf_ct_frag6_lock);
return 0;
err:
return -1;
}
/*
* Check if this packet is complete.
* Returns NULL on failure for any reason, otherwise a pointer to the
* reassembled sk_buff.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->fragments;
int payload_len;
fq_kill(fq);
BUG_TRAP(head != NULL);
BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN) {
DEBUGP("payload len is too large.\n");
goto out_oversize;
}
/* Head of list must not be cloned. */
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
DEBUGP("skb is cloned but can't expand head");
goto out_oom;
}
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
if (skb_shinfo(head)->frag_list) {
struct sk_buff *clone;
int i, plen = 0;
if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
DEBUGP("Can't alloc skb\n");
goto out_oom;
}
clone->next = head->next;
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_shinfo(head)->frag_list = NULL;
for (i=0; i<skb_shinfo(head)->nr_frags; i++)
plen += skb_shinfo(head)->frags[i].size;
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
atomic_add(clone->truesize, &nf_ct_frag6_mem);
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
head->nh.raw[fq->nhoffset] = head->h.raw[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
head->mac.raw += sizeof(struct frag_hdr);
head->nh.raw += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
head->h.raw = head->data;
skb_push(head, head->data - head->nh.raw);
atomic_sub(head->truesize, &nf_ct_frag6_mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_HW)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &nf_ct_frag6_mem);
}
head->next = NULL;
head->dev = dev;
skb_set_timestamp(head, &fq->stamp);
head->nh.ipv6h->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_HW)
head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
fq->fragments = NULL;
/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
fp = skb_shinfo(head)->frag_list;
if (NFCT_FRAG6_CB(fp)->orig == NULL)
/* in the code above, the head skb was divided into two skbs. */
fp = fp->next;
op = NFCT_FRAG6_CB(head)->orig;
for (; fp; fp = fp->next) {
struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
op->next = orig;
op = orig;
NFCT_FRAG6_CB(fp)->orig = NULL;
}
return head;
out_oversize:
if (net_ratelimit())
printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
goto out_fail;
out_oom:
if (net_ratelimit())
printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
return NULL;
}
/*
* find the header just before Fragment Header.
*
* on success, returns 0 and sets ...
* (*prevhdrp): the value of "Next Header Field" in the header
* just before Fragment Header.
* (*prevhoff): the offset of "Next Header Field" in the header
* just before Fragment Header.
* (*fhoff) : the offset of Fragment Header.
*
* Based on ipv6_skip_exthdr() in net/ipv6/exthdrs.c
*
*/
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
u8 nexthdr = skb->nh.ipv6h->nexthdr;
u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
int len = skb->len - start;
u8 prevhdr = NEXTHDR_IPV6;
while (nexthdr != NEXTHDR_FRAGMENT) {
struct ipv6_opt_hdr hdr;
int hdrlen;
if (!ipv6_ext_hdr(nexthdr)) {
return -1;
}
if (len < (int)sizeof(struct ipv6_opt_hdr)) {
DEBUGP("too short\n");
return -1;
}
if (nexthdr == NEXTHDR_NONE) {
DEBUGP("next header is none\n");
return -1;
}
if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
BUG();
if (nexthdr == NEXTHDR_AUTH)
hdrlen = (hdr.hdrlen+2)<<2;
else
hdrlen = ipv6_optlen(&hdr);
prevhdr = nexthdr;
prev_nhoff = start;
nexthdr = hdr.nexthdr;
len -= hdrlen;
start += hdrlen;
}
if (len < 0)
return -1;
*prevhdrp = prevhdr;
*prevhoff = prev_nhoff;
*fhoff = start;
return 0;
}
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
struct frag_hdr *fhdr;
struct nf_ct_frag6_queue *fq;
struct ipv6hdr *hdr;
int fhoff, nhoff;
u8 prevhdr;
struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
if (skb->nh.ipv6h->payload_len == 0) {
DEBUGP("payload len = 0\n");
return skb;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
return skb;
clone = skb_clone(skb, GFP_ATOMIC);
if (clone == NULL) {
DEBUGP("Can't clone skb\n");
return skb;
}
NFCT_FRAG6_CB(clone)->orig = skb;
if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
DEBUGP("message is too short.\n");
goto ret_orig;
}
clone->h.raw = clone->data + fhoff;
hdr = clone->nh.ipv6h;
fhdr = (struct frag_hdr *)clone->h.raw;
if (!(fhdr->frag_off & htons(0xFFF9))) {
DEBUGP("Invalid fragment offset\n");
/* It is not a fragmented frame */
goto ret_orig;
}
if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
nf_ct_frag6_evictor();
fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
if (fq == NULL) {
DEBUGP("Can't find and can't create new queue\n");
goto ret_orig;
}
spin_lock(&fq->lock);
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
spin_unlock(&fq->lock);
DEBUGP("Can't insert skb to queue\n");
fq_put(fq);
goto ret_orig;
}
if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
DEBUGP("Can't reassemble fragmented packets\n");
}
spin_unlock(&fq->lock);
fq_put(fq);
return ret_skb;
ret_orig:
kfree_skb(clone);
return skb;
}
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
struct net_device *in, struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *s, *s2;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
nf_conntrack_put_reasm(s->nfct_reasm);
nf_conntrack_get_reasm(skb);
s->nfct_reasm = skb;
s2 = s->next;
NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
s = s2;
}
nf_conntrack_put_reasm(skb);
}
int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
s2 = s->next;
kfree_skb(s);
}
kfree_skb(skb);
return 0;
}
int nf_ct_frag6_init(void)
{
nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
init_timer(&nf_ct_frag6_secret_timer);
nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
nf_ct_frag6_secret_timer.expires = jiffies
+ nf_ct_frag6_secret_interval;
add_timer(&nf_ct_frag6_secret_timer);
return 0;
}
void nf_ct_frag6_cleanup(void)
{
del_timer(&nf_ct_frag6_secret_timer);
nf_ct_frag6_evictor();
}

View file

@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
if (clone)
if (clone) {
nf_reset(clone);
rawv6_rcv(sk, clone);
}
}
sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
IP6CB(skb)->iif);

View file

@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
static int fib6_dump_done(struct netlink_callback *cb)
{
fib6_dump_end(cb);
return cb->done(cb);
return cb->done ? cb->done(cb) : 0;
}
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)

View file

@ -1,3 +1,6 @@
menu "Core Netfilter Configuration"
depends on NET && NETFILTER
config NETFILTER_NETLINK
tristate "Netfilter netlink interface"
help
@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG
and is also scheduled to replace the old syslog-based ipt_LOG
and ip6t_LOG modules.
config NF_CONNTRACK
tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
depends on EXPERIMENTAL && IP_NF_CONNTRACK=n
default n
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
Layer 3 independent connection tracking is an experimental scheme
which generalizes ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
config NF_CT_ACCT
bool "Connection tracking flow accounting"
depends on NF_CONNTRACK
help
If this option is enabled, the connection tracking code will
keep per-flow packet and byte counters.
Those counters can be used for flow-based accounting or the
`connbytes' match.
If unsure, say `N'.
config NF_CONNTRACK_MARK
bool 'Connection mark tracking support'
depends on NF_CONNTRACK
help
This option enables support for connection marks, used by the
`CONNMARK' target and `connmark' match. Similar to the mark value
of packets, but this mark value is kept in the conntrack session
instead of the individual packets.
config NF_CONNTRACK_EVENTS
bool "Connection tracking events"
depends on NF_CONNTRACK
help
If this option is enabled, the connection tracking code will
provide a notifier chain that can be used by other kernel code
to get notified about changes in the connection tracking state.
If unsure, say `N'.
config NF_CT_PROTO_SCTP
tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL && NF_CONNTRACK
default n
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on SCTP connections.
If you want to compile it as a module, say M here and read
Documentation/modules.txt. If unsure, say `N'.
config NF_CONNTRACK_FTP
tristate "FTP support on new connection tracking (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
help
Tracking FTP connections is problematic: special helpers are
required for tracking them, and doing masquerading and other forms
of Network Address Translation on them.
This is FTP support for Layer 3 independent connection tracking.
Layer 3 independent connection tracking is an experimental scheme
which generalizes ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
endmenu

View file

@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
# SCTP protocol connection tracking
obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o

File diff suppressed because it is too large

View file

@ -0,0 +1,698 @@
/* FTP extension for connection tracking. */
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - enable working with Layer 3 protocol independent connection tracking.
* - track EPRT and EPSV commands with IPv6 address.
*
* Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ctype.h>
#include <net/checksum.h>
#include <net/tcp.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <linux/netfilter/nf_conntrack_ftp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
MODULE_DESCRIPTION("ftp connection tracking helper");
/* This is slow, but it's simple. --RR */
static char *ftp_buffer;
static DEFINE_SPINLOCK(nf_ftp_lock);
#define MAX_PORTS 8
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
module_param_array(ports, ushort, &ports_c, 0400);
static int loose;
module_param(loose, int, 0600);
unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp,
u32 *seq);
EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
char);
static struct ftp_search {
enum ip_conntrack_dir dir;
const char *pattern;
size_t plen;
char skip;
char term;
enum ip_ct_ftp_type ftptype;
int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
} search[] = {
{
IP_CT_DIR_ORIGINAL,
"PORT", sizeof("PORT") - 1, ' ', '\r',
IP_CT_FTP_PORT,
try_rfc959,
},
{
IP_CT_DIR_REPLY,
"227 ", sizeof("227 ") - 1, '(', ')',
IP_CT_FTP_PASV,
try_rfc959,
},
{
IP_CT_DIR_ORIGINAL,
"EPRT", sizeof("EPRT") - 1, ' ', '\r',
IP_CT_FTP_EPRT,
try_eprt,
},
{
IP_CT_DIR_REPLY,
"229 ", sizeof("229 ") - 1, '(', ')',
IP_CT_FTP_EPSV,
try_epsv_response,
},
};
/* This code is based on inet_pton() in glibc-2.2.4 */
static int
get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
{
static const char xdigits[] = "0123456789abcdef";
u_int8_t tmp[16], *tp, *endp, *colonp;
int ch, saw_xdigit;
u_int32_t val;
size_t clen = 0;
tp = memset(tmp, '\0', sizeof(tmp));
endp = tp + sizeof(tmp);
colonp = NULL;
/* Leading :: requires some special handling. */
if (*src == ':'){
if (*++src != ':') {
DEBUGP("invalid \":\" at the head of addr\n");
return 0;
}
clen++;
}
saw_xdigit = 0;
val = 0;
while ((clen < dlen) && (*src != term)) {
const char *pch;
ch = tolower(*src++);
clen++;
pch = strchr(xdigits, ch);
if (pch != NULL) {
val <<= 4;
val |= (pch - xdigits);
if (val > 0xffff)
return 0;
saw_xdigit = 1;
continue;
}
if (ch != ':') {
DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
return 0;
}
if (!saw_xdigit) {
if (colonp) {
DEBUGP("invalid location of \"::\".\n");
return 0;
}
colonp = tp;
continue;
} else if (*src == term) {
DEBUGP("trancated IPv6 addr\n");
return 0;
}
if (tp + 2 > endp)
return 0;
*tp++ = (u_int8_t) (val >> 8) & 0xff;
*tp++ = (u_int8_t) val & 0xff;
saw_xdigit = 0;
val = 0;
continue;
}
if (saw_xdigit) {
if (tp + 2 > endp)
return 0;
*tp++ = (u_int8_t) (val >> 8) & 0xff;
*tp++ = (u_int8_t) val & 0xff;
}
if (colonp != NULL) {
/*
* Since some memmove()'s erroneously fail to handle
* overlapping regions, we'll do the shift by hand.
*/
const int n = tp - colonp;
int i;
if (tp == endp)
return 0;
for (i = 1; i <= n; i++) {
endp[- i] = colonp[n - i];
colonp[n - i] = 0;
}
tp = endp;
}
if (tp != endp || (*src != term))
return 0;
memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
return clen;
}
static int try_number(const char *data, size_t dlen, u_int32_t array[],
int array_size, char sep, char term)
{
u_int32_t i, len;
memset(array, 0, sizeof(array[0])*array_size);
/* Keep data pointing at next char. */
for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
if (*data >= '0' && *data <= '9') {
array[i] = array[i]*10 + *data - '0';
}
else if (*data == sep)
i++;
else {
/* Unexpected character; true if it's the
terminator and we're finished. */
if (*data == term && i == array_size - 1)
return len;
DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
len, i, *data);
return 0;
}
}
DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
return 0;
}
/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
static int try_rfc959(const char *data, size_t dlen,
struct nf_conntrack_man *cmd, char term)
{
int length;
u_int32_t array[6];
length = try_number(data, dlen, array, 6, ',', term);
if (length == 0)
return 0;
cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
(array[2] << 8) | array[3]);
cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
return length;
}
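/* Editor's illustration (not from the original patch): decoding an assumed
 * PASV reply of "227 Entering Passive Mode (192,168,1,1,5,6)".  try_number()
 * fills array[] = {192,168,1,1,5,6}; the address becomes
 * htonl(192<<24 | 168<<16 | 1<<8 | 1), i.e. 192.168.1.1, and the port becomes
 * htons(5<<8 | 6), i.e. 1286.  A return value of 0 means the reply did not
 * parse as six comma-separated numbers. */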
/* Grab port: number up to delimiter */
static int get_port(const char *data, int start, size_t dlen, char delim,
u_int16_t *port)
{
u_int16_t tmp_port = 0;
int i;
for (i = start; i < dlen; i++) {
/* Finished? */
if (data[i] == delim) {
if (tmp_port == 0)
break;
*port = htons(tmp_port);
DEBUGP("get_port: return %d\n", tmp_port);
return i + 1;
}
else if (data[i] >= '0' && data[i] <= '9')
tmp_port = tmp_port*10 + data[i] - '0';
else { /* Some other crap */
DEBUGP("get_port: invalid char.\n");
break;
}
}
return 0;
}
/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
char term)
{
char delim;
int length;
/* First character is delimiter, then "1" for IPv4 or "2" for IPv6,
then delimiter again. */
if (dlen <= 3) {
DEBUGP("EPRT: too short\n");
return 0;
}
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
DEBUGP("try_eprt: invalid delimitter.\n");
return 0;
}
if ((cmd->l3num == PF_INET && data[1] != '1') ||
(cmd->l3num == PF_INET6 && data[1] != '2')) {
DEBUGP("EPRT: invalid protocol number.\n");
return 0;
}
DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim);
if (data[1] == '1') {
u_int32_t array[4];
/* Now we have IP address. */
length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
if (length != 0)
cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16)
| (array[2] << 8) | array[3]);
} else {
/* Now we have IPv6 address. */
length = get_ipv6_addr(data + 3, dlen - 3,
(struct in6_addr *)cmd->u3.ip6, delim);
}
if (length == 0)
return 0;
DEBUGP("EPRT: Got IP address!\n");
/* Start offset includes initial "|1|", and trailing delimiter */
return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
}
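/* Editor's illustration (not from the original patch): tracing try_eprt() on
 * an assumed IPv6 command "EPRT |2|3ffe::1|6275|".  The delimiter is '|',
 * data[1] == '2' selects the IPv6 branch, get_ipv6_addr() consumes the seven
 * characters of "3ffe::1" and stops at the closing '|', and get_port() is
 * then called at offset 3 + 7 + 1 == 11, i.e. on "6275|", leaving
 * cmd->u.tcp.port == htons(6275). */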
/* Returns 0, or length of numbers: |||6446| */
static int try_epsv_response(const char *data, size_t dlen,
struct nf_conntrack_man *cmd, char term)
{
char delim;
/* Three delimiters. */
if (dlen <= 3) return 0;
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126
|| data[1] != delim || data[2] != delim)
return 0;
return get_port(data, 3, dlen, delim, &cmd->u.tcp.port);
}
/* Return 1 for match, 0 for accept, -1 for partial. */
static int find_pattern(const char *data, size_t dlen,
const char *pattern, size_t plen,
char skip, char term,
unsigned int *numoff,
unsigned int *numlen,
struct nf_conntrack_man *cmd,
int (*getnum)(const char *, size_t,
struct nf_conntrack_man *, char))
{
size_t i;
DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
if (dlen == 0)
return 0;
if (dlen <= plen) {
/* Short packet: try for partial? */
if (strnicmp(data, pattern, dlen) == 0)
return -1;
else return 0;
}
if (strnicmp(data, pattern, plen) != 0) {
#if 0
size_t i;
DEBUGP("ftp: string mismatch\n");
for (i = 0; i < plen; i++) {
DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
i, data[i], data[i],
pattern[i], pattern[i]);
}
#endif
return 0;
}
DEBUGP("Pattern matches!\n");
/* Now we've found the constant string, try to skip
to the 'skip' character */
for (i = plen; data[i] != skip; i++)
if (i == dlen - 1) return -1;
/* Skip over the last character */
i++;
DEBUGP("Skipped up to `%c'!\n", skip);
*numoff = i;
*numlen = getnum(data + i, dlen - i, cmd, term);
if (!*numlen)
return -1;
DEBUGP("Match succeeded!\n");
return 1;
}
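/* Editor's illustration (not from the original patch): how a search[] entry
 * drives find_pattern().  For an assumed reply line
 * "227 Entering Passive Mode (192,168,1,1,5,6)\r\n", the "227 " pattern
 * matches at the start of the data, the loop then skips forward to the '('
 * character, *numoff is set just past it, and try_rfc959() parses the digits
 * up to the ')' terminator.  The return values are 1 for a complete match,
 * 0 for no match, and -1 for a partial match at the end of the packet, which
 * help() below answers with NF_DROP. */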
/* Look up to see if we're just after a \n. */
static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
{
unsigned int i;
for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
if (info->seq_aft_nl[dir][i] == seq)
return 1;
return 0;
}
/* We don't update if it's older than what we have. */
static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
struct sk_buff *skb)
{
unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
/* Look for oldest: if we find exact match, we're done. */
for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
if (info->seq_aft_nl[dir][i] == nl_seq)
return;
if (oldest == info->seq_aft_nl_num[dir]
|| before(info->seq_aft_nl[dir][i], oldest))
oldest = i;
}
if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
} else if (oldest != NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][oldest] = nl_seq;
nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
}
}
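/* Editor's illustration (not from the original patch): the two helpers above
 * enforce the rule that a command must start right after a newline.  Assuming
 * a control packet carrying "PORT 192,168,1,1,5,6\r\n", help() remembers
 * seq + datalen (the sequence number of the byte following the '\n') via
 * update_nl_seq(); a later packet is only searched for commands if
 * find_nl_seq() finds its starting sequence number among the remembered
 * values, i.e. if it begins exactly where a previous line ended. */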
static int help(struct sk_buff **pskb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
unsigned int dataoff, datalen;
struct tcphdr _tcph, *th;
char *fb_ptr;
int ret;
u32 seq;
int dir = CTINFO2DIR(ctinfo);
unsigned int matchlen, matchoff;
struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info;
struct nf_conntrack_expect *exp;
struct nf_conntrack_man cmd = {};
unsigned int i;
int found = 0, ends_in_nl;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED
&& ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
return NF_ACCEPT;
}
th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
dataoff = protoff + th->doff * 4;
/* No data? */
if (dataoff >= (*pskb)->len) {
DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
(*pskb)->len);
return NF_ACCEPT;
}
datalen = (*pskb)->len - dataoff;
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
BUG_ON(fb_ptr == NULL);
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
/* Look up to see if we're just after a \n. */
if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
/* Now if this ends in \n, update ftp info. */
DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][0],
ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][1]);
ret = NF_ACCEPT;
goto out_update_nl;
}
/* Initialize IP/IPv6 addr to expected address (it's not mentioned
in EPSV responses) */
cmd.l3num = ct->tuplehash[dir].tuple.src.l3num;
memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all));
for (i = 0; i < ARRAY_SIZE(search); i++) {
if (search[i].dir != dir) continue;
found = find_pattern(fb_ptr, datalen,
search[i].pattern,
search[i].plen,
search[i].skip,
search[i].term,
&matchoff, &matchlen,
&cmd,
search[i].getnum);
if (found) break;
}
if (found == -1) {
/* We don't usually drop packets. After all, this is
connection tracking, not packet filtering.
However, it is necessary for accurate tracking in
this case. */
if (net_ratelimit())
printk("conntrack_ftp: partial %s %u+%u\n",
search[i].pattern,
ntohl(th->seq), datalen);
ret = NF_DROP;
goto out;
} else if (found == 0) { /* No match */
ret = NF_ACCEPT;
goto out_update_nl;
}
DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
(int)matchlen, fb_ptr + matchoff,
matchlen, ntohl(th->seq) + matchoff);
exp = nf_conntrack_expect_alloc(ct);
if (exp == NULL) {
ret = NF_DROP;
goto out;
}
/* We refer to the reverse direction ("!dir") tuples here,
* because we're expecting something in the other direction.
* Doesn't matter unless NAT is happening. */
exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3;
/* Update the ftp info */
if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) &&
memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all))) {
/* Enrico Scholz's passive FTP to partially RNAT'd ftp
server: it really wants us to connect to a
different IP address. Simply don't record it for
NAT. */
if (cmd.l3num == PF_INET) {
DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
NIPQUAD(cmd.u3.ip),
NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
} else {
DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n",
NIP6(*((struct in6_addr *)cmd.u3.ip6)),
NIP6(*((struct in6_addr *)ct->tuplehash[dir]
.tuple.src.u3.ip6)));
}
/* Thanks to Cristiano Lincoln Mattos
<lincoln@cesar.org.br> for reporting this potential
problem (DMZ machines opening holes to internal
networks, or the packet filter itself). */
if (!loose) {
ret = NF_ACCEPT;
goto out_put_expect;
}
memcpy(&exp->tuple.dst.u3, &cmd.u3.all,
sizeof(exp->tuple.dst.u3));
}
exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3;
exp->tuple.src.l3num = cmd.l3num;
exp->tuple.src.u.tcp.port = 0;
exp->tuple.dst.u.tcp.port = cmd.u.tcp.port;
exp->tuple.dst.protonum = IPPROTO_TCP;
exp->mask = (struct nf_conntrack_tuple)
{ .src = { .l3num = 0xFFFF,
.u = { .tcp = { 0 }},
},
.dst = { .protonum = 0xFF,
.u = { .tcp = { 0xFFFF }},
},
};
if (cmd.l3num == PF_INET) {
exp->mask.src.u3.ip = 0xFFFFFFFF;
exp->mask.dst.u3.ip = 0xFFFFFFFF;
} else {
memset(exp->mask.src.u3.ip6, 0xFF,
sizeof(exp->mask.src.u3.ip6));
memset(exp->mask.dst.u3.ip6, 0xFF,
sizeof(exp->mask.src.u3.ip6));
}
exp->expectfn = NULL;
exp->flags = 0;
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
if (nf_nat_ftp_hook)
ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
matchoff, matchlen, exp, &seq);
else {
/* Can't expect this? Best to drop packet now. */
if (nf_conntrack_expect_related(exp) != 0)
ret = NF_DROP;
else
ret = NF_ACCEPT;
}
out_put_expect:
nf_conntrack_expect_put(exp);
out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
update_nl_seq(seq, ct_ftp_info, dir, *pskb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
}
static struct nf_conntrack_helper ftp[MAX_PORTS][2];
static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")];
/* don't make this __exit, since it's called from __init ! */
static void fini(void)
{
int i, j;
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++) {
if (ftp[i][j].me == NULL)
continue;
DEBUGP("nf_ct_ftp: unregistering helper for pf: %d "
"port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&ftp[i][j]);
}
}
kfree(ftp_buffer);
}
static int __init init(void)
{
int i, j = -1, ret = 0;
char *tmpname;
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
return -ENOMEM;
if (ports_c == 0)
ports[ports_c++] = FTP_PORT;
/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
are tracked or not - YK */
for (i = 0; i < ports_c; i++) {
memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper));
ftp[i][0].tuple.src.l3num = PF_INET;
ftp[i][1].tuple.src.l3num = PF_INET6;
for (j = 0; j < 2; j++) {
ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
ftp[i][j].mask.src.u.tcp.port = 0xFFFF;
ftp[i][j].mask.dst.protonum = 0xFF;
ftp[i][j].max_expected = 1;
ftp[i][j].timeout = 5 * 60; /* 5 Minutes */
ftp[i][j].me = THIS_MODULE;
ftp[i][j].help = help;
tmpname = &ftp_names[i][j][0];
if (ports[i] == FTP_PORT)
sprintf(tmpname, "ftp");
else
sprintf(tmpname, "ftp-%d", ports[i]);
ftp[i][j].name = tmpname;
DEBUGP("nf_ct_ftp: registering helper for pf: %d "
"port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&ftp[i][j]);
if (ret) {
printk("nf_ct_ftp: failed to register helper "
" for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
fini();
return ret;
}
}
}
return 0;
}
module_init(init);
module_exit(fini);

View file

@ -0,0 +1,98 @@
/*
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* Based largely upon the original ip_conntrack code which
* had the following copyright information:
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
return 1;
}
static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
return 1;
}
static int generic_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return 0;
}
static int generic_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
static int
generic_prepare(struct sk_buff **pskb, unsigned int hooknum,
unsigned int *dataoff, u_int8_t *protonum)
{
/* Never track !!! */
return -NF_ACCEPT;
}
static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple)
{
return NF_CT_F_BASIC;
}
struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = {
.l3proto = PF_UNSPEC,
.name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.print_tuple = generic_print_tuple,
.print_conntrack = generic_print_conntrack,
.prepare = generic_prepare,
.get_features = generic_get_features,
.me = THIS_MODULE,
};

View file

@ -0,0 +1,85 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - enable working with L3 protocol independent connection tracking.
*
* Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_protocol.h>
unsigned long nf_ct_generic_timeout = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
return 1;
}
static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
return 1;
}
/* Print out the per-protocol part of the tuple. */
static int generic_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return 0;
}
/* Print out the private part of the conntrack. */
static int generic_print_conntrack(struct seq_file *s,
const struct nf_conn *state)
{
return 0;
}
/* Returns verdict for packet, or -1 for invalid. */
static int packet(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum)
{
nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int new(struct nf_conn *conntrack, const struct sk_buff *skb,
unsigned int dataoff)
{
return 1;
}
struct nf_conntrack_protocol nf_conntrack_generic_protocol =
{
.l3proto = PF_UNSPEC,
.proto = 0,
.name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.print_tuple = generic_print_tuple,
.print_conntrack = generic_print_conntrack,
.packet = packet,
.new = new,
};

View file

@ -0,0 +1,670 @@
/*
* Connection tracking protocol helper module for SCTP.
*
* SCTP is defined in RFC 2960. References to various sections in this code
* are to this RFC.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - enable working with L3 protocol independent connection tracking.
*
* Derived from net/ipv4/ip_conntrack_sctp.c
*/
/*
* Added support for proc manipulation of timeouts.
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
#else
#define DEBUGP(format, args...)
#endif
/* Protects conntrack->proto.sctp */
static DEFINE_RWLOCK(sctp_lock);
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
And so for me for SCTP :D -Kiran */
static const char *sctp_conntrack_names[] = {
"NONE",
"CLOSED",
"COOKIE_WAIT",
"COOKIE_ECHOED",
"ESTABLISHED",
"SHUTDOWN_SENT",
"SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT",
};
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
static unsigned long nf_ct_sctp_timeout_closed = 10 SECS;
static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS;
static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS;
static unsigned long nf_ct_sctp_timeout_established = 5 DAYS;
static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
static unsigned long * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
&nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
&nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
&nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
&nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
&nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
&nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
&nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
};
#define sNO SCTP_CONNTRACK_NONE
#define sCL SCTP_CONNTRACK_CLOSED
#define sCW SCTP_CONNTRACK_COOKIE_WAIT
#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
#define sES SCTP_CONNTRACK_ESTABLISHED
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
These are the descriptions of the states:
NOTE: These state names are tantalizingly similar to the states of an
SCTP endpoint. But the interpretation of the states is a little different,
considering that these are the states of the connection and not of an end
point. Please note the subtleties. -Kiran
NONE - Nothing so far.
COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
an INIT_ACK chunk in the reply direction.
COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
*/
/* TODO
- I have assumed that the first INIT is in the original direction.
This messes things when an INIT comes in the reply direction in CLOSED
state.
- Check the error type in the reply dir before transitioning from
cookie echoed to closed.
- Sec 5.2.4 of RFC 2960
- Multi Homing support.
*/
/* SCTP conntrack state transitions */
static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
},
{
/* REPLY */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
}
};
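/* Editor's illustration (not from the original patch): a worked walk through
 * the table above for a normal association setup, assuming the first INIT is
 * seen in the ORIGINAL direction:
 *
 *   INIT        (orig)   sctp_conntracks[0][0][sNO] -> sCW
 *   INIT_ACK    (reply)  sctp_conntracks[1][1][sCW] -> sCW
 *   COOKIE_ECHO (orig)   sctp_conntracks[0][6][sCW] -> sCE
 *   COOKIE_ACK  (reply)  sctp_conntracks[1][7][sCE] -> sES
 *
 * new_state() below performs exactly these lookups, and sctp_packet() marks
 * the conntrack ASSURED on the sCE -> sES transition in the reply direction. */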
static int sctp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
sctp_sctphdr_t _hdr, *hp;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
/* Actually only need first 8 bytes. */
hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
if (hp == NULL)
return 0;
tuple->src.u.sctp.port = hp->source;
tuple->dst.u.sctp.port = hp->dest;
return 1;
}
static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
DEBUGP(__FUNCTION__);
DEBUGP("\n");
tuple->src.u.sctp.port = orig->dst.u.sctp.port;
tuple->dst.u.sctp.port = orig->src.u.sctp.port;
return 1;
}
/* Print out the per-protocol part of the tuple. */
static int sctp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
DEBUGP(__FUNCTION__);
DEBUGP("\n");
return seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.sctp.port),
ntohs(tuple->dst.u.sctp.port));
}
/* Print out the private part of the conntrack. */
static int sctp_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
enum sctp_conntrack state;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
read_lock_bh(&sctp_lock);
state = conntrack->proto.sctp.state;
read_unlock_bh(&sctp_lock);
return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \
offset < skb->len && \
(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
offset += (htons(sch->length) + 3) & ~3, count++)
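/* Editor's illustration (not from the original patch): SCTP chunk lengths on
 * the wire do not include the padding that rounds each chunk up to a 4-byte
 * boundary, so the iterator above advances by (length + 3) & ~3; a chunk
 * whose length field reads 22 therefore moves 'offset' forward by 24 bytes
 * before the next skb_header_pointer() lookup. */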
/* Some validity checks to make sure the chunks are fine */
static int do_basic_checks(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff,
char *map)
{
u_int32_t offset, count;
sctp_chunkhdr_t _sch, *sch;
int flag;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
flag = 0;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
if (sch->type == SCTP_CID_INIT
|| sch->type == SCTP_CID_INIT_ACK
|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
flag = 1;
}
/* Cookie Ack/Echo chunks not the first OR
Init / Init Ack / Shutdown compl chunks not the only chunks */
if ((sch->type == SCTP_CID_COOKIE_ACK
|| sch->type == SCTP_CID_COOKIE_ECHO
|| flag)
&& count !=0 ) {
DEBUGP("Basic checks failed\n");
return 1;
}
if (map) {
set_bit(sch->type, (void *)map);
}
}
DEBUGP("Basic checks passed\n");
return 0;
}
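/* Editor's illustration (not from the original patch): two packets that fail
 * the checks above.  A packet bundling DATA followed by COOKIE_ECHO fails
 * because COOKIE_ECHO is not the first chunk (count != 0); a packet bundling
 * INIT followed by DATA fails because the INIT sets 'flag' and any further
 * chunk then trips the same test, i.e. INIT, INIT_ACK and SHUTDOWN_COMPLETE
 * must be the only chunk in a packet. */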
static int new_state(enum ip_conntrack_dir dir,
enum sctp_conntrack cur_state,
int chunk_type)
{
int i;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
DEBUGP("Chunk type: %d\n", chunk_type);
switch (chunk_type) {
case SCTP_CID_INIT:
DEBUGP("SCTP_CID_INIT\n");
i = 0; break;
case SCTP_CID_INIT_ACK:
DEBUGP("SCTP_CID_INIT_ACK\n");
i = 1; break;
case SCTP_CID_ABORT:
DEBUGP("SCTP_CID_ABORT\n");
i = 2; break;
case SCTP_CID_SHUTDOWN:
DEBUGP("SCTP_CID_SHUTDOWN\n");
i = 3; break;
case SCTP_CID_SHUTDOWN_ACK:
DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
i = 4; break;
case SCTP_CID_ERROR:
DEBUGP("SCTP_CID_ERROR\n");
i = 5; break;
case SCTP_CID_COOKIE_ECHO:
DEBUGP("SCTP_CID_COOKIE_ECHO\n");
i = 6; break;
case SCTP_CID_COOKIE_ACK:
DEBUGP("SCTP_CID_COOKIE_ACK\n");
i = 7; break;
case SCTP_CID_SHUTDOWN_COMPLETE:
DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8; break;
default:
/* Other chunks like DATA, SACK, HEARTBEAT and
its ACK do not cause a change in state */
DEBUGP("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]);
return cur_state;
}
DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
dir, sctp_conntrack_names[cur_state], chunk_type,
sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
return sctp_conntracks[dir][i][cur_state];
}
/* Returns verdict for packet, or -1 for invalid. */
static int sctp_packet(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum)
{
enum sctp_conntrack newconntrack, oldsctpstate;
sctp_sctphdr_t _sctph, *sh;
sctp_chunkhdr_t _sch, *sch;
u_int32_t offset, count;
char map[256 / sizeof (char)] = {0};
DEBUGP(__FUNCTION__);
DEBUGP("\n");
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
if (sh == NULL)
return -1;
if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
return -1;
/* Check the verification tag (Sec 8.5) */
if (!test_bit(SCTP_CID_INIT, (void *)map)
&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
&& !test_bit(SCTP_CID_ABORT, (void *)map)
&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
DEBUGP("Verification tag check failed\n");
return -1;
}
oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
write_lock_bh(&sctp_lock);
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
/* Sec 8.5.1 (A) */
if (sh->vtag != 0) {
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_ABORT) {
/* Sec 8.5.1 (B) */
if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
&& !(sh->vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)])) {
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
/* Sec 8.5.1 (C) */
if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
&& !(sh->vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)]
&& (sch->flags & 1))) {
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
/* Sec 8.5.1 (D) */
if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
write_unlock_bh(&sctp_lock);
return -1;
}
}
oldsctpstate = conntrack->proto.sctp.state;
newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
/* Invalid */
if (newconntrack == SCTP_CONNTRACK_MAX) {
DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
write_unlock_bh(&sctp_lock);
return -1;
}
/* If it is an INIT or an INIT ACK note down the vtag */
if (sch->type == SCTP_CID_INIT
|| sch->type == SCTP_CID_INIT_ACK) {
sctp_inithdr_t _inithdr, *ih;
ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
sizeof(_inithdr), &_inithdr);
if (ih == NULL) {
write_unlock_bh(&sctp_lock);
return -1;
}
DEBUGP("Setting vtag %x for dir %d\n",
ih->init_tag, !CTINFO2DIR(ctinfo));
conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
}
conntrack->proto.sctp.state = newconntrack;
if (oldsctpstate != newconntrack)
nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
write_unlock_bh(&sctp_lock);
}
nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
DEBUGP("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &conntrack->status);
nf_conntrack_event_cache(IPCT_STATUS, skb);
}
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
unsigned int dataoff)
{
enum sctp_conntrack newconntrack;
sctp_sctphdr_t _sctph, *sh;
sctp_chunkhdr_t _sch, *sch;
u_int32_t offset, count;
char map[256 / sizeof (char)] = {0};
DEBUGP(__FUNCTION__);
DEBUGP("\n");
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
if (sh == NULL)
return 0;
if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
return 0;
/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
if ((test_bit (SCTP_CID_ABORT, (void *)map))
|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
return 0;
}
newconntrack = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Don't need lock here: this conntrack not in circulation yet */
newconntrack = new_state(IP_CT_DIR_ORIGINAL,
SCTP_CONNTRACK_NONE, sch->type);
/* Invalid: delete conntrack */
if (newconntrack == SCTP_CONNTRACK_MAX) {
DEBUGP("nf_conntrack_sctp: invalid new deleting.\n");
return 0;
}
/* Copy the vtag into the state info */
if (sch->type == SCTP_CID_INIT) {
if (sh->vtag == 0) {
sctp_inithdr_t _inithdr, *ih;
ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
sizeof(_inithdr), &_inithdr);
if (ih == NULL)
return 0;
DEBUGP("Setting vtag %x for new conn\n",
ih->init_tag);
conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
ih->init_tag;
} else {
/* Sec 8.5.1 (A) */
return 0;
}
}
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
else {
DEBUGP("Setting vtag %x for new conn OOTB\n",
sh->vtag);
conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
}
conntrack->proto.sctp.state = newconntrack;
}
return 1;
}
struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = {
.l3proto = PF_INET,
.proto = IPPROTO_SCTP,
.name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
.print_tuple = sctp_print_tuple,
.print_conntrack = sctp_print_conntrack,
.packet = sctp_packet,
.new = sctp_new,
.destroy = NULL,
.me = THIS_MODULE
};
struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = {
.l3proto = PF_INET6,
.proto = IPPROTO_SCTP,
.name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
.print_tuple = sctp_print_tuple,
.print_conntrack = sctp_print_conntrack,
.packet = sctp_packet,
.new = sctp_new,
.destroy = NULL,
.me = THIS_MODULE
};
#ifdef CONFIG_SYSCTL
static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
.procname = "nf_conntrack_sctp_timeout_closed",
.data = &nf_ct_sctp_timeout_closed,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
.procname = "nf_conntrack_sctp_timeout_cookie_wait",
.data = &nf_ct_sctp_timeout_cookie_wait,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
.procname = "nf_conntrack_sctp_timeout_cookie_echoed",
.data = &nf_ct_sctp_timeout_cookie_echoed,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
.procname = "nf_conntrack_sctp_timeout_established",
.data = &nf_ct_sctp_timeout_established,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
.procname = "nf_conntrack_sctp_timeout_shutdown_sent",
.data = &nf_ct_sctp_timeout_shutdown_sent,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
.procname = "nf_conntrack_sctp_timeout_shutdown_recd",
.data = &nf_ct_sctp_timeout_shutdown_recd,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
.procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
.data = &nf_ct_sctp_timeout_shutdown_ack_sent,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_netfilter_table[] = {
{
.ctl_name = NET_NETFILTER,
.procname = "netfilter",
.mode = 0555,
.child = nf_ct_sysctl_table,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = nf_ct_netfilter_table,
},
{ .ctl_name = 0 }
};
static struct ctl_table_header *nf_ct_sysctl_header;
#endif
int __init init(void)
{
int ret;
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4);
if (ret) {
printk("nf_conntrack_proto_sctp4: protocol register failed\n");
goto out;
}
ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6);
if (ret) {
printk("nf_conntrack_proto_sctp6: protocol register failed\n");
goto cleanup_sctp4;
}
#ifdef CONFIG_SYSCTL
nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
if (nf_ct_sysctl_header == NULL) {
printk("nf_conntrack_proto_sctp: can't register to sysctl.\n");
goto cleanup;
}
#endif
return ret;
#ifdef CONFIG_SYSCTL
cleanup:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
#endif
cleanup_sctp4:
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
out:
DEBUGP("SCTP conntrack module loading %s\n",
ret ? "failed": "succeeded");
return ret;
}
void __exit fini(void)
{
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_sysctl_header);
#endif
DEBUGP("SCTP conntrack module unloaded\n");
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kiran Kumar Immidi");
MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");

File diff suppressed because it is too large

View file

@ -0,0 +1,216 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - enable working with Layer 3 protocol independent connection tracking.
*
* Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/udp.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_protocol.h>
unsigned long nf_ct_udp_timeout = 30*HZ;
unsigned long nf_ct_udp_timeout_stream = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
struct udphdr _hdr, *hp;
/* Actually only need first 8 bytes. */
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
return 0;
tuple->src.u.udp.port = hp->source;
tuple->dst.u.udp.port = hp->dest;
return 1;
}
static int udp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.udp.port = orig->dst.u.udp.port;
tuple->dst.u.udp.port = orig->src.u.udp.port;
return 1;
}
/* Print out the per-protocol part of the tuple. */
static int udp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
return seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.udp.port),
ntohs(tuple->dst.u.udp.port));
}
/* Print out the private part of the conntrack. */
static int udp_print_conntrack(struct seq_file *s,
const struct nf_conn *conntrack)
{
return 0;
}
/* Returns verdict for packet, and may modify conntracktype */
static int udp_packet(struct nf_conn *conntrack,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
int pf,
unsigned int hooknum)
{
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
nf_ct_refresh_acct(conntrack, ctinfo, skb,
nf_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
nf_conntrack_event_cache(IPCT_STATUS, skb);
} else
nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout);
return NF_ACCEPT;
}
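/* Editor's illustration (not from the original patch): the effect of the
 * policy above on an assumed request/reply exchange.  The first packet
 * refreshes the entry with the 30 second nf_ct_udp_timeout; once a packet
 * has been seen in the reply direction (IPS_SEEN_REPLY), every subsequent
 * packet uses the 180 second nf_ct_udp_timeout_stream and the entry is
 * marked ASSURED on the first such refresh. */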
/* Called when a new connection for this protocol found. */
static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
unsigned int dataoff)
{
return 1;
}
static int udp_error(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
int pf,
unsigned int hooknum,
int (*csum)(const struct sk_buff *, unsigned int))
{
unsigned int udplen = skb->len - dataoff;
struct udphdr _hdr, *hdr;
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
if (LOG_INVALID(IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: short packet ");
return -NF_ACCEPT;
}
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
if (LOG_INVALID(IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: truncated/malformed packet ");
return -NF_ACCEPT;
}
/* Packet with no checksum */
if (!hdr->check)
return NF_ACCEPT;
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
* because the semantic of CHECKSUM_HW is different there
* and moreover root might send raw packets.
* FIXME: Source route IP option packets --RR */
if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
(pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))
&& skb->ip_summed != CHECKSUM_UNNECESSARY
&& csum(skb, dataoff)) {
if (LOG_INVALID(IPPROTO_UDP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: bad UDP checksum ");
return -NF_ACCEPT;
}
return NF_ACCEPT;
}
static int csum4(const struct sk_buff *skb, unsigned int dataoff)
{
return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
skb->len - dataoff, IPPROTO_UDP,
skb->ip_summed == CHECKSUM_HW ? skb->csum
: skb_checksum(skb, dataoff,
skb->len - dataoff, 0));
}
static int csum6(const struct sk_buff *skb, unsigned int dataoff)
{
return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
skb->len - dataoff, IPPROTO_UDP,
skb->ip_summed == CHECKSUM_HW ? skb->csum
: skb_checksum(skb, dataoff, skb->len - dataoff,
0));
}
static int udp_error4(struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
int pf,
unsigned int hooknum)
{
return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4);
}
static int udp_error6(struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
int pf,
unsigned int hooknum)
{
return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6);
}
struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
{
.l3proto = PF_INET,
.proto = IPPROTO_UDP,
.name = "udp",
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
.print_conntrack = udp_print_conntrack,
.packet = udp_packet,
.new = udp_new,
.error = udp_error4,
};
struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
{
.l3proto = PF_INET6,
.proto = IPPROTO_UDP,
.name = "udp",
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
.print_conntrack = udp_print_conntrack,
.packet = udp_packet,
.new = udp_new,
.error = udp_error6,
};
EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
EXPORT_SYMBOL(nf_conntrack_protocol_udp6);

View file

@ -0,0 +1,869 @@
/* This file contains all the functions required for the standalone
nf_conntrack module.
These are not required by the compatibility layer.
*/
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
* - generalize L3 protocol dependent part.
*
* Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/percpu.h>
#include <linux/netdevice.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
MODULE_LICENSE("GPL");
extern atomic_t nf_conntrack_count;
DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
static int kill_l3proto(struct nf_conn *i, void *data)
{
return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
((struct nf_conntrack_l3proto *)data)->l3proto);
}
static int kill_proto(struct nf_conn *i, void *data)
{
struct nf_conntrack_protocol *proto;
proto = (struct nf_conntrack_protocol *)data;
return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
proto->proto) &&
(i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
proto->l3proto);
}
#ifdef CONFIG_PROC_FS
static int
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_protocol *proto)
{
return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple);
}
#ifdef CONFIG_NF_CT_ACCT
static unsigned int
seq_print_counters(struct seq_file *s,
const struct ip_conntrack_counter *counter)
{
return seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)counter->packets,
(unsigned long long)counter->bytes);
}
#else
#define seq_print_counters(x, y) 0
#endif
struct ct_iter_state {
unsigned int bucket;
};
static struct list_head *ct_get_first(struct seq_file *seq)
{
struct ct_iter_state *st = seq->private;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
if (!list_empty(&nf_conntrack_hash[st->bucket]))
return nf_conntrack_hash[st->bucket].next;
}
return NULL;
}
static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
{
struct ct_iter_state *st = seq->private;
head = head->next;
while (head == &nf_conntrack_hash[st->bucket]) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
head = nf_conntrack_hash[st->bucket].next;
}
return head;
}
static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
{
struct list_head *head = ct_get_first(seq);
if (head)
while (pos && (head = ct_get_next(seq, head)))
pos--;
return pos ? NULL : head;
}
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
read_lock_bh(&nf_conntrack_lock);
return ct_get_idx(seq, *pos);
}
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
return ct_get_next(s, v);
}
static void ct_seq_stop(struct seq_file *s, void *v)
{
read_unlock_bh(&nf_conntrack_lock);
}
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
const struct nf_conntrack_tuple_hash *hash = v;
const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash);
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_protocol *proto;
ASSERT_READ_LOCK(&nf_conntrack_lock);
NF_CT_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash))
return 0;
l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.src.l3num);
NF_CT_ASSERT(l3proto);
proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.src.l3num,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.protonum);
NF_CT_ASSERT(proto);
if (seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout)
? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
return -ENOSPC;
if (l3proto->print_conntrack(s, conntrack))
return -ENOSPC;
if (proto->print_conntrack(s, conntrack))
return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, proto))
return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
return -ENOSPC;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
if (seq_printf(s, "[UNREPLIED] "))
return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, proto))
return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
return -ENOSPC;
if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
if (seq_printf(s, "[ASSURED] "))
return -ENOSPC;
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (seq_printf(s, "mark=%u ", conntrack->mark))
return -ENOSPC;
#endif
if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
return -ENOSPC;
return 0;
}
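/* Editor's illustration (not from the original patch): assuming the IPv4 and
 * TCP modules (outside this hunk) register their names as "ipv4" and "tcp",
 * ct_seq_show() emits one /proc/net/nf_conntrack line per connection roughly
 * of the form
 *
 *   ipv4     2 tcp      6 431999 ESTABLISHED src=10.0.0.1 dst=10.0.0.2
 *       sport=34856 dport=22 src=10.0.0.2 dst=10.0.0.1 sport=22 dport=34856
 *       [ASSURED] use=1
 *
 * (wrapped here for readability; the real output is a single line, with
 * packet/byte counters added under CONFIG_NF_CT_ACCT and a mark= field under
 * CONFIG_NF_CONNTRACK_MARK). */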
static struct seq_operations ct_seq_ops = {
.start = ct_seq_start,
.next = ct_seq_next,
.stop = ct_seq_stop,
.show = ct_seq_show
};
static int ct_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
struct ct_iter_state *st;
int ret;
st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
if (st == NULL)
return -ENOMEM;
ret = seq_open(file, &ct_seq_ops);
if (ret)
goto out_free;
seq = file->private_data;
seq->private = st;
memset(st, 0, sizeof(struct ct_iter_state));
return ret;
out_free:
kfree(st);
return ret;
}
static struct file_operations ct_file_ops = {
.owner = THIS_MODULE,
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
/* expects */
static void *exp_seq_start(struct seq_file *s, loff_t *pos)
{
struct list_head *e = &nf_conntrack_expect_list;
loff_t i;
/* strange seq_file api calls stop even if we fail,
* thus we need to grab lock since stop unlocks */
read_lock_bh(&nf_conntrack_lock);
if (list_empty(e))
return NULL;
for (i = 0; i <= *pos; i++) {
e = e->next;
if (e == &nf_conntrack_expect_list)
return NULL;
}
return e;
}
static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct list_head *e = v;
++*pos;
e = e->next;
if (e == &nf_conntrack_expect_list)
return NULL;
return e;
}
static void exp_seq_stop(struct seq_file *s, void *v)
{
read_unlock_bh(&nf_conntrack_lock);
}
static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect = v;
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
seq_printf(s, "l3proto = %u proto=%u ",
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
nf_ct_find_l3proto(expect->tuple.src.l3num),
nf_ct_find_proto(expect->tuple.src.l3num,
expect->tuple.dst.protonum));
return seq_putc(s, '\n');
}
static struct seq_operations exp_seq_ops = {
.start = exp_seq_start,
.next = exp_seq_next,
.stop = exp_seq_stop,
.show = exp_seq_show
};
static int exp_open(struct inode *inode, struct file *file)
{
return seq_open(file, &exp_seq_ops);
}
static struct file_operations exp_file_ops = {
.owner = THIS_MODULE,
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
int cpu;
if (*pos == 0)
return SEQ_START_TOKEN;
for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
return &per_cpu(nf_conntrack_stat, cpu);
}
return NULL;
}
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu + 1;
return &per_cpu(nf_conntrack_stat, cpu);
}
return NULL;
}
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
return 0;
}
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x \n",
nr_conntracks,
st->searched,
st->found,
st->new,
st->invalid,
st->ignore,
st->delete,
st->delete_list,
st->insert,
st->insert_failed,
st->drop,
st->early_drop,
st->error,
st->expect_new,
st->expect_create,
st->expect_delete
);
return 0;
}
static struct seq_operations ct_cpu_seq_ops = {
.start = ct_cpu_seq_start,
.next = ct_cpu_seq_next,
.stop = ct_cpu_seq_stop,
.show = ct_cpu_seq_show,
};
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ct_cpu_seq_ops);
}
static struct file_operations ct_cpu_seq_fops = {
.owner = THIS_MODULE,
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
#endif /* CONFIG_PROC_FS */
/* Sysctl support */
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_core.c */
extern int nf_conntrack_max;
extern unsigned int nf_conntrack_htable_size;
/* From nf_conntrack_proto_tcp.c */
extern unsigned long nf_ct_tcp_timeout_syn_sent;
extern unsigned long nf_ct_tcp_timeout_syn_recv;
extern unsigned long nf_ct_tcp_timeout_established;
extern unsigned long nf_ct_tcp_timeout_fin_wait;
extern unsigned long nf_ct_tcp_timeout_close_wait;
extern unsigned long nf_ct_tcp_timeout_last_ack;
extern unsigned long nf_ct_tcp_timeout_time_wait;
extern unsigned long nf_ct_tcp_timeout_close;
extern unsigned long nf_ct_tcp_timeout_max_retrans;
extern int nf_ct_tcp_loose;
extern int nf_ct_tcp_be_liberal;
extern int nf_ct_tcp_max_retrans;
/* From nf_conntrack_proto_udp.c */
extern unsigned long nf_ct_udp_timeout;
extern unsigned long nf_ct_udp_timeout_stream;
/* From nf_conntrack_proto_generic.c */
extern unsigned long nf_ct_generic_timeout;
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
static struct ctl_table_header *nf_ct_sysctl_header;
static ctl_table nf_ct_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_MAX,
.procname = "nf_conntrack_max",
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NF_CONNTRACK_COUNT,
.procname = "nf_conntrack_count",
.data = &nf_conntrack_count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NF_CONNTRACK_BUCKETS,
.procname = "nf_conntrack_buckets",
.data = &nf_conntrack_htable_size,
.maxlen = sizeof(unsigned int),
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
.procname = "nf_conntrack_tcp_timeout_syn_sent",
.data = &nf_ct_tcp_timeout_syn_sent,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
.procname = "nf_conntrack_tcp_timeout_syn_recv",
.data = &nf_ct_tcp_timeout_syn_recv,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
.procname = "nf_conntrack_tcp_timeout_established",
.data = &nf_ct_tcp_timeout_established,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
.procname = "nf_conntrack_tcp_timeout_fin_wait",
.data = &nf_ct_tcp_timeout_fin_wait,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
.procname = "nf_conntrack_tcp_timeout_close_wait",
.data = &nf_ct_tcp_timeout_close_wait,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
.procname = "nf_conntrack_tcp_timeout_last_ack",
.data = &nf_ct_tcp_timeout_last_ack,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
.procname = "nf_conntrack_tcp_timeout_time_wait",
.data = &nf_ct_tcp_timeout_time_wait,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
.procname = "nf_conntrack_tcp_timeout_close",
.data = &nf_ct_tcp_timeout_close,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT,
.procname = "nf_conntrack_udp_timeout",
.data = &nf_ct_udp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
.procname = "nf_conntrack_udp_timeout_stream",
.data = &nf_ct_udp_timeout_stream,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT,
.procname = "nf_conntrack_generic_timeout",
.data = &nf_ct_generic_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
.procname = "nf_conntrack_log_invalid",
.data = &nf_ct_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
.procname = "nf_conntrack_tcp_timeout_max_retrans",
.data = &nf_ct_tcp_timeout_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
.procname = "nf_conntrack_tcp_loose",
.data = &nf_ct_tcp_loose,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
.procname = "nf_conntrack_tcp_be_liberal",
.data = &nf_ct_tcp_be_liberal,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
.procname = "nf_conntrack_tcp_max_retrans",
.data = &nf_ct_tcp_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
#define NET_NF_CONNTRACK_MAX 2089
static ctl_table nf_ct_netfilter_table[] = {
{
.ctl_name = NET_NETFILTER,
.procname = "netfilter",
.mode = 0555,
.child = nf_ct_sysctl_table,
},
{
.ctl_name = NET_NF_CONNTRACK_MAX,
.procname = "nf_conntrack_max",
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
static ctl_table nf_ct_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = nf_ct_netfilter_table,
},
{ .ctl_name = 0 }
};
EXPORT_SYMBOL(nf_ct_log_invalid);
#endif /* CONFIG_SYSCTL */
static int init_or_cleanup(int init)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
#endif
int ret = 0;
if (!init) goto cleanup;
ret = nf_conntrack_init();
if (ret < 0)
goto cleanup_nothing;
#ifdef CONFIG_PROC_FS
proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
if (!proc) goto cleanup_init;
proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
&exp_file_ops);
if (!proc_exp) goto cleanup_proc;
proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
if (!proc_stat)
goto cleanup_proc_exp;
proc_stat->proc_fops = &ct_cpu_seq_fops;
proc_stat->owner = THIS_MODULE;
#endif
#ifdef CONFIG_SYSCTL
nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
if (nf_ct_sysctl_header == NULL) {
printk("nf_conntrack: can't register to sysctl.\n");
ret = -ENOMEM;
goto cleanup_proc_stat;
}
#endif
return ret;
cleanup:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_sysctl_header);
cleanup_proc_stat:
#endif
#ifdef CONFIG_PROC_FS
proc_net_remove("nf_conntrack_stat");
cleanup_proc_exp:
proc_net_remove("nf_conntrack_expect");
cleanup_proc:
proc_net_remove("nf_conntrack");
cleanup_init:
#endif /* CONFIG_PROC_FS */
nf_conntrack_cleanup();
cleanup_nothing:
return ret;
}
int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
write_lock_bh(&nf_conntrack_lock);
if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) {
ret = -EBUSY;
goto out;
}
nf_ct_l3protos[proto->l3proto] = proto;
out:
write_unlock_bh(&nf_conntrack_lock);
return ret;
}
void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
write_lock_bh(&nf_conntrack_lock);
nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto;
write_unlock_bh(&nf_conntrack_lock);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l3proto, proto);
}
/* FIXME: Allow NULL functions and sub in pointers to generic for
them. --RR */
int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto)
{
int ret = 0;
retry:
write_lock_bh(&nf_conntrack_lock);
if (nf_ct_protos[proto->l3proto]) {
if (nf_ct_protos[proto->l3proto][proto->proto]
!= &nf_conntrack_generic_protocol) {
ret = -EBUSY;
goto out_unlock;
}
} else {
/* l3proto may be loaded later. */
struct nf_conntrack_protocol **proto_array;
int i;
write_unlock_bh(&nf_conntrack_lock);
proto_array = (struct nf_conntrack_protocol **)
kmalloc(MAX_NF_CT_PROTO *
sizeof(struct nf_conntrack_protocol *),
GFP_KERNEL);
if (proto_array == NULL) {
ret = -ENOMEM;
goto out;
}
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_generic_protocol;
write_lock_bh(&nf_conntrack_lock);
if (nf_ct_protos[proto->l3proto]) {
/* bad timing, but no problem */
write_unlock_bh(&nf_conntrack_lock);
kfree(proto_array);
} else {
nf_ct_protos[proto->l3proto] = proto_array;
write_unlock_bh(&nf_conntrack_lock);
}
/*
* Just once because array is never freed until unloading
* nf_conntrack.ko
*/
goto retry;
}
nf_ct_protos[proto->l3proto][proto->proto] = proto;
out_unlock:
write_unlock_bh(&nf_conntrack_lock);
out:
return ret;
}
void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto)
{
write_lock_bh(&nf_conntrack_lock);
nf_ct_protos[proto->l3proto][proto->proto]
= &nf_conntrack_generic_protocol;
write_unlock_bh(&nf_conntrack_lock);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(kill_proto, proto);
}
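/*
 * Illustrative sketch (not part of the original patch): a hypothetical
 * layer-4 protocol tracker module would typically call the registration
 * helpers above from its init path and unregister on exit, as it might
 * appear in a separate module source file.  The nf_conntrack_protocol_foo
 * instance and the foo names are assumptions; the instance is assumed to
 * be fully initialized elsewhere.  Excluded from the build on purpose.
 */
#if 0	/* example only */
extern struct nf_conntrack_protocol nf_conntrack_protocol_foo;

static int __init nf_ct_proto_foo_init(void)
{
	/* Fails with -EBUSY if another tracker already owns this protocol. */
	return nf_conntrack_protocol_register(&nf_conntrack_protocol_foo);
}

static void __exit nf_ct_proto_foo_fini(void)
{
	/* Waits for bh users and flushes existing entries for the protocol. */
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_foo);
}

module_init(nf_ct_proto_foo_init);
module_exit(nf_ct_proto_foo_fini);
#endif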
static int __init init(void)
{
return init_or_cleanup(1);
}
static void __exit fini(void)
{
init_or_cleanup(0);
}
module_init(init);
module_exit(fini);
/* Some modules need us, but don't depend directly on any symbol.
They should call this. */
void need_nf_conntrack(void)
{
}
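/*
 * Illustrative sketch (not part of the original patch): a hypothetical
 * module that needs the conntrack core loaded, but references none of its
 * other symbols, can create the symbol dependency like this.  Excluded
 * from the build on purpose.
 */
#if 0	/* example only */
static int __init needs_conntrack_init(void)
{
	need_nf_conntrack();	/* creates a symbol dependency on nf_conntrack */
	return 0;
}
#endif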
#ifdef CONFIG_NF_CONNTRACK_EVENTS
EXPORT_SYMBOL_GPL(nf_conntrack_chain);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
#endif
EXPORT_SYMBOL(nf_conntrack_l3proto_register);
EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
EXPORT_SYMBOL(nf_conntrack_protocol_register);
EXPORT_SYMBOL(nf_conntrack_protocol_unregister);
EXPORT_SYMBOL(nf_ct_invert_tuplepr);
EXPORT_SYMBOL(nf_conntrack_alter_reply);
EXPORT_SYMBOL(nf_conntrack_destroyed);
EXPORT_SYMBOL(need_nf_conntrack);
EXPORT_SYMBOL(nf_conntrack_helper_register);
EXPORT_SYMBOL(nf_conntrack_helper_unregister);
EXPORT_SYMBOL(nf_ct_iterate_cleanup);
EXPORT_SYMBOL(__nf_ct_refresh_acct);
EXPORT_SYMBOL(nf_ct_protos);
EXPORT_SYMBOL(nf_ct_find_proto);
EXPORT_SYMBOL(nf_ct_l3protos);
EXPORT_SYMBOL(nf_conntrack_expect_alloc);
EXPORT_SYMBOL(nf_conntrack_expect_put);
EXPORT_SYMBOL(nf_conntrack_expect_related);
EXPORT_SYMBOL(nf_conntrack_unexpect_related);
EXPORT_SYMBOL(nf_conntrack_tuple_taken);
EXPORT_SYMBOL(nf_conntrack_htable_size);
EXPORT_SYMBOL(nf_conntrack_lock);
EXPORT_SYMBOL(nf_conntrack_hash);
EXPORT_SYMBOL(nf_conntrack_untracked);
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
#ifdef CONFIG_IP_NF_NAT_NEEDED
EXPORT_SYMBOL(nf_conntrack_tcp_update);
#endif
EXPORT_SYMBOL(__nf_conntrack_confirm);
EXPORT_SYMBOL(nf_ct_get_tuple);
EXPORT_SYMBOL(nf_ct_invert_tuple);
EXPORT_SYMBOL(nf_conntrack_in);
EXPORT_SYMBOL(__nf_conntrack_attach);

View file

@ -2,4 +2,4 @@
# Makefile for the netlink driver.
#
obj-y := af_netlink.o
obj-y := af_netlink.o attr.o genetlink.o

View file

@ -58,6 +58,7 @@
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
#define Nprintk(a...)
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@ -427,7 +428,8 @@ static int netlink_release(struct socket *sock)
spin_lock(&nlk->cb_lock);
if (nlk->cb) {
nlk->cb->done(nlk->cb);
if (nlk->cb->done)
nlk->cb->done(nlk->cb);
netlink_destroy_callback(nlk->cb);
nlk->cb = NULL;
}
@ -1322,7 +1324,8 @@ static int netlink_dump(struct sock *sk)
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
cb->done(cb);
if (cb->done)
cb->done(cb);
nlk->cb = NULL;
spin_unlock(&nlk->cb_lock);
@ -1409,6 +1412,94 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *, int *))
{
unsigned int total_len;
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
nlh = (struct nlmsghdr *) skb->data;
if (skb->len < nlh->nlmsg_len)
return 0;
total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
if (cb(skb, nlh, &err) < 0) {
/* Not an error, but we have to interrupt processing
* here. Note that in this case we do not pull the
* message from the skb; it will be processed later.
*/
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, total_len);
}
return 0;
}
/**
* netlink_run_queue - Process netlink receive queue.
* @sk: Netlink socket containing the queue
* @qlen: Place to store queue length upon entry
* @cb: Callback function invoked for each netlink message found
*
* Processes as much as there was in the queue upon entry and invokes
* a callback function for each netlink message found. The callback
* function may refuse a message by returning a negative error code
* while setting the error pointer to 0, in which case this function
* returns with a qlen != 0.
*
* qlen must be initialized to 0 before the initial call; afterwards
* the function may be called repeatedly until qlen reaches 0.
*/
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
struct sk_buff *skb;
if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
*qlen = skb_queue_len(&sk->sk_receive_queue);
for (; *qlen; (*qlen)--) {
skb = skb_dequeue(&sk->sk_receive_queue);
if (netlink_rcv_skb(skb, cb)) {
if (skb->len)
skb_queue_head(&sk->sk_receive_queue, skb);
else {
kfree_skb(skb);
(*qlen)--;
}
break;
}
kfree_skb(skb);
}
}
/**
* netlink_queue_skip - Skip netlink message while processing queue.
* @nlh: Netlink message to be skipped
* @skb: Socket buffer containing the netlink messages.
*
* Pulls the given netlink message off the socket buffer so the next
* call to netlink_run_queue() will not reconsider the message.
*/
void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
{
int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
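/*
 * Illustrative sketch (not part of the original patch): how a hypothetical
 * kernel-side netlink user might drive netlink_run_queue() from the input
 * callback passed to netlink_kernel_create(), mirroring the genetlink and
 * xfrm_user callers added elsewhere in this patch.  All foo names are made
 * up.  Excluded from the build on purpose.
 */
#if 0	/* example only */
static int foo_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
	/* Ignore anything that is not a request. */
	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
		return 0;

	/* A real handler would dispatch on nlh->nlmsg_type here;
	 * returning 0 lets netlink_rcv_skb() ack the message if asked to. */
	return 0;
}

static void foo_netlink_rcv(struct sock *sk, int len)
{
	unsigned int qlen = 0;	/* must start at 0, see above */

	do {
		netlink_run_queue(sk, &qlen, &foo_rcv_msg);
	} while (qlen);
}
#endif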
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
@ -1657,6 +1748,8 @@ static int __init netlink_proto_init(void)
core_initcall(netlink_proto_init);
EXPORT_SYMBOL(netlink_ack);
EXPORT_SYMBOL(netlink_run_queue);
EXPORT_SYMBOL(netlink_queue_skip);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);

328
net/netlink/attr.c Normal file
View file

@ -0,0 +1,328 @@
/*
* NETLINK Netlink attributes
*
* Authors: Thomas Graf <tgraf@suug.ch>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/netlink.h>
static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
[NLA_U8] = sizeof(u8),
[NLA_U16] = sizeof(u16),
[NLA_U32] = sizeof(u32),
[NLA_U64] = sizeof(u64),
[NLA_STRING] = 1,
[NLA_NESTED] = NLA_HDRLEN,
};
static int validate_nla(struct nlattr *nla, int maxtype,
struct nla_policy *policy)
{
struct nla_policy *pt;
int minlen = 0;
if (nla->nla_type <= 0 || nla->nla_type > maxtype)
return 0;
pt = &policy[nla->nla_type];
BUG_ON(pt->type > NLA_TYPE_MAX);
if (pt->minlen)
minlen = pt->minlen;
else if (pt->type != NLA_UNSPEC)
minlen = nla_attr_minlen[pt->type];
if (pt->type == NLA_FLAG && nla_len(nla) > 0)
return -ERANGE;
if (nla_len(nla) < minlen)
return -ERANGE;
return 0;
}
/**
* nla_validate - Validate a stream of attributes
* @head: head of attribute stream
* @len: length of attribute stream
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
*
* Validates all attributes in the specified attribute stream against the
* specified policy. Attributes with a type exceeding maxtype will be
* ignored. See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
int nla_validate(struct nlattr *head, int len, int maxtype,
struct nla_policy *policy)
{
struct nlattr *nla;
int rem, err;
nla_for_each_attr(nla, head, len, rem) {
err = validate_nla(nla, maxtype, policy);
if (err < 0)
goto errout;
}
err = 0;
errout:
return err;
}
/**
* nla_parse - Parse a stream of attributes into a tb buffer
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
* @head: head of attribute stream
* @len: length of attribute stream
* @policy: validation policy
*
* Parses a stream of attributes and stores a pointer to each attribute in
* the tb array accessible via the attribute type. Attributes with a type
* exceeding maxtype will be silently ignored for backwards compatibility
* reasons. policy may be set to NULL if no validation is required.
*
* Returns 0 on success or a negative error code.
*/
int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
struct nla_policy *policy)
{
struct nlattr *nla;
int rem, err;
memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
nla_for_each_attr(nla, head, len, rem) {
u16 type = nla->nla_type;
if (type > 0 && type <= maxtype) {
if (policy) {
err = validate_nla(nla, maxtype, policy);
if (err < 0)
goto errout;
}
tb[type] = nla;
}
}
if (unlikely(rem > 0))
printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
"attributes.\n", rem);
err = 0;
errout:
return err;
}
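/*
 * Illustrative sketch (not part of the original patch): parsing a stream of
 * attributes into a tb[] buffer with a validation policy.  The FOO_ATTR_*
 * attribute set and the foo names are hypothetical; the generic netlink
 * controller added in this patch uses the same pattern for CTRL_ATTR_*.
 * Excluded from the build on purpose.
 */
#if 0	/* example only */
enum {
	FOO_ATTR_UNSPEC,
	FOO_ATTR_ID,		/* u16 */
	FOO_ATTR_NAME,		/* NUL-terminated string */
	__FOO_ATTR_MAX,
};
#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

static struct nla_policy foo_policy[FOO_ATTR_MAX+1] = {
	[FOO_ATTR_ID]	= { .type = NLA_U16 },
	[FOO_ATTR_NAME]	= { .type = NLA_STRING },
};

static int foo_parse(struct nlattr *head, int len)
{
	struct nlattr *tb[FOO_ATTR_MAX+1];
	int err;

	/* Fills tb[], indexed by attribute type, validating against the policy. */
	err = nla_parse(tb, FOO_ATTR_MAX, head, len, foo_policy);
	if (err < 0)
		return err;

	if (tb[FOO_ATTR_ID])
		printk(KERN_DEBUG "foo: id=%u\n", nla_get_u16(tb[FOO_ATTR_ID]));

	return 0;
}
#endif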
/**
* nla_find - Find a specific attribute in a stream of attributes
* @head: head of attribute stream
* @len: length of attribute stream
* @attrtype: type of attribute to look for
*
* Returns the first attribute in the stream matching the specified type.
*/
struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
{
struct nlattr *nla;
int rem;
nla_for_each_attr(nla, head, len, rem)
if (nla->nla_type == attrtype)
return nla;
return NULL;
}
/**
* nla_strlcpy - Copy string attribute payload into a sized buffer
* @dst: where to copy the string to
* @src: attribute to copy the string from
* @dstsize: size of destination buffer
*
* Copies at most dstsize - 1 bytes into the destination buffer.
* The result is always a valid NUL-terminated string. Unlike
* strlcpy the destination buffer is always padded out.
*
* Returns the length of the source buffer.
*/
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
{
size_t srclen = nla_len(nla);
char *src = nla_data(nla);
if (srclen > 0 && src[srclen - 1] == '\0')
srclen--;
if (dstsize > 0) {
size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen;
memset(dst, 0, dstsize);
memcpy(dst, src, len);
}
return srclen;
}
/**
* nla_memcpy - Copy a netlink attribute into another memory area
* @dest: where to copy to
* @src: netlink attribute to copy from
* @count: size of the destination area
*
* Note: The number of bytes copied is limited by the length of
* the attribute's payload.
*
* Returns the number of bytes copied.
*/
int nla_memcpy(void *dest, struct nlattr *src, int count)
{
int minlen = min_t(int, count, nla_len(src));
memcpy(dest, nla_data(src), minlen);
return minlen;
}
/**
* nla_memcmp - Compare an attribute with sized memory area
* @nla: netlink attribute
* @data: memory area
* @size: size of memory area
*/
int nla_memcmp(const struct nlattr *nla, const void *data,
size_t size)
{
int d = nla_len(nla) - size;
if (d == 0)
d = memcmp(nla_data(nla), data, size);
return d;
}
/**
* nla_strcmp - Compare a string attribute against a string
* @nla: netlink string attribute
* @str: another string
*/
int nla_strcmp(const struct nlattr *nla, const char *str)
{
int len = strlen(str) + 1;
int d = nla_len(nla) - len;
if (d == 0)
d = memcmp(nla_data(nla), str, len);
return d;
}
/**
* __nla_reserve - reserve room for attribute on the skb
* @skb: socket buffer to reserve room on
* @attrtype: attribute type
* @attrlen: length of attribute payload
*
* Adds a netlink attribute header to a socket buffer and reserves
* room for the payload but does not copy it.
*
* The caller is responsible for ensuring that the skb provides enough
* tailroom for the attribute header and payload.
*/
struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
{
struct nlattr *nla;
nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
nla->nla_type = attrtype;
nla->nla_len = nla_attr_size(attrlen);
memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
return nla;
}
/**
* nla_reserve - reserve room for attribute on the skb
* @skb: socket buffer to reserve room on
* @attrtype: attribute type
* @attrlen: length of attribute payload
*
* Adds a netlink attribute header to a socket buffer and reserves
* room for the payload but does not copy it.
*
* Returns NULL if the tailroom of the skb is insufficient to store
* the attribute header and payload.
*/
struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
{
if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
return NULL;
return __nla_reserve(skb, attrtype, attrlen);
}
/**
* __nla_put - Add a netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @attrlen: length of attribute payload
* @data: head of attribute payload
*
* The caller is responsible for ensuring that the skb provides enough
* tailroom for the attribute header and payload.
*/
void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
const void *data)
{
struct nlattr *nla;
nla = __nla_reserve(skb, attrtype, attrlen);
memcpy(nla_data(nla), data, attrlen);
}
/**
* nla_put - Add a netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
* @attrlen: length of attribute payload
* @data: head of attribute payload
*
* Returns -1 if the tailroom of the skb is insufficient to store
* the attribute header and payload.
*/
int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
return -1;
__nla_put(skb, attrtype, attrlen, data);
return 0;
}
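/*
 * Illustrative sketch (not part of the original patch): adding attributes
 * to a message under construction.  Reuses the hypothetical FOO_ATTR_*
 * types from the parsing sketch above; NLA_PUT_U16()/NLA_PUT_STRING() are
 * the convenience wrappers around nla_put() that jump to nla_put_failure
 * when the skb lacks tailroom, as used by ctrl_fill_info() in genetlink.c.
 * Excluded from the build on purpose.
 */
#if 0	/* example only */
static int foo_fill(struct sk_buff *skb, u16 id, const char *name)
{
	NLA_PUT_U16(skb, FOO_ATTR_ID, id);
	NLA_PUT_STRING(skb, FOO_ATTR_NAME, name);
	return 0;

nla_put_failure:
	return -1;
}
#endif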
EXPORT_SYMBOL(nla_validate);
EXPORT_SYMBOL(nla_parse);
EXPORT_SYMBOL(nla_find);
EXPORT_SYMBOL(nla_strlcpy);
EXPORT_SYMBOL(__nla_reserve);
EXPORT_SYMBOL(nla_reserve);
EXPORT_SYMBOL(__nla_put);
EXPORT_SYMBOL(nla_put);
EXPORT_SYMBOL(nla_memcpy);
EXPORT_SYMBOL(nla_memcmp);
EXPORT_SYMBOL(nla_strcmp);

579
net/netlink/genetlink.c Normal file
View file

@ -0,0 +1,579 @@
/*
* NETLINK Generic Netlink Family
*
* Authors: Jamal Hadi Salim
* Thomas Graf <tgraf@suug.ch>
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/genetlink.h>
struct sock *genl_sock = NULL;
static DECLARE_MUTEX(genl_sem); /* serialization of message processing */
static void genl_lock(void)
{
down(&genl_sem);
}
static int genl_trylock(void)
{
return down_trylock(&genl_sem);
}
static void genl_unlock(void)
{
up(&genl_sem);
if (genl_sock && genl_sock->sk_receive_queue.qlen)
genl_sock->sk_data_ready(genl_sock, 0);
}
#define GENL_FAM_TAB_SIZE 16
#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
static struct list_head family_ht[GENL_FAM_TAB_SIZE];
static int genl_ctrl_event(int event, void *data);
static inline unsigned int genl_family_hash(unsigned int id)
{
return id & GENL_FAM_TAB_MASK;
}
static inline struct list_head *genl_family_chain(unsigned int id)
{
return &family_ht[genl_family_hash(id)];
}
static struct genl_family *genl_family_find_byid(unsigned int id)
{
struct genl_family *f;
list_for_each_entry(f, genl_family_chain(id), family_list)
if (f->id == id)
return f;
return NULL;
}
static struct genl_family *genl_family_find_byname(char *name)
{
struct genl_family *f;
int i;
for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
list_for_each_entry(f, genl_family_chain(i), family_list)
if (strcmp(f->name, name) == 0)
return f;
return NULL;
}
static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
{
struct genl_ops *ops;
list_for_each_entry(ops, &family->ops_list, ops_list)
if (ops->cmd == cmd)
return ops;
return NULL;
}
/* Of course we are going to have problems once we hit
* 2^16 alive types, but that can only happen by year 2K
*/
static inline u16 genl_generate_id(void)
{
static u16 id_gen_idx;
int overflowed = 0;
do {
if (id_gen_idx == 0)
id_gen_idx = GENL_MIN_ID;
if (++id_gen_idx > GENL_MAX_ID) {
if (!overflowed) {
overflowed = 1;
id_gen_idx = 0;
continue;
} else
return 0;
}
} while (genl_family_find_byid(id_gen_idx));
return id_gen_idx;
}
/**
* genl_register_ops - register generic netlink operations
* @family: generic netlink family
* @ops: operations to be registered
*
* Registers the specified operations and assigns them to the specified
* family. Either a doit or dumpit callback must be specified or the
* operation will fail. Only one operation structure per command
* identifier may be registered.
*
* See include/net/genetlink.h for more documentation on the operations
* structure.
*
* Returns 0 on success or a negative error code.
*/
int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
{
int err = -EINVAL;
if (ops->dumpit == NULL && ops->doit == NULL)
goto errout;
if (genl_get_cmd(ops->cmd, family)) {
err = -EEXIST;
goto errout;
}
genl_lock();
list_add_tail(&ops->ops_list, &family->ops_list);
genl_unlock();
genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
err = 0;
errout:
return err;
}
/**
* genl_unregister_ops - unregister generic netlink operations
* @family: generic netlink family
* @ops: operations to be unregistered
*
* Unregisters the specified operations and unassigns them from the
* specified family. The call blocks until any message processing
* currently in progress has finished; processing does not resume
* until the unregistration has completed.
*
* Note: It is not necessary to unregister all operations before
* unregistering the family, unregistering the family will cause
* all assigned operations to be unregistered automatically.
*
* Returns 0 on success or a negative error code.
*/
int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
{
struct genl_ops *rc;
genl_lock();
list_for_each_entry(rc, &family->ops_list, ops_list) {
if (rc == ops) {
list_del(&ops->ops_list);
genl_unlock();
genl_ctrl_event(CTRL_CMD_DELOPS, ops);
return 0;
}
}
genl_unlock();
return -ENOENT;
}
/**
* genl_register_family - register a generic netlink family
* @family: generic netlink family
*
* Registers the specified family after validating it first. Only one
* family may be registered with the same family name or identifier.
* The family id may equal GENL_ID_GENERATE causing a unique id to
* be automatically generated and assigned.
*
* Returns 0 on success or a negative error code.
*/
int genl_register_family(struct genl_family *family)
{
int err = -EINVAL;
if (family->id && family->id < GENL_MIN_ID)
goto errout;
if (family->id > GENL_MAX_ID)
goto errout;
INIT_LIST_HEAD(&family->ops_list);
genl_lock();
if (genl_family_find_byname(family->name)) {
err = -EEXIST;
goto errout_locked;
}
if (genl_family_find_byid(family->id)) {
err = -EEXIST;
goto errout_locked;
}
if (!try_module_get(family->owner)) {
err = -EBUSY;
goto errout_locked;
}
if (family->id == GENL_ID_GENERATE) {
u16 newid = genl_generate_id();
if (!newid) {
err = -ENOMEM;
goto errout_locked;
}
family->id = newid;
}
if (family->maxattr) {
family->attrbuf = kmalloc((family->maxattr+1) *
sizeof(struct nlattr *), GFP_KERNEL);
if (family->attrbuf == NULL) {
err = -ENOMEM;
goto errout;
}
} else
family->attrbuf = NULL;
list_add_tail(&family->family_list, genl_family_chain(family->id));
genl_unlock();
genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
return 0;
errout_locked:
genl_unlock();
errout:
return err;
}
/**
* genl_unregister_family - unregister generic netlink family
* @family: generic netlink family
*
* Unregisters the specified family.
*
* Returns 0 on success or a negative error code.
*/
int genl_unregister_family(struct genl_family *family)
{
struct genl_family *rc;
genl_lock();
list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
if (family->id != rc->id || strcmp(rc->name, family->name))
continue;
list_del(&rc->family_list);
INIT_LIST_HEAD(&family->ops_list);
genl_unlock();
module_put(family->owner);
kfree(family->attrbuf);
genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
return 0;
}
genl_unlock();
return -ENOENT;
}
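/*
 * Illustrative sketch (not part of the original patch): registering a
 * hypothetical "foo" generic netlink family with a single doit operation.
 * All foo names and the command number are made up; the controller family
 * at the bottom of this file is the in-tree example of the same pattern.
 * Excluded from the build on purpose.
 */
#if 0	/* example only */
static struct genl_family foo_genl_family = {
	.id		= GENL_ID_GENERATE,	/* ask for an auto-assigned id */
	.name		= "foo",
	.version	= 1,
	.maxattr	= 0,			/* no attribute parsing wanted */
	.owner		= THIS_MODULE,
};

static int foo_doit(struct sk_buff *skb, struct genl_info *info)
{
	/* info->snd_pid, info->genlhdr etc. are filled in by genl_rcv_msg(). */
	return 0;
}

static struct genl_ops foo_genl_ops = {
	.cmd	= 1,		/* hypothetical command number */
	.doit	= foo_doit,
};

static int foo_register(void)
{
	int err;

	err = genl_register_family(&foo_genl_family);
	if (err)
		return err;

	err = genl_register_ops(&foo_genl_family, &foo_genl_ops);
	if (err)
		genl_unregister_family(&foo_genl_family);

	return err;
}
#endif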
static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
int *errp)
{
struct genl_ops *ops;
struct genl_family *family;
struct genl_info info;
struct genlmsghdr *hdr = nlmsg_data(nlh);
int hdrlen, err = -EINVAL;
if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
goto ignore;
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
goto ignore;
family = genl_family_find_byid(nlh->nlmsg_type);
if (family == NULL) {
err = -ENOENT;
goto errout;
}
hdrlen = GENL_HDRLEN + family->hdrsize;
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
goto errout;
ops = genl_get_cmd(hdr->cmd, family);
if (ops == NULL) {
err = -EOPNOTSUPP;
goto errout;
}
if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) {
err = -EPERM;
goto errout;
}
if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (ops->dumpit == NULL) {
err = -EOPNOTSUPP;
goto errout;
}
*errp = err = netlink_dump_start(genl_sock, skb, nlh,
ops->dumpit, NULL);
if (err == 0)
skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
skb->len));
return -1;
}
if (ops->doit == NULL) {
err = -EOPNOTSUPP;
goto errout;
}
if (family->attrbuf) {
err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
ops->policy);
if (err < 0)
goto errout;
}
info.snd_seq = nlh->nlmsg_seq;
info.snd_pid = NETLINK_CB(skb).pid;
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = family->attrbuf;
*errp = err = ops->doit(skb, &info);
return err;
ignore:
return 0;
errout:
*errp = err;
return -1;
}
static void genl_rcv(struct sock *sk, int len)
{
unsigned int qlen = 0;
do {
if (genl_trylock())
return;
netlink_run_queue(sk, &qlen, &genl_rcv_msg);
genl_unlock();
} while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen);
}
/**************************************************************************
* Controller
**************************************************************************/
static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
void *hdr;
hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
family->version);
if (hdr == NULL)
return -1;
NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
return genlmsg_end(skb, hdr);
nla_put_failure:
return genlmsg_cancel(skb, hdr);
}
static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
{
int i, n = 0;
struct genl_family *rt;
int chains_to_skip = cb->args[0];
int fams_to_skip = cb->args[1];
for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
if (i < chains_to_skip)
continue;
n = 0;
list_for_each_entry(rt, genl_family_chain(i), family_list) {
if (++n < fams_to_skip)
continue;
if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
skb, CTRL_CMD_NEWFAMILY) < 0)
goto errout;
}
fams_to_skip = 0;
}
errout:
cb->args[0] = i;
cb->args[1] = n;
return skb->len;
}
static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
int seq, int cmd)
{
struct sk_buff *skb;
int err;
skb = nlmsg_new(NLMSG_GOODSIZE);
if (skb == NULL)
return ERR_PTR(-ENOBUFS);
err = ctrl_fill_info(family, pid, seq, 0, skb, cmd);
if (err < 0) {
nlmsg_free(skb);
return ERR_PTR(err);
}
return skb;
}
static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
[CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 },
[CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING },
};
static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
struct genl_family *res = NULL;
int err = -EINVAL;
if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]);
res = genl_family_find_byid(id);
}
if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
char name[GENL_NAMSIZ];
if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
GENL_NAMSIZ) >= GENL_NAMSIZ)
goto errout;
res = genl_family_find_byname(name);
}
if (res == NULL) {
err = -ENOENT;
goto errout;
}
msg = ctrl_build_msg(res, info->snd_pid, info->snd_seq,
CTRL_CMD_NEWFAMILY);
if (IS_ERR(msg)) {
err = PTR_ERR(msg);
goto errout;
}
err = genlmsg_unicast(msg, info->snd_pid);
errout:
return err;
}
static int genl_ctrl_event(int event, void *data)
{
struct sk_buff *msg;
if (genl_sock == NULL)
return 0;
switch (event) {
case CTRL_CMD_NEWFAMILY:
case CTRL_CMD_DELFAMILY:
msg = ctrl_build_msg(data, 0, 0, event);
if (IS_ERR(msg))
return PTR_ERR(msg);
genlmsg_multicast(msg, 0, GENL_ID_CTRL);
break;
}
return 0;
}
static struct genl_ops genl_ctrl_ops = {
.cmd = CTRL_CMD_GETFAMILY,
.doit = ctrl_getfamily,
.dumpit = ctrl_dumpfamily,
.policy = ctrl_policy,
};
static struct genl_family genl_ctrl = {
.id = GENL_ID_CTRL,
.name = "nlctrl",
.version = 0x1,
.maxattr = CTRL_ATTR_MAX,
.owner = THIS_MODULE,
};
static int __init genl_init(void)
{
int i, err;
for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
INIT_LIST_HEAD(&family_ht[i]);
err = genl_register_family(&genl_ctrl);
if (err < 0)
goto errout;
err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops);
if (err < 0)
goto errout_register;
netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
genl_rcv, THIS_MODULE);
if (genl_sock == NULL) {
panic("GENL: Cannot initialize generic netlink\n");
return -ENOMEM;
}
return 0;
errout_register:
genl_unregister_family(&genl_ctrl);
errout:
panic("GENL: Cannot register controller: %d\n", err);
return err;
}
subsys_initcall(genl_init);
EXPORT_SYMBOL(genl_sock);
EXPORT_SYMBOL(genl_register_ops);
EXPORT_SYMBOL(genl_unregister_ops);
EXPORT_SYMBOL(genl_register_family);
EXPORT_SYMBOL(genl_unregister_family);

View file

@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
@ -26,6 +25,7 @@
#include <linux/security.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/netlink.h>
#include <asm/uaccess.h>
static struct sock *xfrm_nl;
@ -948,11 +948,6 @@ static struct xfrm_link {
[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy },
};
static int xfrm_done(struct netlink_callback *cb)
{
return 0;
}
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtattr *xfrma[XFRMA_MAX];
@ -984,20 +979,15 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
(nlh->nlmsg_flags & NLM_F_DUMP)) {
u32 rlen;
if (link->dump == NULL)
goto err_einval;
if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
link->dump,
xfrm_done)) != 0) {
link->dump, NULL)) != 0) {
return -1;
}
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
skb_pull(skb, rlen);
netlink_queue_skip(nlh, skb);
return -1;
}
@ -1032,60 +1022,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
return -1;
}
static int xfrm_user_rcv_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr *nlh;
while (skb->len >= NLMSG_SPACE(0)) {
u32 rlen;
nlh = (struct nlmsghdr *) skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) ||
skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
return 0;
}
static void xfrm_netlink_rcv(struct sock *sk, int len)
{
unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
unsigned int qlen = 0;
do {
struct sk_buff *skb;
down(&xfrm_cfg_sem);
if (qlen > skb_queue_len(&sk->sk_receive_queue))
qlen = skb_queue_len(&sk->sk_receive_queue);
for (; qlen; qlen--) {
skb = skb_dequeue(&sk->sk_receive_queue);
if (xfrm_user_rcv_skb(skb)) {
if (skb->len)
skb_queue_head(&sk->sk_receive_queue,
skb);
else {
kfree_skb(skb);
qlen--;
}
break;
}
kfree_skb(skb);
}
netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg);
up(&xfrm_cfg_sem);
} while (qlen);