Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net...
author Patrick McHardy <kaber@trash.net>
Thu, 11 Jun 2009 14:00:49 +0000 (16:00 +0200)
committer Patrick McHardy <kaber@trash.net>
Thu, 11 Jun 2009 14:00:49 +0000 (16:00 +0200)
40 files changed:
include/linux/netfilter/Kbuild
include/linux/netfilter/nf_conntrack_common.h
include/linux/netfilter/nf_conntrack_tcp.h
include/linux/netfilter/nfnetlink.h
include/linux/netfilter/nfnetlink_conntrack.h
include/linux/netfilter/x_tables.h
include/linux/netfilter/xt_NFQUEUE.h
include/linux/netfilter/xt_osf.h [new file with mode: 0644]
include/linux/netfilter/xt_socket.h [new file with mode: 0644]
include/net/netfilter/ipv4/nf_conntrack_icmp.h [deleted file]
include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_conntrack_l4proto.h
include/net/netlink.h
net/bridge/netfilter/ebtables.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_queue.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_MASQUERADE.c
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
net/ipv6/netfilter/ip6_queue.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_queue.c
net/netfilter/nfnetlink.c
net/netfilter/x_tables.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_osf.c [new file with mode: 0644]
net/netfilter/xt_socket.c

index af9d2fb..2aea503 100644 (file)
@@ -33,6 +33,7 @@ header-y += xt_limit.h
 header-y += xt_mac.h
 header-y += xt_mark.h
 header-y += xt_multiport.h
+header-y += xt_osf.h
 header-y += xt_owner.h
 header-y += xt_pkttype.h
 header-y += xt_quota.h
index 885cbe2..a8248ee 100644 (file)
@@ -75,75 +75,6 @@ enum ip_conntrack_status {
        IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
 };
 
-/* Connection tracking event bits */
-enum ip_conntrack_events
-{
-       /* New conntrack */
-       IPCT_NEW_BIT = 0,
-       IPCT_NEW = (1 << IPCT_NEW_BIT),
-
-       /* Expected connection */
-       IPCT_RELATED_BIT = 1,
-       IPCT_RELATED = (1 << IPCT_RELATED_BIT),
-
-       /* Destroyed conntrack */
-       IPCT_DESTROY_BIT = 2,
-       IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
-
-       /* Timer has been refreshed */
-       IPCT_REFRESH_BIT = 3,
-       IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
-
-       /* Status has changed */
-       IPCT_STATUS_BIT = 4,
-       IPCT_STATUS = (1 << IPCT_STATUS_BIT),
-
-       /* Update of protocol info */
-       IPCT_PROTOINFO_BIT = 5,
-       IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
-
-       /* Volatile protocol info */
-       IPCT_PROTOINFO_VOLATILE_BIT = 6,
-       IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
-
-       /* New helper for conntrack */
-       IPCT_HELPER_BIT = 7,
-       IPCT_HELPER = (1 << IPCT_HELPER_BIT),
-
-       /* Update of helper info */
-       IPCT_HELPINFO_BIT = 8,
-       IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
-
-       /* Volatile helper info */
-       IPCT_HELPINFO_VOLATILE_BIT = 9,
-       IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
-
-       /* NAT info */
-       IPCT_NATINFO_BIT = 10,
-       IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
-
-       /* Counter highest bit has been set, unused */
-       IPCT_COUNTER_FILLING_BIT = 11,
-       IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
-
-       /* Mark is set */
-       IPCT_MARK_BIT = 12,
-       IPCT_MARK = (1 << IPCT_MARK_BIT),
-
-       /* NAT sequence adjustment */
-       IPCT_NATSEQADJ_BIT = 13,
-       IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
-
-       /* Secmark is set */
-       IPCT_SECMARK_BIT = 14,
-       IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
-};
-
-enum ip_conntrack_expect_events {
-       IPEXP_NEW_BIT = 0,
-       IPEXP_NEW = (1 << IPEXP_NEW_BIT),
-};
-
 #ifdef __KERNEL__
 struct ip_conntrack_stat
 {
index b2f384d..4352fee 100644 (file)
@@ -15,7 +15,8 @@ enum tcp_conntrack {
        TCP_CONNTRACK_LAST_ACK,
        TCP_CONNTRACK_TIME_WAIT,
        TCP_CONNTRACK_CLOSE,
-       TCP_CONNTRACK_LISTEN,
+       TCP_CONNTRACK_LISTEN,   /* obsolete */
+#define TCP_CONNTRACK_SYN_SENT2        TCP_CONNTRACK_LISTEN
        TCP_CONNTRACK_MAX,
        TCP_CONNTRACK_IGNORE
 };
index c600083..bff4d57 100644 (file)
@@ -46,7 +46,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_CTNETLINK_EXP      2
 #define NFNL_SUBSYS_QUEUE              3
 #define NFNL_SUBSYS_ULOG               4
-#define NFNL_SUBSYS_COUNT              5
+#define NFNL_SUBSYS_OSF                        5
+#define NFNL_SUBSYS_COUNT              6
 
 #ifdef __KERNEL__
 
@@ -75,7 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 
 extern int nfnetlink_has_listeners(unsigned int group);
 extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
-                         int echo);
+                         int echo, gfp_t flags);
 extern void nfnetlink_set_err(u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
 
index 1a865e4..ed4ef8d 100644 (file)
@@ -101,6 +101,7 @@ enum ctattr_protoinfo_dccp {
        CTA_PROTOINFO_DCCP_UNSPEC,
        CTA_PROTOINFO_DCCP_STATE,
        CTA_PROTOINFO_DCCP_ROLE,
+       CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
        __CTA_PROTOINFO_DCCP_MAX,
 };
 #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1)
index c9efe03..1030b75 100644 (file)
@@ -184,9 +184,10 @@ struct xt_counters_info
  * @matchinfo: per-match data
  * @fragoff:   packet is a fragment, this is the data offset
  * @thoff:     position of transport header relative to skb->data
- * @hotdrop:   drop packet if we had inspection problems
+ * @hooknum:   hook number given packet came from
  * @family:    Actual NFPROTO_* through which the function is invoked
  *             (helpful when match->family == NFPROTO_UNSPEC)
+ * @hotdrop:   drop packet if we had inspection problems
  */
 struct xt_match_param {
        const struct net_device *in, *out;
@@ -194,8 +195,9 @@ struct xt_match_param {
        const void *matchinfo;
        int fragoff;
        unsigned int thoff;
-       bool *hotdrop;
+       unsigned int hooknum;
        u_int8_t family;
+       bool *hotdrop;
 };
 
 /**
index 982a89f..2584f4a 100644 (file)
@@ -15,4 +15,9 @@ struct xt_NFQ_info {
        __u16 queuenum;
 };
 
+struct xt_NFQ_info_v1 {
+       __u16 queuenum;
+       __u16 queues_total;
+};
+
 #endif /* _XT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h
new file mode 100644 (file)
index 0000000..fd2272e
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <johnpol@2ka.mxt.ru>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XT_OSF_H
+#define _XT_OSF_H
+
+#define MAXGENRELEN            32
+
+#define XT_OSF_GENRE           (1<<0)
+#define        XT_OSF_TTL              (1<<1)
+#define XT_OSF_LOG             (1<<2)
+#define XT_OSF_INVERT          (1<<3)
+
+#define XT_OSF_LOGLEVEL_ALL    0       /* log all matched fingerprints */
+#define XT_OSF_LOGLEVEL_FIRST  1       /* log only the first matched fingerprint */
+#define XT_OSF_LOGLEVEL_ALL_KNOWN      2 /* do not log unknown packets */
+
+#define XT_OSF_TTL_TRUE                0       /* True ip and fingerprint TTL comparison */
+#define XT_OSF_TTL_LESS                1       /* Check if ip TTL is less than fingerprint one */
+#define XT_OSF_TTL_NOCHECK     2       /* Do not compare ip and fingerprint TTL at all */
+
+struct xt_osf_info {
+       char                    genre[MAXGENRELEN];
+       __u32                   len;
+       __u32                   flags;
+       __u32                   loglevel;
+       __u32                   ttl;
+};
+
+/*
+ * Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct xt_osf_wc {
+       __u32                   wc;
+       __u32                   val;
+};
+
+/*
+ * This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct xt_osf_opt {
+       __u16                   kind, length;
+       struct xt_osf_wc        wc;
+};
+
+struct xt_osf_user_finger {
+       struct xt_osf_wc        wss;
+
+       __u8                    ttl, df;
+       __u16                   ss, mss;
+       __u16                   opt_num;
+
+       char                    genre[MAXGENRELEN];
+       char                    version[MAXGENRELEN];
+       char                    subtype[MAXGENRELEN];
+
+       /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+       struct xt_osf_opt       opt[MAX_IPOPTLEN];
+};
+
+struct xt_osf_nlmsg {
+       struct xt_osf_user_finger       f;
+       struct iphdr            ip;
+       struct tcphdr           tcp;
+};
+
+/* Defines for IANA option kinds */
+
+enum iana_options {
+       OSFOPT_EOL = 0,         /* End of options */
+       OSFOPT_NOP,             /* NOP */
+       OSFOPT_MSS,             /* Maximum segment size */
+       OSFOPT_WSO,             /* Window scale option */
+       OSFOPT_SACKP,           /* SACK permitted */
+       OSFOPT_SACK,            /* SACK */
+       OSFOPT_ECHO,
+       OSFOPT_ECHOREPLY,
+       OSFOPT_TS,              /* Timestamp option */
+       OSFOPT_POCP,            /* Partial Order Connection Permitted */
+       OSFOPT_POSP,            /* Partial Order Service Profile */
+
+       /* Others are not used in the current OSF */
+       OSFOPT_EMPTY = 255,
+};
+
+/*
+ * Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum xt_osf_window_size_options {
+       OSF_WSS_PLAIN   = 0,
+       OSF_WSS_MSS,
+       OSF_WSS_MTU,
+       OSF_WSS_MODULO,
+       OSF_WSS_MAX,
+};
+
+/*
+ * Add/remove fingerprint from the kernel.
+ */
+enum xt_osf_msg_types {
+       OSF_MSG_ADD,
+       OSF_MSG_REMOVE,
+       OSF_MSG_MAX,
+};
+
+enum xt_osf_attr_type {
+       OSF_ATTR_UNSPEC,
+       OSF_ATTR_FINGER,
+       OSF_ATTR_MAX,
+};
+
+#endif                         /* _XT_OSF_H */
diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h
new file mode 100644 (file)
index 0000000..6f475b8
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _XT_SOCKET_H
+#define _XT_SOCKET_H
+
+enum {
+       XT_SOCKET_TRANSPARENT = 1 << 0,
+};
+
+struct xt_socket_mtinfo1 {
+       __u8 flags;
+};
+
+#endif /* _XT_SOCKET_H */
diff --git a/include/net/netfilter/ipv4/nf_conntrack_icmp.h b/include/net/netfilter/ipv4/nf_conntrack_icmp.h
deleted file mode 100644 (file)
index 3dd22cf..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _NF_CONNTRACK_ICMP_H
-#define _NF_CONNTRACK_ICMP_H
-/* ICMP tracking. */
-#include <asm/atomic.h>
-
-struct ip_ct_icmp
-{
-       /* Optimization: when number in == number out, forget immediately. */
-       atomic_t count;
-};
-#endif /* _NF_CONNTRACK_ICMP_H */
index 86591af..67edd50 100644 (file)
@@ -9,7 +9,6 @@
 
 #ifndef _NF_CONNTRACK_ICMPV6_H
 #define _NF_CONNTRACK_ICMPV6_H
-#include <asm/atomic.h>
 
 #ifndef ICMPV6_NI_QUERY
 #define ICMPV6_NI_QUERY 139
 #define ICMPV6_NI_REPLY 140
 #endif
 
-struct nf_ct_icmpv6
-{
-       /* Optimization: when number in == number out, forget immediately. */
-       atomic_t count;
-};
-
 #endif /* _NF_CONNTRACK_ICMPV6_H */
index 6c3f964..ecc79f9 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #include <linux/netfilter/nf_conntrack_sctp.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
-#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
 #include <net/netfilter/nf_conntrack_tuple.h>
@@ -34,8 +33,6 @@ union nf_conntrack_proto {
        struct nf_ct_dccp dccp;
        struct ip_ct_sctp sctp;
        struct ip_ct_tcp tcp;
-       struct ip_ct_icmp icmp;
-       struct nf_ct_icmpv6 icmpv6;
        struct nf_ct_gre gre;
 };
 
@@ -96,6 +93,8 @@ struct nf_conn {
            plus 1 for any connection(s) we are `master' for */
        struct nf_conntrack ct_general;
 
+       spinlock_t lock;
+
        /* XXX should I move this to the tail ? - Y.K */
        /* These are my tuples; original and reply */
        struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
@@ -144,6 +143,8 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct)
        return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
 }
 
+#define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
 /* get master conntrack via master expectation */
 #define master_ct(conntr) (conntr->master)
 
@@ -201,7 +202,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
 
-extern void nf_conntrack_flush(struct net *net, u32 pid, int report);
+extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
 
 extern bool nf_ct_get_tuplepr(const struct sk_buff *skb,
                              unsigned int nhoff, u_int16_t l3num,
index 0ff0dc6..1afb907 100644 (file)
@@ -6,11 +6,55 @@
 #define _NF_CONNTRACK_ECACHE_H
 #include <net/netfilter/nf_conntrack.h>
 
-#include <linux/notifier.h>
 #include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 
+/* Connection tracking event bits */
+enum ip_conntrack_events
+{
+       /* New conntrack */
+       IPCT_NEW_BIT = 0,
+       IPCT_NEW = (1 << IPCT_NEW_BIT),
+
+       /* Expected connection */
+       IPCT_RELATED_BIT = 1,
+       IPCT_RELATED = (1 << IPCT_RELATED_BIT),
+
+       /* Destroyed conntrack */
+       IPCT_DESTROY_BIT = 2,
+       IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
+
+       /* Status has changed */
+       IPCT_STATUS_BIT = 3,
+       IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+
+       /* Update of protocol info */
+       IPCT_PROTOINFO_BIT = 4,
+       IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+
+       /* New helper for conntrack */
+       IPCT_HELPER_BIT = 5,
+       IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+
+       /* Mark is set */
+       IPCT_MARK_BIT = 6,
+       IPCT_MARK = (1 << IPCT_MARK_BIT),
+
+       /* NAT sequence adjustment */
+       IPCT_NATSEQADJ_BIT = 7,
+       IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
+
+       /* Secmark is set */
+       IPCT_SECMARK_BIT = 8,
+       IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
+};
+
+enum ip_conntrack_expect_events {
+       IPEXP_NEW_BIT = 0,
+       IPEXP_NEW = (1 << IPEXP_NEW_BIT),
+};
+
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 struct nf_conntrack_ecache {
        struct nf_conn *ct;
@@ -24,9 +68,13 @@ struct nf_ct_event {
        int report;
 };
 
-extern struct atomic_notifier_head nf_conntrack_chain;
-extern int nf_conntrack_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
+struct nf_ct_event_notifier {
+       int (*fcn)(unsigned int events, struct nf_ct_event *item);
+};
+
+extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
+extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
 extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -52,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
                          u32 pid,
                          int report)
 {
-       struct nf_ct_event item = {
-               .ct     = ct,
-               .pid    = pid,
-               .report = report
-       };
-       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-               atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
+       struct nf_ct_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
+       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+               struct nf_ct_event item = {
+                       .ct     = ct,
+                       .pid    = pid,
+                       .report = report
+               };
+               notify->fcn(event, &item);
+       }
+out_unlock:
+       rcu_read_unlock();
 }
 
 static inline void
@@ -73,9 +131,13 @@ struct nf_exp_event {
        int report;
 };
 
-extern struct atomic_notifier_head nf_ct_expect_chain;
-extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
-extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
+struct nf_exp_event_notifier {
+       int (*fcn)(unsigned int events, struct nf_exp_event *item);
+};
+
+extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
+extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
 static inline void
 nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -83,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                          u32 pid,
                          int report)
 {
-       struct nf_exp_event item = {
-               .exp    = exp,
-               .pid    = pid,
-               .report = report
-       };
-       atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+       struct nf_exp_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_expect_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
+       {
+               struct nf_exp_event item = {
+                       .exp    = exp,
+                       .pid    = pid,
+                       .report = report
+               };
+               notify->fcn(event, &item);
+       }
+out_unlock:
+       rcu_read_unlock();
 }
 
 static inline void
index ba32ed7..3767fb4 100644 (file)
@@ -59,11 +59,11 @@ struct nf_conntrack_l4proto
                           const struct nf_conntrack_tuple *);
 
        /* Print out the private part of the conntrack. */
-       int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
+       int (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 
        /* convert protoinfo to nfnetink attributes */
        int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
-                        const struct nf_conn *ct);
+                        struct nf_conn *ct);
        /* Calculate protoinfo nlattr size */
        int (*nlattr_size)(void);
 
index eddb502..007bdb0 100644 (file)
@@ -940,6 +940,15 @@ static inline u64 nla_get_u64(const struct nlattr *nla)
 }
 
 /**
+ * nla_get_be64 - return payload of __be64 attribute
+ * @nla: __be64 netlink attribute
+ */
+static inline __be64 nla_get_be64(const struct nlattr *nla)
+{
+       return *(__be64 *) nla_data(nla);
+}
+
+/**
  * nla_get_flag - return payload of flag attribute
  * @nla: flag netlink attribute
  */
index 820252a..37928d5 100644 (file)
@@ -142,6 +142,12 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
        return 0;
 }
 
+static inline __pure
+struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
+{
+       return (void *)entry + entry->next_offset;
+}
+
 /* Do some firewalling */
 unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
@@ -164,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
        mtpar.in      = tgpar.in  = in;
        mtpar.out     = tgpar.out = out;
        mtpar.hotdrop = &hotdrop;
-       tgpar.hooknum = hook;
+       mtpar.hooknum = tgpar.hooknum = hook;
 
        read_lock_bh(&table->lock);
        private = table->private;
@@ -249,8 +255,7 @@ letsreturn:
                /* jump to a udc */
                cs[sp].n = i + 1;
                cs[sp].chaininfo = chaininfo;
-               cs[sp].e = (struct ebt_entry *)
-                  (((char *)point) + point->next_offset);
+               cs[sp].e = ebt_next_entry(point);
                i = 0;
                chaininfo = (struct ebt_entries *) (base + verdict);
 #ifdef CONFIG_NETFILTER_DEBUG
@@ -266,8 +271,7 @@ letsreturn:
                sp++;
                continue;
 letscontinue:
-               point = (struct ebt_entry *)
-                  (((char *)point) + point->next_offset);
+               point = ebt_next_entry(point);
                i++;
        }
 
@@ -787,7 +791,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
                        /* this can't be 0, so the loop test is correct */
                        cl_s[i].cs.n = pos + 1;
                        pos = 0;
-                       cl_s[i].cs.e = ((void *)e + e->next_offset);
+                       cl_s[i].cs.e = ebt_next_entry(e);
                        e = (struct ebt_entry *)(hlp2->data);
                        nentries = hlp2->nentries;
                        cl_s[i].from = chain_nr;
@@ -797,7 +801,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
                        continue;
                }
 letscontinue:
-               e = (void *)e + e->next_offset;
+               e = ebt_next_entry(e);
                pos++;
        }
        return 0;
index 831fe18..7505dff 100644 (file)
@@ -231,6 +231,12 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
        return (struct arpt_entry *)(base + offset);
 }
 
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+       return (void *)entry + entry->next_offset;
+}
+
 unsigned int arpt_do_table(struct sk_buff *skb,
                           unsigned int hook,
                           const struct net_device *in,
@@ -267,67 +273,64 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
        arp = arp_hdr(skb);
        do {
-               if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
-                       struct arpt_entry_target *t;
-                       int hdr_len;
-
-                       hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
-                               (2 * skb->dev->addr_len);
+               struct arpt_entry_target *t;
+               int hdr_len;
 
-                       ADD_COUNTER(e->counters, hdr_len, 1);
+               if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+                       e = arpt_next_entry(e);
+                       continue;
+               }
 
-                       t = arpt_get_target(e);
+               hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+                       (2 * skb->dev->addr_len);
+               ADD_COUNTER(e->counters, hdr_len, 1);
 
-                       /* Standard target? */
-                       if (!t->u.kernel.target->target) {
-                               int v;
+               t = arpt_get_target(e);
 
-                               v = ((struct arpt_standard_target *)t)->verdict;
-                               if (v < 0) {
-                                       /* Pop from stack? */
-                                       if (v != ARPT_RETURN) {
-                                               verdict = (unsigned)(-v) - 1;
-                                               break;
-                                       }
-                                       e = back;
-                                       back = get_entry(table_base,
-                                                        back->comefrom);
-                                       continue;
-                               }
-                               if (table_base + v
-                                   != (void *)e + e->next_offset) {
-                                       /* Save old back ptr in next entry */
-                                       struct arpt_entry *next
-                                               = (void *)e + e->next_offset;
-                                       next->comefrom =
-                                               (void *)back - table_base;
-
-                                       /* set back pointer to next entry */
-                                       back = next;
-                               }
+               /* Standard target? */
+               if (!t->u.kernel.target->target) {
+                       int v;
 
-                               e = get_entry(table_base, v);
-                       } else {
-                               /* Targets which reenter must return
-                                * abs. verdicts
-                                */
-                               tgpar.target   = t->u.kernel.target;
-                               tgpar.targinfo = t->data;
-                               verdict = t->u.kernel.target->target(skb,
-                                                                    &tgpar);
-
-                               /* Target might have changed stuff. */
-                               arp = arp_hdr(skb);
-
-                               if (verdict == ARPT_CONTINUE)
-                                       e = (void *)e + e->next_offset;
-                               else
-                                       /* Verdict */
+                       v = ((struct arpt_standard_target *)t)->verdict;
+                       if (v < 0) {
+                               /* Pop from stack? */
+                               if (v != ARPT_RETURN) {
+                                       verdict = (unsigned)(-v) - 1;
                                        break;
+                               }
+                               e = back;
+                               back = get_entry(table_base, back->comefrom);
+                               continue;
                        }
-               } else {
-                       e = (void *)e + e->next_offset;
+                       if (table_base + v
+                           != arpt_next_entry(e)) {
+                               /* Save old back ptr in next entry */
+                               struct arpt_entry *next = arpt_next_entry(e);
+                               next->comefrom = (void *)back - table_base;
+
+                               /* set back pointer to next entry */
+                               back = next;
+                       }
+
+                       e = get_entry(table_base, v);
+                       continue;
                }
+
+               /* Targets which reenter must return
+                * abs. verdicts
+                */
+               tgpar.target   = t->u.kernel.target;
+               tgpar.targinfo = t->data;
+               verdict = t->u.kernel.target->target(skb, &tgpar);
+
+               /* Target might have changed stuff. */
+               arp = arp_hdr(skb);
+
+               if (verdict == ARPT_CONTINUE)
+                       e = arpt_next_entry(e);
+               else
+                       /* Verdict */
+                       break;
        } while (!hotdrop);
        xt_info_rdunlock_bh();
 
index 5f22c91..c156db2 100644 (file)
@@ -596,7 +596,7 @@ static int __init ip_queue_init(void)
 #ifdef CONFIG_SYSCTL
        ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
 #endif
-       status = nf_register_queue_handler(PF_INET, &nfqh);
+       status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
        if (status < 0) {
                printk(KERN_ERR "ip_queue: failed to register queue handler\n");
                goto cleanup_sysctl;
index 2ec8d72..5bf7c3f 100644 (file)
@@ -238,8 +238,8 @@ static struct nf_loginfo trace_loginfo = {
 /* Mildly perf critical (only if packet tracing is on) */
 static inline int
 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
-                     char *hookname, char **chainname,
-                     char **comment, unsigned int *rulenum)
+                     const char *hookname, const char **chainname,
+                     const char **comment, unsigned int *rulenum)
 {
        struct ipt_standard_target *t = (void *)ipt_get_target(s);
 
@@ -257,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
                   && unconditional(&s->ip)) {
                        /* Tail of chains: STANDARD target (return/policy) */
                        *comment = *chainname == hookname
-                               ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
-                               : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
+                               ? comments[NF_IP_TRACE_COMMENT_POLICY]
+                               : comments[NF_IP_TRACE_COMMENT_RETURN];
                }
                return 1;
        } else
@@ -277,14 +277,14 @@ static void trace_packet(struct sk_buff *skb,
 {
        void *table_base;
        const struct ipt_entry *root;
-       char *hookname, *chainname, *comment;
+       const char *hookname, *chainname, *comment;
        unsigned int rulenum = 0;
 
-       table_base = (void *)private->entries[smp_processor_id()];
+       table_base = private->entries[smp_processor_id()];
        root = get_entry(table_base, private->hook_entry[hook]);
 
-       hookname = chainname = (char *)hooknames[hook];
-       comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
+       hookname = chainname = hooknames[hook];
+       comment = comments[NF_IP_TRACE_COMMENT_RULE];
 
        IPT_ENTRY_ITERATE(root,
                          private->size - private->hook_entry[hook],
@@ -297,6 +297,12 @@ static void trace_packet(struct sk_buff *skb,
 }
 #endif
 
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+       return (void *)entry + entry->next_offset;
+}
+
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ipt_do_table(struct sk_buff *skb,
@@ -305,6 +311,8 @@ ipt_do_table(struct sk_buff *skb,
             const struct net_device *out,
             struct xt_table *table)
 {
+#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
+
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct iphdr *ip;
        u_int16_t datalen;
@@ -335,7 +343,7 @@ ipt_do_table(struct sk_buff *skb,
        mtpar.in      = tgpar.in  = in;
        mtpar.out     = tgpar.out = out;
        mtpar.family  = tgpar.family = NFPROTO_IPV4;
-       tgpar.hooknum = hook;
+       mtpar.hooknum = tgpar.hooknum = hook;
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        xt_info_rdlock_bh();
@@ -348,92 +356,84 @@ ipt_do_table(struct sk_buff *skb,
        back = get_entry(table_base, private->underflow[hook]);
 
        do {
+               struct ipt_entry_target *t;
+
                IP_NF_ASSERT(e);
                IP_NF_ASSERT(back);
-               if (ip_packet_match(ip, indev, outdev,
-                   &e->ip, mtpar.fragoff)) {
-                       struct ipt_entry_target *t;
-
-                       if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
-                               goto no_match;
+               if (!ip_packet_match(ip, indev, outdev,
+                   &e->ip, mtpar.fragoff) ||
+                   IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+                       e = ipt_next_entry(e);
+                       continue;
+               }
 
-                       ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+               ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
 
-                       t = ipt_get_target(e);
-                       IP_NF_ASSERT(t->u.kernel.target);
+               t = ipt_get_target(e);
+               IP_NF_ASSERT(t->u.kernel.target);
 
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-                       /* The packet is traced: log it */
-                       if (unlikely(skb->nf_trace))
-                               trace_packet(skb, hook, in, out,
-                                            table->name, private, e);
+               /* The packet is traced: log it */
+               if (unlikely(skb->nf_trace))
+                       trace_packet(skb, hook, in, out,
+                                    table->name, private, e);
 #endif
-                       /* Standard target? */
-                       if (!t->u.kernel.target->target) {
-                               int v;
-
-                               v = ((struct ipt_standard_target *)t)->verdict;
-                               if (v < 0) {
-                                       /* Pop from stack? */
-                                       if (v != IPT_RETURN) {
-                                               verdict = (unsigned)(-v) - 1;
-                                               break;
-                                       }
-                                       e = back;
-                                       back = get_entry(table_base,
-                                                        back->comefrom);
-                                       continue;
-                               }
-                               if (table_base + v != (void *)e + e->next_offset
-                                   && !(e->ip.flags & IPT_F_GOTO)) {
-                                       /* Save old back ptr in next entry */
-                                       struct ipt_entry *next
-                                               = (void *)e + e->next_offset;
-                                       next->comefrom
-                                               = (void *)back - table_base;
-                                       /* set back pointer to next entry */
-                                       back = next;
+               /* Standard target? */
+               if (!t->u.kernel.target->target) {
+                       int v;
+
+                       v = ((struct ipt_standard_target *)t)->verdict;
+                       if (v < 0) {
+                               /* Pop from stack? */
+                               if (v != IPT_RETURN) {
+                                       verdict = (unsigned)(-v) - 1;
+                                       break;
                                }
+                               e = back;
+                               back = get_entry(table_base, back->comefrom);
+                               continue;
+                       }
+                       if (table_base + v != ipt_next_entry(e)
+                           && !(e->ip.flags & IPT_F_GOTO)) {
+                               /* Save old back ptr in next entry */
+                               struct ipt_entry *next = ipt_next_entry(e);
+                               next->comefrom = (void *)back - table_base;
+                               /* set back pointer to next entry */
+                               back = next;
+                       }
+
+                       e = get_entry(table_base, v);
+                       continue;
+               }
+
+               /* Targets which reenter must return
+                  abs. verdicts */
+               tgpar.target   = t->u.kernel.target;
+               tgpar.targinfo = t->data;
+
 
-                               e = get_entry(table_base, v);
-                       } else {
-                               /* Targets which reenter must return
-                                  abs. verdicts */
-                               tgpar.target   = t->u.kernel.target;
-                               tgpar.targinfo = t->data;
 #ifdef CONFIG_NETFILTER_DEBUG
-                               ((struct ipt_entry *)table_base)->comefrom
-                                       = 0xeeeeeeec;
+               tb_comefrom = 0xeeeeeeec;
 #endif
-                               verdict = t->u.kernel.target->target(skb,
-                                                                    &tgpar);
+               verdict = t->u.kernel.target->target(skb, &tgpar);
 #ifdef CONFIG_NETFILTER_DEBUG
-                               if (((struct ipt_entry *)table_base)->comefrom
-                                   != 0xeeeeeeec
-                                   && verdict == IPT_CONTINUE) {
-                                       printk("Target %s reentered!\n",
-                                              t->u.kernel.target->name);
-                                       verdict = NF_DROP;
-                               }
-                               ((struct ipt_entry *)table_base)->comefrom
-                                       = 0x57acc001;
+               if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
+                       printk("Target %s reentered!\n",
+                              t->u.kernel.target->name);
+                       verdict = NF_DROP;
+               }
+               tb_comefrom = 0x57acc001;
 #endif
-                               /* Target might have changed stuff. */
-                               ip = ip_hdr(skb);
-                               datalen = skb->len - ip->ihl * 4;
-
-                               if (verdict == IPT_CONTINUE)
-                                       e = (void *)e + e->next_offset;
-                               else
-                                       /* Verdict */
-                                       break;
-                       }
-               } else {
+               /* Target might have changed stuff. */
+               ip = ip_hdr(skb);
+               datalen = skb->len - ip->ihl * 4;
 
-               no_match:
-                       e = (void *)e + e->next_offset;
-               }
+               if (verdict == IPT_CONTINUE)
+                       e = ipt_next_entry(e);
+               else
+                       /* Verdict */
+                       break;
        } while (!hotdrop);
        xt_info_rdunlock_bh();
 
@@ -444,6 +444,8 @@ ipt_do_table(struct sk_buff *skb,
                return NF_DROP;
        else return verdict;
 #endif
+
+#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -2158,7 +2160,7 @@ static bool icmp_checkentry(const struct xt_mtchk_param *par)
 static struct xt_target ipt_standard_target __read_mostly = {
        .name           = IPT_STANDARD_TARGET,
        .targetsize     = sizeof(int),
-       .family         = AF_INET,
+       .family         = NFPROTO_IPV4,
 #ifdef CONFIG_COMPAT
        .compatsize     = sizeof(compat_int_t),
        .compat_from_user = compat_standard_from_user,
@@ -2170,7 +2172,7 @@ static struct xt_target ipt_error_target __read_mostly = {
        .name           = IPT_ERROR_TARGET,
        .target         = ipt_error,
        .targetsize     = IPT_FUNCTION_MAXNAMELEN,
-       .family         = AF_INET,
+       .family         = NFPROTO_IPV4,
 };
 
 static struct nf_sockopt_ops ipt_sockopts = {
@@ -2196,17 +2198,17 @@ static struct xt_match icmp_matchstruct __read_mostly = {
        .matchsize      = sizeof(struct ipt_icmp),
        .checkentry     = icmp_checkentry,
        .proto          = IPPROTO_ICMP,
-       .family         = AF_INET,
+       .family         = NFPROTO_IPV4,
 };
 
 static int __net_init ip_tables_net_init(struct net *net)
 {
-       return xt_proto_init(net, AF_INET);
+       return xt_proto_init(net, NFPROTO_IPV4);
 }
 
 static void __net_exit ip_tables_net_exit(struct net *net)
 {
-       xt_proto_fini(net, AF_INET);
+       xt_proto_fini(net, NFPROTO_IPV4);
 }
 
 static struct pernet_operations ip_tables_net_ops = {
index c0992c7..dada086 100644 (file)
@@ -27,9 +27,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
-/* Lock protects masq region inside conntrack */
-static DEFINE_RWLOCK(masq_lock);
-
 /* FIXME: Multiple targets. --RR */
 static bool masquerade_tg_check(const struct xt_tgchk_param *par)
 {
@@ -79,9 +76,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
                return NF_DROP;
        }
 
-       write_lock_bh(&masq_lock);
        nat->masq_index = par->out->ifindex;
-       write_unlock_bh(&masq_lock);
 
        /* Transfer from original range. */
        newrange = ((struct nf_nat_range)
@@ -97,16 +92,11 @@ static int
 device_cmp(struct nf_conn *i, void *ifindex)
 {
        const struct nf_conn_nat *nat = nfct_nat(i);
-       int ret;
 
        if (!nat)
                return 0;
 
-       read_lock_bh(&masq_lock);
-       ret = (nat->masq_index == (int)(long)ifindex);
-       read_unlock_bh(&masq_lock);
-
-       return ret;
+       return nat->masq_index == (int)(long)ifindex;
 }
 
 static int masq_device_event(struct notifier_block *this,
index 23b2c2e..d71ba76 100644 (file)
@@ -82,18 +82,10 @@ static int icmp_packet(struct nf_conn *ct,
                       u_int8_t pf,
                       unsigned int hooknum)
 {
-       /* Try to delete connection immediately after all replies:
-          won't actually vanish as we still have skb, and del_timer
-          means this will only run once even if count hits zero twice
-          (theoretically possible with SMP) */
-       if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-               if (atomic_dec_and_test(&ct->proto.icmp.count))
-                       nf_ct_kill_acct(ct, ctinfo, skb);
-       } else {
-               atomic_inc(&ct->proto.icmp.count);
-               nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
-               nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
-       }
+       /* Do not immediately delete the connection after the first
+          successful reply to avoid excessive conntrackd traffic
+          and also to handle correctly ICMP echo reply duplicates. */
+       nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
 
        return NF_ACCEPT;
 }
@@ -117,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
                nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
                return false;
        }
-       atomic_set(&ct->proto.icmp.count, 0);
        return true;
 }
 
index b693f84..1cf3f0c 100644 (file)
@@ -598,7 +598,7 @@ static int __init ip6_queue_init(void)
 #ifdef CONFIG_SYSCTL
        ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
 #endif
-       status = nf_register_queue_handler(PF_INET6, &nfqh);
+       status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
        if (status < 0) {
                printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
                goto cleanup_sysctl;
index 219e165..ced1f2c 100644 (file)
@@ -270,8 +270,8 @@ static struct nf_loginfo trace_loginfo = {
 /* Mildly perf critical (only if packet tracing is on) */
 static inline int
 get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
-                     char *hookname, char **chainname,
-                     char **comment, unsigned int *rulenum)
+                     const char *hookname, const char **chainname,
+                     const char **comment, unsigned int *rulenum)
 {
        struct ip6t_standard_target *t = (void *)ip6t_get_target(s);
 
@@ -289,8 +289,8 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
                   && unconditional(&s->ipv6)) {
                        /* Tail of chains: STANDARD target (return/policy) */
                        *comment = *chainname == hookname
-                               ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY]
-                               : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN];
+                               ? comments[NF_IP6_TRACE_COMMENT_POLICY]
+                               : comments[NF_IP6_TRACE_COMMENT_RETURN];
                }
                return 1;
        } else
@@ -309,14 +309,14 @@ static void trace_packet(struct sk_buff *skb,
 {
        void *table_base;
        const struct ip6t_entry *root;
-       char *hookname, *chainname, *comment;
+       const char *hookname, *chainname, *comment;
        unsigned int rulenum = 0;
 
-       table_base = (void *)private->entries[smp_processor_id()];
+       table_base = private->entries[smp_processor_id()];
        root = get_entry(table_base, private->hook_entry[hook]);
 
-       hookname = chainname = (char *)hooknames[hook];
-       comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE];
+       hookname = chainname = hooknames[hook];
+       comment = comments[NF_IP6_TRACE_COMMENT_RULE];
 
        IP6T_ENTRY_ITERATE(root,
                           private->size - private->hook_entry[hook],
@@ -329,6 +329,12 @@ static void trace_packet(struct sk_buff *skb,
 }
 #endif
 
+static inline __pure struct ip6t_entry *
+ip6t_next_entry(const struct ip6t_entry *entry)
+{
+       return (void *)entry + entry->next_offset;
+}
+
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ip6t_do_table(struct sk_buff *skb,
@@ -337,6 +343,8 @@ ip6t_do_table(struct sk_buff *skb,
              const struct net_device *out,
              struct xt_table *table)
 {
+#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
+
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        bool hotdrop = false;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -361,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
        mtpar.in      = tgpar.in  = in;
        mtpar.out     = tgpar.out = out;
        mtpar.family  = tgpar.family = NFPROTO_IPV6;
-       tgpar.hooknum = hook;
+       mtpar.hooknum = tgpar.hooknum = hook;
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
@@ -375,96 +383,86 @@ ip6t_do_table(struct sk_buff *skb,
        back = get_entry(table_base, private->underflow[hook]);
 
        do {
+               struct ip6t_entry_target *t;
+
                IP_NF_ASSERT(e);
                IP_NF_ASSERT(back);
-               if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
-                       &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
-                       struct ip6t_entry_target *t;
-
-                       if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
-                               goto no_match;
+               if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
+                   &mtpar.thoff, &mtpar.fragoff, &hotdrop) ||
+                   IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+                       e = ip6t_next_entry(e);
+                       continue;
+               }
 
-                       ADD_COUNTER(e->counters,
-                                   ntohs(ipv6_hdr(skb)->payload_len) +
-                                   sizeof(struct ipv6hdr), 1);
+               ADD_COUNTER(e->counters,
+                           ntohs(ipv6_hdr(skb)->payload_len) +
+                           sizeof(struct ipv6hdr), 1);
 
-                       t = ip6t_get_target(e);
-                       IP_NF_ASSERT(t->u.kernel.target);
+               t = ip6t_get_target(e);
+               IP_NF_ASSERT(t->u.kernel.target);
 
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-                       /* The packet is traced: log it */
-                       if (unlikely(skb->nf_trace))
-                               trace_packet(skb, hook, in, out,
-                                            table->name, private, e);
+               /* The packet is traced: log it */
+               if (unlikely(skb->nf_trace))
+                       trace_packet(skb, hook, in, out,
+                                    table->name, private, e);
 #endif
-                       /* Standard target? */
-                       if (!t->u.kernel.target->target) {
-                               int v;
-
-                               v = ((struct ip6t_standard_target *)t)->verdict;
-                               if (v < 0) {
-                                       /* Pop from stack? */
-                                       if (v != IP6T_RETURN) {
-                                               verdict = (unsigned)(-v) - 1;
-                                               break;
-                                       }
-                                       e = back;
-                                       back = get_entry(table_base,
-                                                        back->comefrom);
-                                       continue;
-                               }
-                               if (table_base + v != (void *)e + e->next_offset
-                                   && !(e->ipv6.flags & IP6T_F_GOTO)) {
-                                       /* Save old back ptr in next entry */
-                                       struct ip6t_entry *next
-                                               = (void *)e + e->next_offset;
-                                       next->comefrom
-                                               = (void *)back - table_base;
-                                       /* set back pointer to next entry */
-                                       back = next;
+               /* Standard target? */
+               if (!t->u.kernel.target->target) {
+                       int v;
+
+                       v = ((struct ip6t_standard_target *)t)->verdict;
+                       if (v < 0) {
+                               /* Pop from stack? */
+                               if (v != IP6T_RETURN) {
+                                       verdict = (unsigned)(-v) - 1;
+                                       break;
                                }
+                               e = back;
+                               back = get_entry(table_base, back->comefrom);
+                               continue;
+                       }
+                       if (table_base + v != ip6t_next_entry(e)
+                           && !(e->ipv6.flags & IP6T_F_GOTO)) {
+                               /* Save old back ptr in next entry */
+                               struct ip6t_entry *next = ip6t_next_entry(e);
+                               next->comefrom = (void *)back - table_base;
+                               /* set back pointer to next entry */
+                               back = next;
+                       }
 
-                               e = get_entry(table_base, v);
-                       } else {
-                               /* Targets which reenter must return
-                                  abs. verdicts */
-                               tgpar.target   = t->u.kernel.target;
-                               tgpar.targinfo = t->data;
+                       e = get_entry(table_base, v);
+                       continue;
+               }
 
-#ifdef CONFIG_NETFILTER_DEBUG
-                               ((struct ip6t_entry *)table_base)->comefrom
-                                       = 0xeeeeeeec;
-#endif
-                               verdict = t->u.kernel.target->target(skb,
-                                                                    &tgpar);
+               /* Targets which reenter must return
+                  abs. verdicts */
+               tgpar.target   = t->u.kernel.target;
+               tgpar.targinfo = t->data;
 
 #ifdef CONFIG_NETFILTER_DEBUG
-                               if (((struct ip6t_entry *)table_base)->comefrom
-                                   != 0xeeeeeeec
-                                   && verdict == IP6T_CONTINUE) {
-                                       printk("Target %s reentered!\n",
-                                              t->u.kernel.target->name);
-                                       verdict = NF_DROP;
-                               }
-                               ((struct ip6t_entry *)table_base)->comefrom
-                                       = 0x57acc001;
+               tb_comefrom = 0xeeeeeeec;
 #endif
-                               if (verdict == IP6T_CONTINUE)
-                                       e = (void *)e + e->next_offset;
-                               else
-                                       /* Verdict */
-                                       break;
-                       }
-               } else {
+               verdict = t->u.kernel.target->target(skb, &tgpar);
 
-               no_match:
-                       e = (void *)e + e->next_offset;
+#ifdef CONFIG_NETFILTER_DEBUG
+               if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
+                       printk("Target %s reentered!\n",
+                              t->u.kernel.target->name);
+                       verdict = NF_DROP;
                }
+               tb_comefrom = 0x57acc001;
+#endif
+               if (verdict == IP6T_CONTINUE)
+                       e = ip6t_next_entry(e);
+               else
+                       /* Verdict */
+                       break;
        } while (!hotdrop);
 
 #ifdef CONFIG_NETFILTER_DEBUG
-       ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
+       tb_comefrom = NETFILTER_LINK_POISON;
 #endif
        xt_info_rdunlock_bh();
 
@@ -475,6 +473,8 @@ ip6t_do_table(struct sk_buff *skb,
                return NF_DROP;
        else return verdict;
 #endif
+
+#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -2191,7 +2191,7 @@ static bool icmp6_checkentry(const struct xt_mtchk_param *par)
 static struct xt_target ip6t_standard_target __read_mostly = {
        .name           = IP6T_STANDARD_TARGET,
        .targetsize     = sizeof(int),
-       .family         = AF_INET6,
+       .family         = NFPROTO_IPV6,
 #ifdef CONFIG_COMPAT
        .compatsize     = sizeof(compat_int_t),
        .compat_from_user = compat_standard_from_user,
@@ -2203,7 +2203,7 @@ static struct xt_target ip6t_error_target __read_mostly = {
        .name           = IP6T_ERROR_TARGET,
        .target         = ip6t_error,
        .targetsize     = IP6T_FUNCTION_MAXNAMELEN,
-       .family         = AF_INET6,
+       .family         = NFPROTO_IPV6,
 };
 
 static struct nf_sockopt_ops ip6t_sockopts = {
@@ -2229,17 +2229,17 @@ static struct xt_match icmp6_matchstruct __read_mostly = {
        .matchsize      = sizeof(struct ip6t_icmp),
        .checkentry     = icmp6_checkentry,
        .proto          = IPPROTO_ICMPV6,
-       .family         = AF_INET6,
+       .family         = NFPROTO_IPV6,
 };
 
 static int __net_init ip6_tables_net_init(struct net *net)
 {
-       return xt_proto_init(net, AF_INET6);
+       return xt_proto_init(net, NFPROTO_IPV6);
 }
 
 static void __net_exit ip6_tables_net_exit(struct net *net)
 {
-       xt_proto_fini(net, AF_INET6);
+       xt_proto_fini(net, NFPROTO_IPV6);
 }
 
 static struct pernet_operations ip6_tables_net_ops = {
index 9903227..642dcb1 100644 (file)
@@ -95,18 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct,
                       u_int8_t pf,
                       unsigned int hooknum)
 {
-       /* Try to delete connection immediately after all replies:
-          won't actually vanish as we still have skb, and del_timer
-          means this will only run once even if count hits zero twice
-          (theoretically possible with SMP) */
-       if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-               if (atomic_dec_and_test(&ct->proto.icmp.count))
-                       nf_ct_kill_acct(ct, ctinfo, skb);
-       } else {
-               atomic_inc(&ct->proto.icmp.count);
-               nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
-               nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
-       }
+       /* Do not immediately delete the connection after the first
+          successful reply to avoid excessive conntrackd traffic
+          and also to handle correctly ICMP echo reply duplicates. */
+       nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
 
        return NF_ACCEPT;
 }
@@ -132,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
                                      type + 128);
                return false;
        }
-       atomic_set(&ct->proto.icmp.count, 0);
        return true;
 }
 
index cb3ad74..79ba47f 100644 (file)
@@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32
 
          Details and examples are in the kernel module source.
 
+config NETFILTER_XT_MATCH_OSF
+       tristate '"osf" Passive OS fingerprint match'
+       depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+       help
+         This option selects the Passive OS Fingerprinting match module
+         that allows passively matching the remote operating system by
+         analyzing incoming TCP SYN packets.
+
+         Rules and loading software can be downloaded from
+         http://www.ioremap.net/projects/osf
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 endif # NETFILTER_XTABLES
 
 endmenu
index 6282060..49f62ee 100644 (file)
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
index 8020db6..edf9569 100644 (file)
@@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        help = nfct_help(ct);
        if (help && help->helper)
                nf_conntrack_event_cache(IPCT_HELPER, ct);
-#ifdef CONFIG_NF_NAT_NEEDED
-       if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-           test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-               nf_conntrack_event_cache(IPCT_NATINFO, ct);
-#endif
+
        nf_conntrack_event_cache(master_ct(ct) ?
                                 IPCT_RELATED : IPCT_NEW, ct);
        return NF_ACCEPT;
@@ -523,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
                return ERR_PTR(-ENOMEM);
        }
 
+       spin_lock_init(&ct->lock);
        atomic_set(&ct->ct_general.use, 1);
        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
@@ -807,8 +804,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                          unsigned long extra_jiffies,
                          int do_acct)
 {
-       int event = 0;
-
        NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
        NF_CT_ASSERT(skb);
 
@@ -821,7 +816,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
        /* If not in hash table, timer will not be active yet */
        if (!nf_ct_is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
-               event = IPCT_REFRESH;
        } else {
                unsigned long newtime = jiffies + extra_jiffies;
 
@@ -832,7 +826,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                    && del_timer(&ct->timeout)) {
                        ct->timeout.expires = newtime;
                        add_timer(&ct->timeout);
-                       event = IPCT_REFRESH;
                }
        }
 
@@ -849,10 +842,6 @@ acct:
        }
 
        spin_unlock_bh(&nf_conntrack_lock);
-
-       /* must be unlocked when calling event cache */
-       if (event)
-               nf_conntrack_event_cache(event, ct);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -1001,7 +990,7 @@ struct __nf_ct_flush_report {
        int report;
 };
 
-static int kill_all(struct nf_conn *i, void *data)
+static int kill_report(struct nf_conn *i, void *data)
 {
        struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
 
@@ -1013,6 +1002,11 @@ static int kill_all(struct nf_conn *i, void *data)
        return 1;
 }
 
+static int kill_all(struct nf_conn *i, void *data)
+{
+       return 1;
+}
+
 void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
 {
        if (vmalloced)
@@ -1023,15 +1017,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
 {
        struct __nf_ct_flush_report fr = {
                .pid    = pid,
                .report = report,
        };
-       nf_ct_iterate_cleanup(net, kill_all, &fr);
+       nf_ct_iterate_cleanup(net, kill_report, &fr);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_flush);
+EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
 static void nf_conntrack_cleanup_init_net(void)
 {
@@ -1045,7 +1039,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
        nf_ct_event_cache_flush(net);
        nf_conntrack_ecache_fini(net);
  i_see_dead_people:
-       nf_conntrack_flush(net, 0, 0);
+       nf_ct_iterate_cleanup(net, kill_all, NULL);
        if (atomic_read(&net->ct.count) != 0) {
                schedule();
                goto i_see_dead_people;
index dee4190..5516b3e 100644 (file)
 #include <linux/stddef.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
-#include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 static inline void
 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 {
+       struct nf_ct_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
        if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
            && ecache->events) {
                struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
                        .report = 0
                };
 
-               atomic_notifier_call_chain(&nf_conntrack_chain,
-                                          ecache->events,
-                                          &item);
+               notify->fcn(ecache->events, &item);
        }
 
        ecache->events = 0;
        nf_ct_put(ecache->ct);
        ecache->ct = NULL;
+
+out_unlock:
+       rcu_read_unlock();
 }
 
 /* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
        free_percpu(net->ct.ecache);
 }
 
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
-       return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+       int ret = 0;
+       struct nf_ct_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify != NULL) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+       rcu_assign_pointer(nf_conntrack_event_cb, new);
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
+
+out_unlock:
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 {
-       return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+       struct nf_ct_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       BUG_ON(notify != new);
+       rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+       mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 {
-       return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+       int ret = 0;
+       struct nf_exp_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_expect_event_cb);
+       if (notify != NULL) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+       rcu_assign_pointer(nf_expect_event_cb, new);
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
+
+out_unlock:
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 {
-       return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+       struct nf_exp_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_expect_event_cb);
+       BUG_ON(notify != new);
+       rcu_assign_pointer(nf_expect_event_cb, NULL);
+       mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
index 00fecc3..5509dd1 100644 (file)
@@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
 
        if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
                info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-               nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
        } else if (oldest != NUM_SEQ_TO_REMEMBER &&
                   after(nl_seq, info->seq_aft_nl[dir][oldest])) {
                info->seq_aft_nl[dir][oldest] = nl_seq;
-               nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
        }
 }
 
index c523f0b..4e503ad 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/notifier.h>
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
@@ -144,7 +143,7 @@ nla_put_failure:
 }
 
 static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
 {
        struct nf_conntrack_l4proto *l4proto;
        struct nlattr *nest_proto;
@@ -346,23 +345,21 @@ nla_put_failure:
        return -1;
 }
 
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-                   int event, int nowait,
-                   const struct nf_conn *ct)
+                   int event, struct nf_conn *ct)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
        struct nlattr *nest_parms;
-       unsigned char *b = skb_tail_pointer(skb);
+       unsigned int flags = pid ? NLM_F_MULTI : 0;
 
        event |= NFNL_SUBSYS_CTNETLINK << 8;
-       nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-       nfmsg  = NLMSG_DATA(nlh);
+       nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+       if (nlh == NULL)
+               goto nlmsg_failure;
 
-       nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+       nfmsg = nlmsg_data(nlh);
        nfmsg->nfgen_family = nf_ct_l3num(ct);
        nfmsg->version      = NFNETLINK_V0;
        nfmsg->res_id       = 0;
@@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
        nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
-       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+       if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
        nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
-       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+       if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
@@ -395,104 +392,81 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
            ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
                goto nla_put_failure;
 
-       nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+       nlmsg_end(skb, nlh);
        return skb->len;
 
 nlmsg_failure:
 nla_put_failure:
-       nlmsg_trim(skb, b);
+       nlmsg_cancel(skb, nlh);
        return -1;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-/*
- * The general structure of a ctnetlink event is
- *
- *  CTA_TUPLE_ORIG
- *    <l3/l4-proto-attributes>
- *  CTA_TUPLE_REPLY
- *    <l3/l4-proto-attributes>
- *  CTA_ID
- *  ...
- *  CTA_PROTOINFO
- *    <l4-proto-attributes>
- *  CTA_TUPLE_MASTER
- *    <l3/l4-proto-attributes>
- *
- * Therefore the formular is
- *
- *   size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas)
- *             + sizeof(protoinfo_nlas)
- */
-static struct sk_buff *
-ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp)
+static inline size_t
+ctnetlink_proto_size(const struct nf_conn *ct)
 {
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_l4proto *l4proto;
-       int len;
-
-#define NLA_TYPE_SIZE(type)            nla_total_size(sizeof(type))
-
-       /* proto independant part */
-       len = NLMSG_SPACE(sizeof(struct nfgenmsg))
-               + 3 * nla_total_size(0)         /* CTA_TUPLE_ORIG|REPL|MASTER */
-               + 3 * nla_total_size(0)         /* CTA_TUPLE_IP */
-               + 3 * nla_total_size(0)         /* CTA_TUPLE_PROTO */
-               + 3 * NLA_TYPE_SIZE(u_int8_t)   /* CTA_PROTO_NUM */
-               + NLA_TYPE_SIZE(u_int32_t)      /* CTA_ID */
-               + NLA_TYPE_SIZE(u_int32_t)      /* CTA_STATUS */
+       size_t len = 0;
+
+       rcu_read_lock();
+       l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+       len += l3proto->nla_size;
+
+       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       len += l4proto->nla_size;
+       rcu_read_unlock();
+
+       return len;
+}
+
+static inline size_t
+ctnetlink_nlmsg_size(const struct nf_conn *ct)
+{
+       return NLMSG_ALIGN(sizeof(struct nfgenmsg))
+              + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+              + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+              + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+              + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+              + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+              + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
 #ifdef CONFIG_NF_CT_ACCT
-               + 2 * nla_total_size(0)         /* CTA_COUNTERS_ORIG|REPL */
-               + 2 * NLA_TYPE_SIZE(uint64_t)   /* CTA_COUNTERS_PACKETS */
-               + 2 * NLA_TYPE_SIZE(uint64_t)   /* CTA_COUNTERS_BYTES */
+              + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+              + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+              + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
 #endif
-               + NLA_TYPE_SIZE(u_int32_t)      /* CTA_TIMEOUT */
-               + nla_total_size(0)             /* CTA_PROTOINFO */
-               + nla_total_size(0)             /* CTA_HELP */
-               + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+              + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+              + nla_total_size(0) /* CTA_PROTOINFO */
+              + nla_total_size(0) /* CTA_HELP */
+              + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
-               + NLA_TYPE_SIZE(u_int32_t)      /* CTA_SECMARK */
+              + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */
 #endif
 #ifdef CONFIG_NF_NAT_NEEDED
-               + 2 * nla_total_size(0)         /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
-               + 2 * NLA_TYPE_SIZE(u_int32_t)  /* CTA_NAT_SEQ_CORRECTION_POS */
-               + 2 * NLA_TYPE_SIZE(u_int32_t)  /* CTA_NAT_SEQ_CORRECTION_BEFORE */
-               + 2 * NLA_TYPE_SIZE(u_int32_t)  /* CTA_NAT_SEQ_CORRECTION_AFTER */
+              + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+              + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
 #endif
 #ifdef CONFIG_NF_CONNTRACK_MARK
-               + NLA_TYPE_SIZE(u_int32_t)      /* CTA_MARK */
+              + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
 #endif
-               ;
-
-#undef NLA_TYPE_SIZE
-
-       rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-       len += l3proto->nla_size;
-
-       l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
-       len += l4proto->nla_size;
-       rcu_read_unlock();
-
-       return alloc_skb(len, gfp);
+              + ctnetlink_proto_size(ct)
+              ;
 }
 
-static int ctnetlink_conntrack_event(struct notifier_block *this,
-                                    unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
        struct nlattr *nest_parms;
-       struct nf_ct_event *item = (struct nf_ct_event *)ptr;
        struct nf_conn *ct = item->ct;
        struct sk_buff *skb;
        unsigned int type;
-       sk_buff_data_t b;
        unsigned int flags = 0, group;
 
        /* ignore our fake conntrack entry */
        if (ct == &nf_conntrack_untracked)
-               return NOTIFY_DONE;
+               return 0;
 
        if (events & IPCT_DESTROY) {
                type = IPCTNL_MSG_CT_DELETE;
@@ -501,26 +475,25 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
                type = IPCTNL_MSG_CT_NEW;
                flags = NLM_F_CREATE|NLM_F_EXCL;
                group = NFNLGRP_CONNTRACK_NEW;
-       } else  if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
+       } else  if (events) {
                type = IPCTNL_MSG_CT_NEW;
                group = NFNLGRP_CONNTRACK_UPDATE;
        } else
-               return NOTIFY_DONE;
+               return 0;
 
        if (!item->report && !nfnetlink_has_listeners(group))
-               return NOTIFY_DONE;
+               return 0;
 
-       skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
-       if (!skb)
+       skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
+       if (skb == NULL)
                goto errout;
 
-       b = skb->tail;
-
        type |= NFNL_SUBSYS_CTNETLINK << 8;
-       nlh   = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
-       nfmsg = NLMSG_DATA(nlh);
+       nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+       if (nlh == NULL)
+               goto nlmsg_failure;
 
-       nlh->nlmsg_flags    = flags;
+       nfmsg = nlmsg_data(nlh);
        nfmsg->nfgen_family = nf_ct_l3num(ct);
        nfmsg->version  = NFNETLINK_V0;
        nfmsg->res_id   = 0;
@@ -529,14 +502,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
        nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
-       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+       if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
        nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
-       if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+       if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
                goto nla_put_failure;
        nla_nest_end(skb, nest_parms);
 
@@ -584,17 +557,18 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 #endif
        rcu_read_unlock();
 
-       nlh->nlmsg_len = skb->tail - b;
-       nfnetlink_send(skb, item->pid, group, item->report);
-       return NOTIFY_DONE;
+       nlmsg_end(skb, nlh);
+       nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+       return 0;
 
 nla_put_failure:
        rcu_read_unlock();
+       nlmsg_cancel(skb, nlh);
 nlmsg_failure:
        kfree_skb(skb);
 errout:
        nfnetlink_set_err(0, group, -ENOBUFS);
-       return NOTIFY_DONE;
+       return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
@@ -611,7 +585,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        struct nf_conn *ct, *last;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
        u_int8_t l3proto = nfmsg->nfgen_family;
 
        rcu_read_lock();
@@ -637,8 +611,7 @@ restart:
                        }
                        if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
                                                cb->nlh->nlmsg_seq,
-                                               IPCTNL_MSG_CT_NEW,
-                                               1, ct) < 0) {
+                                               IPCTNL_MSG_CT_NEW, ct) < 0) {
                                cb->args[1] = (unsigned long)ct;
                                goto out;
                        }
@@ -792,7 +765,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_tuple_hash *h;
        struct nf_conntrack_tuple tuple;
        struct nf_conn *ct;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
        int err = 0;
 
@@ -802,9 +775,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
                err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
        else {
                /* Flush the whole table */
-               nf_conntrack_flush(&init_net, 
-                                  NETLINK_CB(skb).pid, 
-                                  nlmsg_report(nlh));
+               nf_conntrack_flush_report(&init_net,
+                                        NETLINK_CB(skb).pid,
+                                        nlmsg_report(nlh));
                return 0;
        }
 
@@ -847,7 +820,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_tuple tuple;
        struct nf_conn *ct;
        struct sk_buff *skb2 = NULL;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
        int err = 0;
 
@@ -872,15 +845,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
        ct = nf_ct_tuplehash_to_ctrack(h);
 
        err = -ENOMEM;
-       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb2) {
+       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb2 == NULL) {
                nf_ct_put(ct);
                return -ENOMEM;
        }
 
        rcu_read_lock();
        err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
-                                 IPCTNL_MSG_CT_NEW, 1, ct);
+                                 IPCTNL_MSG_CT_NEW, ct);
        rcu_read_unlock();
        nf_ct_put(ct);
        if (err <= 0)
@@ -1325,7 +1298,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 {
        struct nf_conntrack_tuple otuple, rtuple;
        struct nf_conntrack_tuple_hash *h = NULL;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
        int err = 0;
 
@@ -1503,19 +1476,18 @@ nla_put_failure:
 
 static int
 ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-                   int event,
-                   int nowait,
-                   const struct nf_conntrack_expect *exp)
+                       int event, const struct nf_conntrack_expect *exp)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
-       unsigned char *b = skb_tail_pointer(skb);
+       unsigned int flags = pid ? NLM_F_MULTI : 0;
 
        event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-       nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-       nfmsg  = NLMSG_DATA(nlh);
+       nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+       if (nlh == NULL)
+               goto nlmsg_failure;
 
-       nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+       nfmsg = nlmsg_data(nlh);
        nfmsg->nfgen_family = exp->tuple.src.l3num;
        nfmsg->version      = NFNETLINK_V0;
        nfmsg->res_id       = 0;
@@ -1523,49 +1495,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
        if (ctnetlink_exp_dump_expect(skb, exp) < 0)
                goto nla_put_failure;
 
-       nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+       nlmsg_end(skb, nlh);
        return skb->len;
 
 nlmsg_failure:
 nla_put_failure:
-       nlmsg_trim(skb, b);
+       nlmsg_cancel(skb, nlh);
        return -1;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
-                                 unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
-       struct nf_exp_event *item = (struct nf_exp_event *)ptr;
        struct nf_conntrack_expect *exp = item->exp;
        struct sk_buff *skb;
        unsigned int type;
-       sk_buff_data_t b;
        int flags = 0;
 
        if (events & IPEXP_NEW) {
                type = IPCTNL_MSG_EXP_NEW;
                flags = NLM_F_CREATE|NLM_F_EXCL;
        } else
-               return NOTIFY_DONE;
+               return 0;
 
        if (!item->report &&
            !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
-               return NOTIFY_DONE;
+               return 0;
 
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-       if (!skb)
+       skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+       if (skb == NULL)
                goto errout;
 
-       b = skb->tail;
-
        type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-       nlh   = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
-       nfmsg = NLMSG_DATA(nlh);
+       nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+       if (nlh == NULL)
+               goto nlmsg_failure;
 
-       nlh->nlmsg_flags    = flags;
+       nfmsg = nlmsg_data(nlh);
        nfmsg->nfgen_family = exp->tuple.src.l3num;
        nfmsg->version      = NFNETLINK_V0;
        nfmsg->res_id       = 0;
@@ -1575,17 +1544,19 @@ static int ctnetlink_expect_event(struct notifier_block *this,
                goto nla_put_failure;
        rcu_read_unlock();
 
-       nlh->nlmsg_len = skb->tail - b;
-       nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
-       return NOTIFY_DONE;
+       nlmsg_end(skb, nlh);
+       nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+                      item->report, GFP_ATOMIC);
+       return 0;
 
 nla_put_failure:
        rcu_read_unlock();
+       nlmsg_cancel(skb, nlh);
 nlmsg_failure:
        kfree_skb(skb);
 errout:
        nfnetlink_set_err(0, 0, -ENOBUFS);
-       return NOTIFY_DONE;
+       return 0;
 }
 #endif
 static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1600,7 +1571,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
        struct net *net = &init_net;
        struct nf_conntrack_expect *exp, *last;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
        struct hlist_node *n;
        u_int8_t l3proto = nfmsg->nfgen_family;
 
@@ -1617,10 +1588,11 @@ restart:
                                        continue;
                                cb->args[1] = 0;
                        }
-                       if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+                       if (ctnetlink_exp_fill_info(skb,
+                                                   NETLINK_CB(cb->skb).pid,
                                                    cb->nlh->nlmsg_seq,
                                                    IPCTNL_MSG_EXP_NEW,
-                                                   1, exp) < 0) {
+                                                   exp) < 0) {
                                if (!atomic_inc_not_zero(&exp->use))
                                        continue;
                                cb->args[1] = (unsigned long)exp;
@@ -1652,7 +1624,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_expect *exp;
        struct sk_buff *skb2;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
        int err = 0;
 
@@ -1683,14 +1655,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
        }
 
        err = -ENOMEM;
-       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb2)
+       skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb2 == NULL)
                goto out;
 
        rcu_read_lock();
        err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
-                                     nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
-                                     1, exp);
+                                     nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
        rcu_read_unlock();
        if (err <= 0)
                goto free;
@@ -1713,7 +1684,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
        struct nf_conntrack_expect *exp;
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_helper *h;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        struct hlist_node *n, *next;
        u_int8_t u3 = nfmsg->nfgen_family;
        unsigned int i;
@@ -1854,7 +1825,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 {
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_expect *exp;
-       struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+       struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u_int8_t u3 = nfmsg->nfgen_family;
        int err = 0;
 
@@ -1891,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
-       .notifier_call  = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+       .fcn = ctnetlink_conntrack_event,
 };
 
-static struct notifier_block ctnl_notifier_exp = {
-       .notifier_call  = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+       .fcn = ctnetlink_expect_event,
 };
 #endif
 
index aee0d6b..1b816a2 100644 (file)
@@ -25,8 +25,6 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_log.h>
 
-static DEFINE_RWLOCK(dccp_lock);
-
 /* Timeouts are based on values from RFC4340:
  *
  * - REQUEST:
@@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
                return NF_ACCEPT;
        }
 
-       write_lock_bh(&dccp_lock);
+       spin_lock_bh(&ct->lock);
 
        role = ct->proto.dccp.role[dir];
        old_state = ct->proto.dccp.state;
@@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
                ct->proto.dccp.last_dir = dir;
                ct->proto.dccp.last_pkt = type;
 
-               write_unlock_bh(&dccp_lock);
+               spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_DCCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                      "nf_ct_dccp: invalid packet ignored ");
                return NF_ACCEPT;
        case CT_DCCP_INVALID:
-               write_unlock_bh(&dccp_lock);
+               spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_DCCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                      "nf_ct_dccp: invalid state transition ");
@@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
        ct->proto.dccp.last_dir = dir;
        ct->proto.dccp.last_pkt = type;
        ct->proto.dccp.state = new_state;
-       write_unlock_bh(&dccp_lock);
+       spin_unlock_bh(&ct->lock);
 
        if (new_state != old_state)
                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s,
                          ntohs(tuple->dst.u.dccp.port));
 }
 
-static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
        return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-                         const struct nf_conn *ct)
+                         struct nf_conn *ct)
 {
        struct nlattr *nest_parms;
 
-       read_lock_bh(&dccp_lock);
+       spin_lock_bh(&ct->lock);
        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
        NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
        NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
                   ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
+       NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
+                    cpu_to_be64(ct->proto.dccp.handshake_seq));
        nla_nest_end(skb, nest_parms);
-       read_unlock_bh(&dccp_lock);
+       spin_unlock_bh(&ct->lock);
        return 0;
 
 nla_put_failure:
-       read_unlock_bh(&dccp_lock);
+       spin_unlock_bh(&ct->lock);
        return -1;
 }
 
 static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
        [CTA_PROTOINFO_DCCP_STATE]      = { .type = NLA_U8 },
        [CTA_PROTOINFO_DCCP_ROLE]       = { .type = NLA_U8 },
+       [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
 };
 
 static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
                return -EINVAL;
        }
 
-       write_lock_bh(&dccp_lock);
+       spin_lock_bh(&ct->lock);
        ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
        if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
                ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
@@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
                ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
                ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
        }
-       write_unlock_bh(&dccp_lock);
+       if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
+               ct->proto.dccp.handshake_seq =
+               be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
+       }
+       spin_unlock_bh(&ct->lock);
        return 0;
 }
 
index a6d6ec3..a54a0af 100644 (file)
@@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
 }
 
 /* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
-                              const struct nf_conn *ct)
+static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
        return seq_printf(s, "timeout=%u, stream_timeout=%u ",
                          (ct->proto.gre.timeout / HZ),
index 101b4ad..c10e6f3 100644 (file)
@@ -25,9 +25,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
-/* Protects ct->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
    closely.  They're more complex. --RR
 
@@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
        enum sctp_conntrack state;
 
-       read_lock_bh(&sctp_lock);
+       spin_lock_bh(&ct->lock);
        state = ct->proto.sctp.state;
-       read_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
 
        return seq_printf(s, "%s ", sctp_conntrack_names[state]);
 }
@@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
        }
 
        old_state = new_state = SCTP_CONNTRACK_NONE;
-       write_lock_bh(&sctp_lock);
+       spin_lock_bh(&ct->lock);
        for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
                /* Special cases of Verification tag check (Sec 8.5.1) */
                if (sch->type == SCTP_CID_INIT) {
@@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
                if (old_state != new_state)
                        nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
        }
-       write_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
 
        nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
 
@@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 
 out_unlock:
-       write_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
 out:
        return -NF_ACCEPT;
 }
@@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
 static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-                         const struct nf_conn *ct)
+                         struct nf_conn *ct)
 {
        struct nlattr *nest_parms;
 
-       read_lock_bh(&sctp_lock);
+       spin_lock_bh(&ct->lock);
        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
@@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
                     CTA_PROTOINFO_SCTP_VTAG_REPLY,
                     ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
 
-       read_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
 
        nla_nest_end(skb, nest_parms);
 
        return 0;
 
 nla_put_failure:
-       read_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
        return -1;
 }
 
@@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
            !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
                return -EINVAL;
 
-       write_lock_bh(&sctp_lock);
+       spin_lock_bh(&ct->lock);
        ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
        ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
                nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
        ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
                nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
-       write_unlock_bh(&sctp_lock);
+       spin_unlock_bh(&ct->lock);
 
        return 0;
 }
index 97a6e93..5142e60 100644 (file)
@@ -29,9 +29,6 @@
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-/* Protects ct->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
 /* "Be conservative in what you do,
     be liberal in what you accept from others."
     If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = {
        "LAST_ACK",
        "TIME_WAIT",
        "CLOSE",
-       "LISTEN"
+       "SYN_SENT2",
 };
 
 #define SECS * HZ
@@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
+       [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
 };
 
 #define sNO TCP_CONNTRACK_NONE
@@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
 #define sLA TCP_CONNTRACK_LAST_ACK
 #define sTW TCP_CONNTRACK_TIME_WAIT
 #define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
+#define sS2 TCP_CONNTRACK_SYN_SENT2
 #define sIV TCP_CONNTRACK_MAX
 #define sIG TCP_CONNTRACK_IGNORE
 
@@ -123,6 +121,7 @@ enum tcp_bit_set {
  *
  * NONE:       initial state
  * SYN_SENT:   SYN-only packet seen
+ * SYN_SENT2:  SYN-only packet seen from reply dir, simultaneous open
  * SYN_RECV:   SYN-ACK packet seen
  * ESTABLISHED:        ACK packet seen
  * FIN_WAIT:   FIN packet seen
@@ -131,26 +130,24 @@ enum tcp_bit_set {
  * TIME_WAIT:  last ACK seen
  * CLOSE:      closed connection (RST)
  *
- * LISTEN state is not used.
- *
  * Packets marked as IGNORED (sIG):
  *     if they may be either invalid or valid
  *     and the receiver may send back a connection
  *     closing RST or a SYN/ACK.
  *
  * Packets marked as INVALID (sIV):
- *     if they are invalid
- *     or we do not support the request (simultaneous open)
+ *     if we regard them as truly invalid packets
  */
 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        {
 /* ORIGINAL */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*syn*/           { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*syn*/           { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
 /*
  *     sNO -> sSS      Initialize a new connection
  *     sSS -> sSS      Retransmitted SYN
- *     sSR -> sIG      Late retransmitted SYN?
+ *     sS2 -> sS2      Late retransmitted SYN
+ *     sSR -> sIG
  *     sES -> sIG      Error: SYNs in window outside the SYN_SENT state
  *                     are errors. Receiver will reply with RST
  *                     and close the connection.
@@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sSS      Reopened connection (RFC 1122).
  *     sCL -> sSS
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 /*
- * A SYN/ACK from the client is always invalid:
- *     - either it tries to set up a simultaneous open, which is
- *       not supported;
- *     - or the firewall has just been inserted between the two hosts
- *       during the session set-up. The SYN will be retransmitted
- *       by the true client (or it'll time out).
+ *     sNO -> sIV      Too late and no reason to do anything
+ *     sSS -> sIV      Client can't send SYN and then SYN/ACK
+ *     sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
+ *     sSR -> sIG
+ *     sES -> sIG      Error: SYNs in window outside the SYN_SENT state
+ *                     are errors. Receiver will reply with RST
+ *                     and close the connection.
+ *                     Or we are not in sync and hold a dead connection.
+ *     sFW -> sIG
+ *     sCW -> sIG
+ *     sLA -> sIG
+ *     sTW -> sIG
+ *     sCL -> sIG
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 /*
  *     sNO -> sIV      Too late and no reason to do anything...
  *     sSS -> sIV      Client migth not send FIN in this state:
  *                     we enforce waiting for a SYN/ACK reply first.
+ *     sS2 -> sIV
  *     sSR -> sFW      Close started.
  *     sES -> sFW
  *     sFW -> sLA      FIN seen in both directions, waiting for
@@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sTW
  *     sCL -> sCL
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 /*ack*/           { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
  *     sNO -> sES      Assumed.
  *     sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
+ *     sS2 -> sIV
  *     sSR -> sES      Established state is reached.
  *     sES -> sES      :-)
  *     sFW -> sCW      Normal close request answered by ACK.
@@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sTW      Retransmitted last ACK. Remain in the same state.
  *     sCL -> sCL
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
        },
        {
 /* REPLY */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*syn*/           { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*syn*/           { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
 /*
  *     sNO -> sIV      Never reached.
- *     sSS -> sIV      Simultaneous open, not supported
- *     sSR -> sIV      Simultaneous open, not supported.
- *     sES -> sIV      Server may not initiate a connection.
+ *     sSS -> sS2      Simultaneous open
+ *     sS2 -> sS2      Retransmitted simultaneous SYN
+ *     sSR -> sIV      Invalid SYN packets sent by the server
+ *     sES -> sIV
  *     sFW -> sIV
  *     sCW -> sIV
  *     sLA -> sIV
  *     sTW -> sIV      Reopened connection, but server may not do it.
  *     sCL -> sIV
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 /*
  *     sSS -> sSR      Standard open.
+ *     sS2 -> sSR      Simultaneous open
  *     sSR -> sSR      Retransmitted SYN/ACK.
  *     sES -> sIG      Late retransmitted SYN/ACK?
  *     sFW -> sIG      Might be SYN/ACK answering ignored SYN
@@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sIG
  *     sCL -> sIG
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 /*
  *     sSS -> sIV      Server might not send FIN in this state.
+ *     sS2 -> sIV
  *     sSR -> sFW      Close started.
  *     sES -> sFW
  *     sFW -> sLA      FIN seen in both directions.
@@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sTW
  *     sCL -> sCL
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*ack*/           { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*ack*/           { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
 /*
  *     sSS -> sIG      Might be a half-open connection.
+ *     sS2 -> sIG
  *     sSR -> sSR      Might answer late resent SYN.
  *     sES -> sES      :-)
  *     sFW -> sCW      Normal close request answered by ACK.
@@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sTW -> sTW      Retransmitted last ACK.
  *     sCL -> sCL
  */
-/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
        }
 };
@@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
        enum tcp_conntrack state;
 
-       read_lock_bh(&tcp_lock);
+       spin_lock_bh(&ct->lock);
        state = ct->proto.tcp.state;
-       read_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
 
        return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 }
@@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
                 receiver->td_scale);
 
-       if (sender->td_end == 0) {
+       if (sender->td_maxwin == 0) {
                /*
                 * Initialize sender data.
                 */
-               if (tcph->syn && tcph->ack) {
+               if (tcph->syn) {
                        /*
-                        * Outgoing SYN-ACK in reply to a SYN.
+                        * SYN-ACK in reply to a SYN
+                        * or SYN from reply direction in simultaneous open.
                         */
                        sender->td_end =
                        sender->td_maxend = end;
@@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
                                sender->td_scale =
                                receiver->td_scale = 0;
+                       if (!tcph->ack)
+                               /* Simultaneous open */
+                               return true;
                } else {
                        /*
                         * We are in the middle of a connection,
@@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
 
        end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
 
-       write_lock_bh(&tcp_lock);
+       spin_lock_bh(&ct->lock);
        /*
         * We have to worry for the ack in the reply packet only...
         */
        if (after(end, ct->proto.tcp.seen[dir].td_end))
                ct->proto.tcp.seen[dir].td_end = end;
        ct->proto.tcp.last_end = end;
-       write_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
        pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct,
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        BUG_ON(th == NULL);
 
-       write_lock_bh(&tcp_lock);
+       spin_lock_bh(&ct->lock);
        old_state = ct->proto.tcp.state;
        dir = CTINFO2DIR(ctinfo);
        index = get_conntrack_index(th);
@@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
                        && ct->proto.tcp.last_index == TCP_RST_SET)) {
                        /* Attempt to reopen a closed/aborted connection.
                         * Delete this connection and look up again. */
-                       write_unlock_bh(&tcp_lock);
+                       spin_unlock_bh(&ct->lock);
 
                        /* Only repeat if we can actually remove the timer.
                         * Destruction may already be in progress in process
@@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct,
                         * that the client cannot but retransmit its SYN and
                         * thus initiate a clean new session.
                         */
-                       write_unlock_bh(&tcp_lock);
+                       spin_unlock_bh(&ct->lock);
                        if (LOG_INVALID(net, IPPROTO_TCP))
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: killing out of sync session ");
@@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct,
                ct->proto.tcp.last_end =
                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
-               write_unlock_bh(&tcp_lock);
+               spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid packet ignored ");
@@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct,
                /* Invalid packet */
                pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
                         dir, get_conntrack_index(th), old_state);
-               write_unlock_bh(&tcp_lock);
+               spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid state ");
@@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct,
 
        if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
                           skb, dataoff, th, pf)) {
-               write_unlock_bh(&tcp_lock);
+               spin_unlock_bh(&ct->lock);
                return -NF_ACCEPT;
        }
      in_window:
@@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct,
                timeout = nf_ct_tcp_timeout_unacknowledged;
        else
                timeout = tcp_timeouts[new_state];
-       write_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
 
-       nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
        if (new_state != old_state)
                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
@@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
        ct->proto.tcp.seen[1].td_end = 0;
        ct->proto.tcp.seen[1].td_maxend = 0;
-       ct->proto.tcp.seen[1].td_maxwin = 1;
+       ct->proto.tcp.seen[1].td_maxwin = 0;
        ct->proto.tcp.seen[1].td_scale = 0;
 
        /* tcp_packet will set them */
@@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-                        const struct nf_conn *ct)
+                        struct nf_conn *ct)
 {
        struct nlattr *nest_parms;
        struct nf_ct_tcp_flags tmp = {};
 
-       read_lock_bh(&tcp_lock);
+       spin_lock_bh(&ct->lock);
        nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
        if (!nest_parms)
                goto nla_put_failure;
@@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
        tmp.flags = ct->proto.tcp.seen[1].flags;
        NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
                sizeof(struct nf_ct_tcp_flags), &tmp);
-       read_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
 
        nla_nest_end(skb, nest_parms);
 
        return 0;
 
 nla_put_failure:
-       read_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
        return -1;
 }
 
@@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
            nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
                return -EINVAL;
 
-       write_lock_bh(&tcp_lock);
+       spin_lock_bh(&ct->lock);
        if (tb[CTA_PROTOINFO_TCP_STATE])
                ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
 
@@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
                ct->proto.tcp.seen[1].td_scale =
                        nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
        }
-       write_unlock_bh(&tcp_lock);
+       spin_unlock_bh(&ct->lock);
 
        return 0;
 }
@@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
+               .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
+               .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+       {
                .procname       = "ip_conntrack_tcp_timeout_syn_recv",
                .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
                .maxlen         = sizeof(unsigned int),
index 4f2310c..3a6fd77 100644 (file)
@@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb,
                                  queuenum);
 
        switch (pf) {
-       case AF_INET:
+       case NFPROTO_IPV4:
                skb->protocol = htons(ETH_P_IP);
                break;
-       case AF_INET6:
+       case NFPROTO_IPV6:
                skb->protocol = htons(ETH_P_IPV6);
                break;
        }
index b8ab37a..92761a9 100644 (file)
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+                  unsigned group, int echo, gfp_t flags)
 {
-       return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+       return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
@@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                return -EPERM;
 
        /* All the messages must at least contain nfgenmsg */
-       if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
+       if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
                return 0;
 
        type = nlh->nlmsg_type;
@@ -160,19 +161,14 @@ replay:
        {
                int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
                u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
-               u_int16_t attr_count = ss->cb[cb_id].attr_count;
-               struct nlattr *cda[attr_count+1];
-
-               if (likely(nlh->nlmsg_len >= min_len)) {
-                       struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
-                       int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
-                       err = nla_parse(cda, attr_count, attr, attrlen,
-                                       ss->cb[cb_id].policy);
-                       if (err < 0)
-                               return err;
-               } else
-                       return -EINVAL;
+               struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+               struct nlattr *attr = (void *)nlh + min_len;
+               int attrlen = nlh->nlmsg_len - min_len;
+
+               err = nla_parse(cda, ss->cb[cb_id].attr_count,
+                               attr, attrlen, ss->cb[cb_id].policy);
+               if (err < 0)
+                       return err;
 
                err = nc->call(nfnl, skb, nlh, cda);
                if (err == -EAGAIN)
index 150e5cf..46dba5f 100644 (file)
@@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
+/*
+ * Format the hooks whose bits are set in @mask as a "/"-separated list of
+ * names (for example "PREROUTING/INPUT") into @buf, which holds @size bytes.
+ * Mask bits beyond the names table are ignored.  Returns @buf.
+ */
+static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+{
+       /* names[i] is emitted when bit i of the mask is set */
+       static const char *const names[] = {
+               "PREROUTING", "INPUT", "FORWARD",
+               "OUTPUT", "POSTROUTING", "BROUTING",
+       };
+       unsigned int i;
+       char *p = buf;
+       /* set after the first matching hook so later names get a "/" prefix */
+       bool np = false;
+       int res;
+
+       /* an empty mask yields an empty string */
+       *p = '\0';
+       for (i = 0; i < ARRAY_SIZE(names); ++i) {
+               if (!(mask & (1 << i)))
+                       continue;
+               /*
+                * NOTE(review): res is never compared against size.  Assuming
+                * snprintf() returns the untruncated length as C99 does
+                * (confirm), a truncated write would make "size -= res" wrap
+                * and move p past the end of buf.  All six names joined are
+                * 52 characters, so a buffer of at least 53 bytes cannot
+                * truncate; verify that every caller provides that much.
+                */
+               res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
+               if (res > 0) {
+                       size -= res;
+                       p += res;
+               }
+               np = true;
+       }
+
+       return buf;
+}
+
 int xt_check_match(struct xt_mtchk_param *par,
                   unsigned int size, u_int8_t proto, bool inv_proto)
 {
@@ -351,9 +377,13 @@ int xt_check_match(struct xt_mtchk_param *par,
                return -EINVAL;
        }
        if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
-               printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
+               char used[64], allow[64];
+
+               printk("%s_tables: %s match: used from hooks %s, but only "
+                      "valid from %s\n",
                       xt_prefix[par->family], par->match->name,
-                      par->hook_mask, par->match->hooks);
+                      textify_hooks(used, sizeof(used), par->hook_mask),
+                      textify_hooks(allow, sizeof(allow), par->match->hooks));
                return -EINVAL;
        }
        if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -497,9 +527,13 @@ int xt_check_target(struct xt_tgchk_param *par,
                return -EINVAL;
        }
        if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
-               printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
+               char used[64], allow[64];
+
+               printk("%s_tables: %s target: used from hooks %s, but only "
+                      "usable from %s\n",
                       xt_prefix[par->family], par->target->name,
-                      par->hook_mask, par->target->hooks);
+                      textify_hooks(used, sizeof(used), par->hook_mask),
+                      textify_hooks(allow, sizeof(allow), par->target->hooks));
                return -EINVAL;
        }
        if (par->target->proto && (par->target->proto != proto || inv_proto)) {
index f9977b3..498b451 100644 (file)
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
 #include <linux/netfilter.h>
 #include <linux/netfilter_arp.h>
 #include <linux/netfilter/x_tables.h>
@@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE");
 MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
+static u32 jhash_initval __read_mostly;
+
 static unsigned int
 nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
@@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
        return NF_QUEUE_NR(tinfo->queuenum);
 }
 
+/*
+ * Compute a 32-bit hash over the IPv4 header of @skb: the XOR of the source
+ * and destination addresses combined with the protocol field, seeded with
+ * jhash_initval.
+ */
+static u32 hash_v4(const struct sk_buff *skb)
+{
+       const struct iphdr *iph = ip_hdr(skb);
+       u32 ipaddr;
+
+       /*
+        * packets in either direction go into same queue: XOR is commutative,
+        * so swapping source and destination gives the same value
+        */
+       ipaddr = iph->saddr ^ iph->daddr;
+
+       return jhash_2words(ipaddr, iph->protocol, jhash_initval);
+}
+
+/*
+ * Revision 1 target function for IPv4.  With a single configured queue the
+ * packet goes to info->queuenum; with more than one, hash_v4() selects one
+ * of the queues queuenum .. queuenum + queues_total - 1.  The result is
+ * returned encoded through NF_QUEUE_NR().
+ */
+static unsigned int
+nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+       const struct xt_NFQ_info_v1 *info = par->targinfo;
+       u32 queue = info->queuenum;
+
+       /* spread packets over the range, offset from the base queue number */
+       if (info->queues_total > 1)
+               queue = hash_v4(skb) % info->queues_total + queue;
+       return NF_QUEUE_NR(queue);
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * Compute a 32-bit hash over the IPv6 header of @skb: the four 32-bit words
+ * of the source address XORed with the matching words of the destination
+ * address, hashed with jhash2() and seeded with jhash_initval.
+ */
+static u32 hash_v6(const struct sk_buff *skb)
+{
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       u32 addr[4];
+
+       /* word-wise XOR gives the same input when source and destination swap */
+       addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0];
+       addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1];
+       addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2];
+       addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3];
+
+       return jhash2(addr, ARRAY_SIZE(addr), jhash_initval);
+}
+
+/*
+ * Revision 1 target function for IPv6.  With a single configured queue the
+ * packet goes to info->queuenum; with more than one, hash_v6() selects one
+ * of the queues queuenum .. queuenum + queues_total - 1.  The result is
+ * returned encoded through NF_QUEUE_NR().
+ */
+static unsigned int
+nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+       const struct xt_NFQ_info_v1 *info = par->targinfo;
+       u32 queue = info->queuenum;
+
+       /* spread packets over the range, offset from the base queue number */
+       if (info->queues_total > 1)
+               queue = hash_v6(skb) % info->queues_total + queue;
+       return NF_QUEUE_NR(queue);
+}
+#endif
+
+/*
+ * Validate revision 1 target parameters.  Logs an error and returns false
+ * when queues_total is zero or when the highest queue number that could be
+ * selected exceeds 0xffff; returns true otherwise.
+ */
+static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+{
+       const struct xt_NFQ_info_v1 *info = par->targinfo;
+       u32 maxid;
+
+       if (info->queues_total == 0) {
+               pr_err("NFQUEUE: number of total queues is 0\n");
+               return false;
+       }
+       /* highest queue number reachable from the base queue number */
+       maxid = info->queues_total - 1 + info->queuenum;
+       if (maxid > 0xffff) {
+               pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
+                      info->queues_total, maxid);
+               return false;
+       }
+       return true;
+}
+
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
        {
                .name           = "NFQUEUE",
-               .family         = NFPROTO_IPV4,
+               .family         = NFPROTO_UNSPEC,
                .target         = nfqueue_tg,
                .targetsize     = sizeof(struct xt_NFQ_info),
                .me             = THIS_MODULE,
        },
        {
                .name           = "NFQUEUE",
-               .family         = NFPROTO_IPV6,
-               .target         = nfqueue_tg,
-               .targetsize     = sizeof(struct xt_NFQ_info),
+               .revision       = 1,
+               .family         = NFPROTO_IPV4,
+               .checkentry     = nfqueue_tg_v1_check,
+               .target         = nfqueue_tg4_v1,
+               .targetsize     = sizeof(struct xt_NFQ_info_v1),
                .me             = THIS_MODULE,
        },
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
        {
                .name           = "NFQUEUE",
-               .family         = NFPROTO_ARP,
-               .target         = nfqueue_tg,
-               .targetsize     = sizeof(struct xt_NFQ_info),
+               .revision       = 1,
+               .family         = NFPROTO_IPV6,
+               .checkentry     = nfqueue_tg_v1_check,
+               .target         = nfqueue_tg6_v1,
+               .targetsize     = sizeof(struct xt_NFQ_info_v1),
                .me             = THIS_MODULE,
        },
+#endif
 };
 
 static int __init nfqueue_tg_init(void)
 {
+       get_random_bytes(&jhash_initval, sizeof(jhash_initval));
        return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
 }
 
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644 (file)
index 0000000..863e409
--- /dev/null
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/xt_osf.h>
+
+/*
+ * Kernel-side wrapper around one user-supplied fingerprint, linked into
+ * one of the xt_osf_fingers[] lists.
+ */
+struct xt_osf_finger {
+       /* Handed to call_rcu() so the entry is freed after a grace period */
+       struct rcu_head                 rcu_head;
+       /* Linkage into xt_osf_fingers[] */
+       struct list_head                finger_entry;
+       /* Fingerprint data copied verbatim from the netlink attribute */
+       struct xt_osf_user_finger       finger;
+};
+
+/*
+ * Result of comparing one packet against one fingerprint inside
+ * xt_osf_match_packet().
+ */
+enum osf_fmatch_states {
+       /* Packet does not match the fingerprint */
+       FMATCH_WRONG = 0,
+       /* Packet matches the fingerprint */
+       FMATCH_OK,
+       /* Options do not match the fingerprint, but header does */
+       FMATCH_OPT_WRONG,
+};
+
+/*
+ * Registered fingerprints, kept in two lists indexed by the dont-fragment
+ * bit (index 0: DF clear, index 1: DF set).
+ * It is the only constant value in the fingerprint.
+ *
+ * The netlink callbacks modify the lists with the _rcu list helpers (their
+ * comments state they run under the nfnl mutex); the packet path walks the
+ * lists under rcu_read_lock().
+ */
+static struct list_head xt_osf_fingers[2];
+
+/*
+ * Netlink attribute policy shared by the add and remove messages:
+ * OSF_ATTR_FINGER carries a struct xt_osf_user_finger, and .len is set to
+ * the size of that structure.
+ */
+static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
+       [OSF_ATTR_FINGER]       = { .len = sizeof(struct xt_osf_user_finger) },
+};
+
+/*
+ * RCU callback: free the fingerprint entry that embeds @rcu_head.
+ * Queued through call_rcu() after the entry has been unlinked.
+ */
+static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
+{
+       kfree(container_of(rcu_head, struct xt_osf_finger, rcu_head));
+}
+
+/*
+ * nfnetlink handler for OSF_MSG_ADD: register a new fingerprint.
+ *
+ * The message must carry OSF_ATTR_FINGER and have NLM_F_CREATE set.
+ * The fingerprint is appended to the list selected by its df field.
+ *
+ * Returns 0 on success or when an identical fingerprint already exists,
+ * -EEXIST when it already exists and NLM_F_EXCL was requested,
+ * -EINVAL for a missing attribute, missing NLM_F_CREATE or a malformed
+ * fingerprint, and -ENOMEM when the entry cannot be allocated.
+ */
+static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
+                       struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+       struct xt_osf_user_finger *f;
+       struct xt_osf_finger *kf, *sf;
+
+       if (!osf_attrs[OSF_ATTR_FINGER])
+               return -EINVAL;
+
+       if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+               return -EINVAL;
+
+       f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+       /*
+        * The fingerprint comes straight from userspace and is later used
+        * by the packet path: opt_num bounds the walk over opt[], and the
+        * name fields are used as C strings. Reject anything that would
+        * make those accesses run past the structure.
+        */
+       if (f->opt_num > ARRAY_SIZE(f->opt))
+               return -EINVAL;
+
+       if (!memchr(f->genre, 0, sizeof(f->genre)) ||
+           !memchr(f->version, 0, sizeof(f->version)) ||
+           !memchr(f->subtype, 0, sizeof(f->subtype)))
+               return -EINVAL;
+
+       /* Look for an identical entry before allocating anything. */
+       list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+               if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+                       continue;
+
+               return (nlh->nlmsg_flags & NLM_F_EXCL) ? -EEXIST : 0;
+       }
+
+       kf = kmalloc(sizeof(*kf), GFP_KERNEL);
+       if (!kf)
+               return -ENOMEM;
+
+       memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger));
+
+       /*
+        * We are protected by nfnl mutex.
+        */
+       list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]);
+
+       return 0;
+}
+
+/*
+ * nfnetlink handler for OSF_MSG_REMOVE: unregister a fingerprint.
+ *
+ * The first entry that is byte-for-byte identical to the fingerprint in
+ * OSF_ATTR_FINGER is unlinked and freed after an RCU grace period.
+ *
+ * Returns 0 when an entry was removed, -EINVAL when the attribute is
+ * missing and -ENOENT when no matching fingerprint is registered.
+ */
+static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+                       struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+       struct xt_osf_user_finger *f;
+       struct xt_osf_finger *sf;
+       /* Error codes are negative errno values, like every other return here */
+       int err = -ENOENT;
+
+       if (!osf_attrs[OSF_ATTR_FINGER])
+               return -EINVAL;
+
+       f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+       list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+               if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+                       continue;
+
+               /*
+                * We are protected by nfnl mutex.
+                */
+               list_del_rcu(&sf->finger_entry);
+               call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
+
+               err = 0;
+               break;
+       }
+
+       return err;
+}
+
+/*
+ * Message dispatch table for the OSF nfnetlink subsystem, indexed by
+ * message type. Both messages use the same attribute policy.
+ */
+static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = {
+       /* Register a fingerprint */
+       [OSF_MSG_ADD]   = {
+               .call           = xt_osf_add_callback,
+               .attr_count     = OSF_ATTR_MAX,
+               .policy         = xt_osf_policy,
+       },
+       /* Unregister a fingerprint */
+       [OSF_MSG_REMOVE]        = {
+               .call           = xt_osf_remove_callback,
+               .attr_count     = OSF_ATTR_MAX,
+               .policy         = xt_osf_policy,
+       },
+};
+
+/*
+ * nfnetlink subsystem descriptor; registered in xt_osf_init() and
+ * unregistered in xt_osf_fini().
+ */
+static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
+       .name                   = "osf",
+       .subsys_id              = NFNL_SUBSYS_OSF,
+       .cb_count               = OSF_MSG_MAX,
+       .cb                     = xt_osf_nfnetlink_callbacks,
+};
+
+/*
+ * Compare the packet's IP TTL with the fingerprint TTL @f_ttl according
+ * to the rule's TTL mode.
+ *
+ * Without XT_OSF_TTL in info->flags, or with mode XT_OSF_TTL_TRUE, the
+ * TTL must be equal. XT_OSF_TTL_NOCHECK accepts any TTL. Any other mode
+ * accepts a packet TTL that is not larger than the fingerprint TTL.
+ *
+ * Uses __in_dev_get_rcu(), so the caller must hold rcu_read_lock().
+ *
+ * Returns non-zero when the TTL is acceptable, 0 otherwise.
+ */
+static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
+                           unsigned char f_ttl)
+{
+       const struct iphdr *ip = ip_hdr(skb);
+       struct in_device *in_dev;
+       int ret = 0;
+
+       if (!(info->flags & XT_OSF_TTL) || info->ttl == XT_OSF_TTL_TRUE)
+               return ip->ttl == f_ttl;
+
+       if (info->ttl == XT_OSF_TTL_NOCHECK || ip->ttl <= f_ttl)
+               return 1;
+
+       /* The device may have no IPv4 configuration attached. */
+       in_dev = __in_dev_get_rcu(skb->dev);
+       if (!in_dev)
+               return 0;
+
+       /*
+        * NOTE(review): this point is only reached with ip->ttl > f_ttl, so
+        * the equality test below can never be true and the walk always
+        * yields 0. Kept as is to preserve behaviour; confirm the intended
+        * handling of senders on a directly attached network.
+        */
+       for_ifa(in_dev) {
+               if (inet_ifa_match(ip->saddr, ifa)) {
+                       ret = (ip->ttl == f_ttl);
+                       break;
+               }
+       }
+       endfor_ifa(in_dev);
+
+       return ret;
+}
+
+/*
+ * Some smart modems mangle the MSS down to SMART_MSS_2, so the window
+ * checks try the standard MSS, the decreased one and the MSS parsed from
+ * the packet's own options.
+ */
+#define SMART_MSS_1    1460
+#define SMART_MSS_2    1448
+
+/*
+ * Match a TCP SYN packet against the registered OS fingerprints.
+ *
+ * Only packets with the SYN flag set are considered. The fingerprint list
+ * is selected by the packet's dont-fragment bit. A fingerprint matches
+ * when the IP total length, the TTL (see xt_osf_ttl()), the sequence of
+ * TCP option kinds/lengths and the window size rule all agree.
+ *
+ * Without XT_OSF_LOG only fingerprints whose genre equals info->genre are
+ * tried. With XT_OSF_LOG every fingerprint is tried and each hit is
+ * logged; XT_OSF_LOGLEVEL_FIRST stops after the first hit. When logging
+ * is on and nothing matched, an "unknown OS" line is logged.
+ *
+ * Returns true when at least one fingerprint matched.
+ */
+static bool xt_osf_match_packet(const struct sk_buff *skb,
+               const struct xt_match_param *p)
+{
+       const struct xt_osf_info *info = p->matchinfo;
+       const struct iphdr *ip = ip_hdr(skb);
+       const struct tcphdr *tcp;
+       struct tcphdr _tcph;
+       int fmatch = FMATCH_WRONG, fcount = 0;
+       unsigned int optsize = 0, check_WSS = 0;
+       u16 window, totlen, mss = 0;
+       bool df;
+       const unsigned char *optp = NULL, *_optp = NULL;
+       unsigned char opts[MAX_IPOPTLEN];
+       const struct xt_osf_finger *kf;
+       const struct xt_osf_user_finger *f;
+
+       if (!info)
+               return false;
+
+       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+       if (!tcp)
+               return false;
+
+       if (!tcp->syn)
+               return false;
+
+       totlen = ntohs(ip->tot_len);
+       df = ntohs(ip->frag_off) & IP_DF;
+       window = ntohs(tcp->window);
+
+       if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+               optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+               /* May return NULL if the options are not in the skb. */
+               _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+                               sizeof(struct tcphdr), optsize, opts);
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
+               /* Option bytes of the packet not yet consumed */
+               unsigned int left = optsize;
+               int foptsize = 0, optnum;
+
+               f = &kf->finger;
+
+               if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
+                       continue;
+
+               optp = _optp;
+               fmatch = FMATCH_WRONG;
+
+               if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl))
+                       continue;
+
+               /*
+                * Should not happen if userspace parser was written correctly.
+                */
+               if (f->wss.wc >= OSF_WSS_MAX ||
+                   f->opt_num > ARRAY_SIZE(f->opt))
+                       continue;
+
+               /* Check options */
+
+               for (optnum = 0; optnum < f->opt_num; ++optnum)
+                       foptsize += f->opt[optnum].length;
+
+               if (foptsize > MAX_IPOPTLEN ||
+                   optsize > MAX_IPOPTLEN ||
+                   optsize != foptsize)
+                       continue;
+
+               check_WSS = f->wss.wc;
+
+               for (optnum = 0; optnum < f->opt_num; ++optnum) {
+                       __u32 len = f->opt[optnum].length;
+
+                       /*
+                        * Never look at the packet options when they are
+                        * unavailable or already fully consumed.
+                        */
+                       if (!optp || !left || len > left ||
+                           f->opt[optnum].kind != *optp) {
+                               fmatch = FMATCH_OPT_WRONG;
+                               break;
+                       }
+
+                       fmatch = FMATCH_OK;
+
+                       /*
+                        * The MSS value is the 16-bit big-endian quantity
+                        * in bytes 2 and 3 of the option.
+                        */
+                       if (*optp == OSFOPT_MSS && len >= 4)
+                               mss = (optp[2] << 8) | optp[3];
+
+                       optp += len;
+                       left -= len;
+               }
+
+               if (fmatch != FMATCH_OPT_WRONG) {
+                       fmatch = FMATCH_WRONG;
+
+                       switch (check_WSS) {
+                       case OSF_WSS_PLAIN:
+                               if (f->wss.val == 0 || window == f->wss.val)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MSS:
+                               if (window == f->wss.val * mss ||
+                                   window == f->wss.val * SMART_MSS_1 ||
+                                   window == f->wss.val * SMART_MSS_2)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MTU:
+                               if (window == f->wss.val * (mss + 40) ||
+                                   window == f->wss.val * (SMART_MSS_1 + 40) ||
+                                   window == f->wss.val * (SMART_MSS_2 + 40))
+                                       fmatch = FMATCH_OK;
+                               break;
+                       case OSF_WSS_MODULO:
+                               /* A zero modulus can never match. */
+                               if (f->wss.val && (window % f->wss.val) == 0)
+                                       fmatch = FMATCH_OK;
+                               break;
+                       }
+               }
+
+               if (fmatch != FMATCH_OK)
+                       continue;
+
+               fcount++;
+
+               if (info->flags & XT_OSF_LOG) {
+                       nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+                               "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+                               f->genre, f->version, f->subtype,
+                               &ip->saddr, ntohs(tcp->source),
+                               &ip->daddr, ntohs(tcp->dest),
+                               f->ttl - ip->ttl);
+
+                       if (info->loglevel == XT_OSF_LOGLEVEL_FIRST)
+                               break;
+               }
+       }
+       rcu_read_unlock();
+
+       if (!fcount && (info->flags & XT_OSF_LOG))
+               nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+                       "Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+                               &ip->saddr, ntohs(tcp->source),
+                               &ip->daddr, ntohs(tcp->dest));
+
+       if (fcount)
+               fmatch = FMATCH_OK;
+
+       return fmatch == FMATCH_OK;
+}
+
+/*
+ * xtables match descriptor for "osf": IPv4 TCP only, and restricted to
+ * the LOCAL_IN, PRE_ROUTING and FORWARD hooks.
+ */
+static struct xt_match xt_osf_match = {
+       .name           = "osf",
+       .revision       = 0,
+       .family         = NFPROTO_IPV4,
+       .proto          = IPPROTO_TCP,
+       .hooks          = (1 << NF_INET_LOCAL_IN) |
+                               (1 << NF_INET_PRE_ROUTING) |
+                               (1 << NF_INET_FORWARD),
+       .match          = xt_osf_match_packet,
+       .matchsize      = sizeof(struct xt_osf_info),
+       .me             = THIS_MODULE,
+};
+
+/*
+ * Module init: set up the empty fingerprint lists, then register the
+ * nfnetlink subsystem and the xtables match. If the match cannot be
+ * registered the nfnetlink subsystem is unregistered again.
+ *
+ * Returns 0 on success or the error from the failing registration.
+ */
+static int __init xt_osf_init(void)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(xt_osf_fingers); ++i)
+               INIT_LIST_HEAD(&xt_osf_fingers[i]);
+
+       err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
+       if (err < 0) {
+               printk(KERN_ERR "Failed (%d) to register OSF nfnetlink helper.\n", err);
+               goto err_out_exit;
+       }
+
+       err = xt_register_match(&xt_osf_match);
+       if (err) {
+               printk(KERN_ERR "Failed (%d) to register OS fingerprint "
+                               "matching module.\n", err);
+               goto err_out_remove;
+       }
+
+       return 0;
+
+err_out_remove:
+       nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+err_out_exit:
+       return err;
+}
+
+/*
+ * Module exit: unregister the netlink and xtables interfaces, then unlink
+ * every remaining fingerprint and queue it for freeing.
+ */
+static void __exit xt_osf_fini(void)
+{
+       struct xt_osf_finger *f;
+       int i;
+
+       /* Unregister both interfaces before tearing the lists down */
+       nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+       xt_unregister_match(&xt_osf_match);
+
+       rcu_read_lock();
+       for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) {
+
+               /*
+                * Entries are only unlinked here; the actual kfree() is
+                * deferred to xt_osf_finger_free_rcu() via call_rcu().
+                */
+               list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
+                       list_del_rcu(&f->finger_entry);
+                       call_rcu(&f->rcu_head, xt_osf_finger_free_rcu);
+               }
+       }
+       rcu_read_unlock();
+
+       /* Wait for the queued RCU callbacks before the module goes away */
+       rcu_barrier();
+}
+
+module_init(xt_osf_init);
+module_exit(xt_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
index 1acc089..ebf00ad 100644 (file)
@@ -22,6 +22,8 @@
 #include <net/netfilter/nf_tproxy_core.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
+#include <linux/netfilter/xt_socket.h>
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #define XT_SOCKET_HAVE_CONNTRACK 1
 #include <net/netfilter/nf_conntrack.h>
@@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+            const struct xt_socket_mtinfo1 *info)
 {
        const struct iphdr *iph = ip_hdr(skb);
        struct udphdr _hdr, *hp = NULL;
@@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
        sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
                                   saddr, daddr, sport, dport, par->in, false);
        if (sk != NULL) {
-               bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0);
+               bool wildcard;
+               bool transparent = true;
+
+               /* Ignore sockets listening on INADDR_ANY */
+               wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+                           inet_sk(sk)->rcv_saddr == 0);
+
+               /* Ignore non-transparent sockets,
+                  if XT_SOCKET_TRANSPARENT is used */
+               if (info && info->flags & XT_SOCKET_TRANSPARENT)
+                       transparent = ((sk->sk_state != TCP_TIME_WAIT &&
+                                       inet_sk(sk)->transparent) ||
+                                      (sk->sk_state == TCP_TIME_WAIT &&
+                                       inet_twsk(sk)->tw_transparent));
 
                nf_tproxy_put_sock(sk);
-               if (wildcard)
+
+               if (wildcard || !transparent)
                        sk = NULL;
        }
 
@@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
        return (sk != NULL);
 }
 
-static struct xt_match socket_mt_reg __read_mostly = {
-       .name           = "socket",
-       .family         = AF_INET,
-       .match          = socket_mt,
-       .hooks          = 1 << NF_INET_PRE_ROUTING,
-       .me             = THIS_MODULE,
+/*
+ * Revision 0 match: no match info, so socket_match() is called with a
+ * NULL info pointer and the XT_SOCKET_TRANSPARENT check is not applied.
+ */
+static bool
+socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+       return socket_match(skb, par, NULL);
+}
+
+/*
+ * Revision 1 match: passes the rule's match info (registered with
+ * matchsize sizeof(struct xt_socket_mtinfo1)) to socket_match(), which
+ * reads its flags.
+ */
+static bool
+socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+       return socket_match(skb, par, par->matchinfo);
+}
+
+/*
+ * "socket" match registrations for IPv4, PRE_ROUTING only.
+ * Revision 0 takes no match info; revision 1 takes a
+ * struct xt_socket_mtinfo1.
+ */
+static struct xt_match socket_mt_reg[] __read_mostly = {
+       {
+               .name           = "socket",
+               .revision       = 0,
+               .family         = NFPROTO_IPV4,
+               .match          = socket_mt_v0,
+               .hooks          = 1 << NF_INET_PRE_ROUTING,
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "socket",
+               .revision       = 1,
+               .family         = NFPROTO_IPV4,
+               .match          = socket_mt_v1,
+               .matchsize      = sizeof(struct xt_socket_mtinfo1),
+               .hooks          = 1 << NF_INET_PRE_ROUTING,
+               .me             = THIS_MODULE,
+       },
+};
 
 static int __init socket_mt_init(void)
 {
        nf_defrag_ipv4_enable();
-       return xt_register_match(&socket_mt_reg);
+       return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
 }
 
 static void __exit socket_mt_exit(void)
 {
-       xt_unregister_match(&socket_mt_reg);
+       xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
 }
 
 module_init(socket_mt_init);