RxRPC: Error handling for rxrpc_alloc_connection()
[safe/jmp/linux-2.6] / net / netfilter / nf_conntrack_proto_tcp.c
index 153d661..b5ccf2b 100644 (file)
@@ -4,29 +4,10 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- *     - Real stateful connection tracking
- *     - Modified state transitions table
- *     - Window scaling support added
- *     - SACK support added
- *
- * Willy Tarreau:
- *     - State table bugfixes
- *     - More robust state changes
- *     - Tuning timer parameters
- *
- * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *     - genelized Layer 3 protocol part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
- *
- * version 2.2
  */
 
 #include <linux/types.h>
 #include <linux/timer.h>
-#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/in.h>
 #include <linux/tcp.h>
@@ -34,6 +15,7 @@
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
+#include <asm/unaligned.h>
 
 #include <net/tcp.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
+/* Protects ct->proto.tcp */
 static DEFINE_RWLOCK(tcp_lock);
 
 /* "Be conservative in what you do,
@@ -71,7 +49,7 @@ static int nf_ct_tcp_max_retrans __read_mostly = 3;
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
 
-static const char *tcp_conntrack_names[] = {
+static const char *const tcp_conntrack_names[] = {
        "NONE",
        "SYN_SENT",
        "SYN_RECV",
@@ -89,32 +67,22 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
-static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
-static unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
-static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
-static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
-static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
-static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
-static unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
-
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
-static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
-
-static unsigned int * tcp_timeouts[] = {
-    NULL,                              /* TCP_CONNTRACK_NONE */
-    &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
-    &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
-    &nf_ct_tcp_timeout_established,    /* TCP_CONNTRACK_ESTABLISHED, */
-    &nf_ct_tcp_timeout_fin_wait,       /* TCP_CONNTRACK_FIN_WAIT, */
-    &nf_ct_tcp_timeout_close_wait,     /* TCP_CONNTRACK_CLOSE_WAIT, */
-    &nf_ct_tcp_timeout_last_ack,       /* TCP_CONNTRACK_LAST_ACK, */
-    &nf_ct_tcp_timeout_time_wait,      /* TCP_CONNTRACK_TIME_WAIT, */
-    &nf_ct_tcp_timeout_close,          /* TCP_CONNTRACK_CLOSE, */
-    NULL,                              /* TCP_CONNTRACK_LISTEN */
- };
+static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly    =   5 MINS;
+static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly =   5 MINS;
+
+static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
+       [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
+       [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
+       [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
+       [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
+       [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
+       [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
+       [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
+       [TCP_CONNTRACK_CLOSE]           = 10 SECS,
+};
 
 #define sNO TCP_CONNTRACK_NONE
 #define sSS TCP_CONNTRACK_SYN_SENT
@@ -161,7 +129,7 @@ enum tcp_bit_set {
  * CLOSE_WAIT: ACK seen (after FIN)
  * LAST_ACK:   FIN seen (after FIN)
  * TIME_WAIT:  last ACK seen
- * CLOSE:      closed connection
+ * CLOSE:      closed connection (RST)
  *
  * LISTEN state is not used.
  *
@@ -174,7 +142,7 @@ enum tcp_bit_set {
  *     if they are invalid
  *     or we do not support the request (simultaneous open)
  */
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        {
 /* ORIGINAL */
 /*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
@@ -293,29 +261,29 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        }
 };
 
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
-                           unsigned int dataoff,
-                           struct nf_conntrack_tuple *tuple)
+static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+                            struct nf_conntrack_tuple *tuple)
 {
-       struct tcphdr _hdr, *hp;
+       const struct tcphdr *hp;
+       struct tcphdr _hdr;
 
        /* Actually only need first 8 bytes. */
        hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
        if (hp == NULL)
-               return 0;
+               return false;
 
        tuple->src.u.tcp.port = hp->source;
        tuple->dst.u.tcp.port = hp->dest;
 
-       return 1;
+       return true;
 }
 
-static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
-                           const struct nf_conntrack_tuple *orig)
+static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                            const struct nf_conntrack_tuple *orig)
 {
        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-       return 1;
+       return true;
 }
 
 /* Print out the per-protocol part of the tuple. */
@@ -328,13 +296,12 @@ static int tcp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
-                              const struct nf_conn *conntrack)
+static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
 {
        enum tcp_conntrack state;
 
        read_lock_bh(&tcp_lock);
-       state = conntrack->proto.tcp.state;
+       state = ct->proto.tcp.state;
        read_unlock_bh(&tcp_lock);
 
        return seq_printf(s, "%s ", tcp_conntrack_names[state]);
@@ -368,19 +335,20 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 
    I.   Upper bound for valid data:    seq <= sender.td_maxend
    II.  Lower bound for valid data:    seq + len >= sender.td_end - receiver.td_maxwin
-   III.        Upper bound for valid ack:      sack <= receiver.td_end
-   IV. Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
+   III.        Upper bound for valid (s)ack:   sack <= receiver.td_end
+   IV. Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
 
-   where sack is the highest right edge of sack block found in the packet.
+   where sack is the highest right edge of sack block found in the packet
+   or ack in the case of packet without SACK option.
 
-   The upper bound limit for a valid ack is not ignored -
+   The upper bound limit for a valid (s)ack is not ignored -
    we doesn't have to deal with fragments.
 */
 
 static inline __u32 segment_seq_plus_len(__u32 seq,
                                         size_t len,
                                         unsigned int dataoff,
-                                        struct tcphdr *tcph)
+                                        const struct tcphdr *tcph)
 {
        /* XXX Should I use payload length field in IP/IPv6 header ?
         * - YK */
@@ -399,11 +367,11 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
  */
 static void tcp_options(const struct sk_buff *skb,
                        unsigned int dataoff,
-                       struct tcphdr *tcph,
+                       const struct tcphdr *tcph,
                        struct ip_ct_tcp_state *state)
 {
        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-       unsigned char *ptr;
+       const unsigned char *ptr;
        int length = (tcph->doff*4) - sizeof(struct tcphdr);
 
        if (!length)
@@ -454,10 +422,10 @@ static void tcp_options(const struct sk_buff *skb,
 }
 
 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
-                    struct tcphdr *tcph, __u32 *sack)
+                     const struct tcphdr *tcph, __u32 *sack)
 {
        unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-       unsigned char *ptr;
+       const unsigned char *ptr;
        int length = (tcph->doff*4) - sizeof(struct tcphdr);
        __u32 tmp;
 
@@ -470,11 +438,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 
        /* Fast path for timestamp-only option */
        if (length == TCPOLEN_TSTAMP_ALIGNED*4
-           && *(__be32 *)ptr ==
-               __constant_htonl((TCPOPT_NOP << 24)
-                                | (TCPOPT_NOP << 16)
-                                | (TCPOPT_TIMESTAMP << 8)
-                                | TCPOLEN_TIMESTAMP))
+           && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+                                      | (TCPOPT_NOP << 16)
+                                      | (TCPOPT_TIMESTAMP << 8)
+                                      | TCPOLEN_TIMESTAMP))
                return;
 
        while (length > 0) {
@@ -502,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
                                for (i = 0;
                                     i < (opsize - TCPOLEN_SACK_BASE);
                                     i += TCPOLEN_SACK_PERBLOCK) {
-                                       tmp = ntohl(*((__be32 *)(ptr+i)+1));
+                                       tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
 
                                        if (after(tmp, *sack))
                                                *sack = tmp;
@@ -515,18 +482,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
        }
 }
 
-static int tcp_in_window(struct ip_ct_tcp *state,
-                        enum ip_conntrack_dir dir,
-                        unsigned int index,
-                        const struct sk_buff *skb,
-                        unsigned int dataoff,
-                        struct tcphdr *tcph,
-                        int pf)
+static bool tcp_in_window(const struct nf_conn *ct,
+                         struct ip_ct_tcp *state,
+                         enum ip_conntrack_dir dir,
+                         unsigned int index,
+                         const struct sk_buff *skb,
+                         unsigned int dataoff,
+                         const struct tcphdr *tcph,
+                         u_int8_t pf)
 {
+       struct net *net = nf_ct_net(ct);
        struct ip_ct_tcp_state *sender = &state->seen[dir];
        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+       const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
        __u32 seq, ack, sack, end, win, swin;
-       int res;
+       bool res;
 
        /*
         * Get the required data from the packet.
@@ -539,18 +509,17 @@ static int tcp_in_window(struct ip_ct_tcp *state,
        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
                tcp_sack(skb, dataoff, tcph, &sack);
 
-       DEBUGP("tcp_in_window: START\n");
-       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-              "seq=%u ack=%u sack=%u win=%u end=%u\n",
-               NIPQUAD(iph->saddr), ntohs(tcph->source),
-               NIPQUAD(iph->daddr), ntohs(tcph->dest),
-               seq, ack, sack, win, end);
-       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-               sender->td_end, sender->td_maxend, sender->td_maxwin,
-               sender->td_scale,
-               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-               receiver->td_scale);
+       pr_debug("tcp_in_window: START\n");
+       pr_debug("tcp_in_window: ");
+       nf_ct_dump_tuple(tuple);
+       pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
+                seq, ack, sack, win, end);
+       pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+                sender->td_end, sender->td_maxend, sender->td_maxwin,
+                sender->td_scale,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+                receiver->td_scale);
 
        if (sender->td_end == 0) {
                /*
@@ -628,28 +597,27 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 */
                seq = end = sender->td_end;
 
-       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-              "seq=%u ack=%u sack =%u win=%u end=%u\n",
-               NIPQUAD(iph->saddr), ntohs(tcph->source),
-               NIPQUAD(iph->daddr), ntohs(tcph->dest),
-               seq, ack, sack, win, end);
-       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-               sender->td_end, sender->td_maxend, sender->td_maxwin,
-               sender->td_scale,
-               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-               receiver->td_scale);
-
-       DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-               before(seq, sender->td_maxend + 1),
-               after(end, sender->td_end - receiver->td_maxwin - 1),
-               before(sack, receiver->td_end + 1),
-               after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+       pr_debug("tcp_in_window: ");
+       nf_ct_dump_tuple(tuple);
+       pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
+                seq, ack, sack, win, end);
+       pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+                sender->td_end, sender->td_maxend, sender->td_maxwin,
+                sender->td_scale,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+                receiver->td_scale);
+
+       pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+                before(seq, sender->td_maxend + 1),
+                after(end, sender->td_end - receiver->td_maxwin - 1),
+                before(sack, receiver->td_end + 1),
+                after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
        if (before(seq, sender->td_maxend + 1) &&
            after(end, sender->td_end - receiver->td_maxwin - 1) &&
            before(sack, receiver->td_end + 1) &&
-           after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
+           after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
                /*
                 * Take into account window scaling (RFC 1323).
                 */
@@ -662,8 +630,10 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                swin = win + (sack - ack);
                if (sender->td_maxwin < swin)
                        sender->td_maxwin = swin;
-               if (after(end, sender->td_end))
+               if (after(end, sender->td_end)) {
                        sender->td_end = end;
+                       sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+               }
                /*
                 * Update receiver data.
                 */
@@ -674,6 +644,8 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                        if (win == 0)
                                receiver->td_maxend++;
                }
+               if (ack == receiver->td_end)
+                       receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 
                /*
                 * Check retransmissions.
@@ -694,13 +666,13 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                                state->retrans = 0;
                        }
                }
-               res = 1;
+               res = true;
        } else {
-               res = 0;
+               res = false;
                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
                    nf_ct_tcp_be_liberal)
-                       res = 1;
-               if (!res && LOG_INVALID(IPPROTO_TCP))
+                       res = true;
+               if (!res && LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                        "nf_ct_tcp: %s ",
                        before(seq, sender->td_maxend + 1) ?
@@ -713,10 +685,10 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                        : "SEQ is over the upper bound (over the window of the receiver)");
        }
 
-       DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
-              "receiver end=%u maxend=%u maxwin=%u\n",
-               res, sender->td_end, sender->td_maxend, sender->td_maxwin,
-               receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+       pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
+                "receiver end=%u maxend=%u maxwin=%u\n",
+                res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
 
        return res;
 }
@@ -724,17 +696,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 #ifdef CONFIG_NF_NAT_NEEDED
 /* Update sender->td_end after NAT successfully mangled the packet */
 /* Caller must linearize skb at tcp header. */
-void nf_conntrack_tcp_update(struct sk_buff *skb,
+void nf_conntrack_tcp_update(const struct sk_buff *skb,
                             unsigned int dataoff,
-                            struct nf_conn *conntrack,
+                            struct nf_conn *ct,
                             int dir)
 {
-       struct tcphdr *tcph = (void *)skb->data + dataoff;
+       const struct tcphdr *tcph = (const void *)skb->data + dataoff;
+       const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
+       const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
        __u32 end;
-#ifdef DEBUGP_VARS
-       struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
-       struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
 
        end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
 
@@ -742,16 +712,16 @@ void nf_conntrack_tcp_update(struct sk_buff *skb,
        /*
         * We have to worry for the ack in the reply packet only...
         */
-       if (after(end, conntrack->proto.tcp.seen[dir].td_end))
-               conntrack->proto.tcp.seen[dir].td_end = end;
-       conntrack->proto.tcp.last_end = end;
+       if (after(end, ct->proto.tcp.seen[dir].td_end))
+               ct->proto.tcp.seen[dir].td_end = end;
+       ct->proto.tcp.last_end = end;
        write_unlock_bh(&tcp_lock);
-       DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
-              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-               sender->td_end, sender->td_maxend, sender->td_maxwin,
-               sender->td_scale,
-               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-               receiver->td_scale);
+       pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+                sender->td_end, sender->td_maxend, sender->td_maxwin,
+                sender->td_scale,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+                receiver->td_scale);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 #endif
@@ -765,43 +735,37 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 #define        TH_ECE  0x40
 #define        TH_CWR  0x80
 
-/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 {
        [TH_SYN]                        = 1,
-       [TH_SYN|TH_PUSH]                = 1,
        [TH_SYN|TH_URG]                 = 1,
-       [TH_SYN|TH_PUSH|TH_URG]         = 1,
        [TH_SYN|TH_ACK]                 = 1,
-       [TH_SYN|TH_ACK|TH_PUSH]         = 1,
        [TH_RST]                        = 1,
        [TH_RST|TH_ACK]                 = 1,
-       [TH_RST|TH_ACK|TH_PUSH]         = 1,
        [TH_FIN|TH_ACK]                 = 1,
+       [TH_FIN|TH_ACK|TH_URG]          = 1,
        [TH_ACK]                        = 1,
-       [TH_ACK|TH_PUSH]                = 1,
        [TH_ACK|TH_URG]                 = 1,
-       [TH_ACK|TH_URG|TH_PUSH]         = 1,
-       [TH_FIN|TH_ACK|TH_PUSH]         = 1,
-       [TH_FIN|TH_ACK|TH_URG]          = 1,
-       [TH_FIN|TH_ACK|TH_URG|TH_PUSH]  = 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct sk_buff *skb,
+static int tcp_error(struct net *net,
+                    struct sk_buff *skb,
                     unsigned int dataoff,
                     enum ip_conntrack_info *ctinfo,
-                    int pf,
+                    u_int8_t pf,
                     unsigned int hooknum)
 {
-       struct tcphdr _tcph, *th;
+       const struct tcphdr *th;
+       struct tcphdr _tcph;
        unsigned int tcplen = skb->len - dataoff;
        u_int8_t tcpflags;
 
        /* Smaller that minimal TCP header? */
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        if (th == NULL) {
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                "nf_ct_tcp: short packet ");
                return -NF_ACCEPT;
@@ -809,7 +773,7 @@ static int tcp_error(struct sk_buff *skb,
 
        /* Not whole TCP header or malformed packet */
        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                "nf_ct_tcp: truncated/malformed packet ");
                return -NF_ACCEPT;
@@ -820,20 +784,18 @@ static int tcp_error(struct sk_buff *skb,
         * because the checksum is assumed to be correct.
         */
        /* FIXME: Source route IP option packets --RR */
-       if (nf_conntrack_checksum &&
-           ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-            (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
+       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
            nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: bad TCP checksum ");
                return -NF_ACCEPT;
        }
 
        /* Check TCP flags. */
-       tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+       tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
        if (!tcp_valid_flags[tcpflags]) {
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid TCP flag combination ");
                return -NF_ACCEPT;
@@ -843,16 +805,19 @@ static int tcp_error(struct sk_buff *skb,
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct nf_conn *conntrack,
+static int tcp_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
                      enum ip_conntrack_info ctinfo,
-                     int pf,
+                     u_int8_t pf,
                      unsigned int hooknum)
 {
+       struct net *net = nf_ct_net(ct);
+       struct nf_conntrack_tuple *tuple;
        enum tcp_conntrack new_state, old_state;
        enum ip_conntrack_dir dir;
-       struct tcphdr *th, _tcph;
+       const struct tcphdr *th;
+       struct tcphdr _tcph;
        unsigned long timeout;
        unsigned int index;
 
@@ -860,25 +825,65 @@ static int tcp_packet(struct nf_conn *conntrack,
        BUG_ON(th == NULL);
 
        write_lock_bh(&tcp_lock);
-       old_state = conntrack->proto.tcp.state;
+       old_state = ct->proto.tcp.state;
        dir = CTINFO2DIR(ctinfo);
        index = get_conntrack_index(th);
        new_state = tcp_conntracks[dir][index][old_state];
+       tuple = &ct->tuplehash[dir].tuple;
 
        switch (new_state) {
+       case TCP_CONNTRACK_SYN_SENT:
+               if (old_state < TCP_CONNTRACK_TIME_WAIT)
+                       break;
+               /* RFC 1122: "When a connection is closed actively,
+                * it MUST linger in TIME-WAIT state for a time 2xMSL
+                * (Maximum Segment Lifetime). However, it MAY accept
+                * a new SYN from the remote TCP to reopen the connection
+                * directly from TIME-WAIT state, if..."
+                * We ignore the conditions because we are in the
+                * TIME-WAIT state anyway.
+                *
+                * Handle aborted connections: we and the server
+                * think there is an existing connection but the client
+                * aborts it and starts a new one.
+                */
+               if (((ct->proto.tcp.seen[dir].flags
+                     | ct->proto.tcp.seen[!dir].flags)
+                    & IP_CT_TCP_FLAG_CLOSE_INIT)
+                   || (ct->proto.tcp.last_dir == dir
+                       && ct->proto.tcp.last_index == TCP_RST_SET)) {
+                       /* Attempt to reopen a closed/aborted connection.
+                        * Delete this connection and look up again. */
+                       write_unlock_bh(&tcp_lock);
+
+                       /* Only repeat if we can actually remove the timer.
+                        * Destruction may already be in progress in process
+                        * context and we must give it a chance to terminate.
+                        */
+                       if (nf_ct_kill(ct))
+                               return -NF_REPEAT;
+                       return NF_DROP;
+               }
+               /* Fall through */
        case TCP_CONNTRACK_IGNORE:
                /* Ignored packets:
                 *
+                * Our connection entry may be out of sync, so ignore
+                * packets which may signal the real connection between
+                * the client and the server.
+                *
                 * a) SYN in ORIGINAL
                 * b) SYN/ACK in REPLY
                 * c) ACK in reply direction after initial SYN in original.
+                *
+                * If the ignored packet is invalid, the receiver will send
+                * a RST we'll catch below.
                 */
                if (index == TCP_SYNACK_SET
-                   && conntrack->proto.tcp.last_index == TCP_SYN_SET
-                   && conntrack->proto.tcp.last_dir != dir
-                   && ntohl(th->ack_seq) ==
-                            conntrack->proto.tcp.last_end) {
-                       /* This SYN/ACK acknowledges a SYN that we earlier
+                   && ct->proto.tcp.last_index == TCP_SYN_SET
+                   && ct->proto.tcp.last_dir != dir
+                   && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
+                       /* b) This SYN/ACK acknowledges a SYN that we earlier
                         * ignored as invalid. This means that the client and
                         * the server are both in sync, while the firewall is
                         * not. We kill this session and block the SYN/ACK so
@@ -886,63 +891,39 @@ static int tcp_packet(struct nf_conn *conntrack,
                         * thus initiate a clean new session.
                         */
                        write_unlock_bh(&tcp_lock);
-                       if (LOG_INVALID(IPPROTO_TCP))
+                       if (LOG_INVALID(net, IPPROTO_TCP))
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: killing out of sync session ");
-                       if (del_timer(&conntrack->timeout))
-                               conntrack->timeout.function((unsigned long)
-                                                           conntrack);
-                       return -NF_DROP;
+                       nf_ct_kill(ct);
+                       return NF_DROP;
                }
-               conntrack->proto.tcp.last_index = index;
-               conntrack->proto.tcp.last_dir = dir;
-               conntrack->proto.tcp.last_seq = ntohl(th->seq);
-               conntrack->proto.tcp.last_end =
+               ct->proto.tcp.last_index = index;
+               ct->proto.tcp.last_dir = dir;
+               ct->proto.tcp.last_seq = ntohl(th->seq);
+               ct->proto.tcp.last_end =
                    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
                write_unlock_bh(&tcp_lock);
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-                                 "nf_ct_tcp: invalid packed ignored ");
+                                 "nf_ct_tcp: invalid packet ignored ");
                return NF_ACCEPT;
        case TCP_CONNTRACK_MAX:
                /* Invalid packet */
-               DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
-                      dir, get_conntrack_index(th),
-                      old_state);
+               pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+                        dir, get_conntrack_index(th), old_state);
                write_unlock_bh(&tcp_lock);
-               if (LOG_INVALID(IPPROTO_TCP))
+               if (LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid state ");
                return -NF_ACCEPT;
-       case TCP_CONNTRACK_SYN_SENT:
-               if (old_state < TCP_CONNTRACK_TIME_WAIT)
-                       break;
-               if ((conntrack->proto.tcp.seen[dir].flags &
-                       IP_CT_TCP_FLAG_CLOSE_INIT)
-                   || after(ntohl(th->seq),
-                            conntrack->proto.tcp.seen[dir].td_end)) {
-                       /* Attempt to reopen a closed connection.
-                       * Delete this connection and look up again. */
-                       write_unlock_bh(&tcp_lock);
-                       if (del_timer(&conntrack->timeout))
-                               conntrack->timeout.function((unsigned long)
-                                                           conntrack);
-                       return -NF_REPEAT;
-               } else {
-                       write_unlock_bh(&tcp_lock);
-                       if (LOG_INVALID(IPPROTO_TCP))
-                               nf_log_packet(pf, 0, skb, NULL, NULL,
-                                             NULL, "nf_ct_tcp: invalid SYN");
-                       return -NF_ACCEPT;
-               }
        case TCP_CONNTRACK_CLOSE:
                if (index == TCP_RST_SET
-                   && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-                        && conntrack->proto.tcp.last_index == TCP_SYN_SET)
-                       || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-                           && conntrack->proto.tcp.last_index == TCP_ACK_SET))
-                   && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+                   && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+                        && ct->proto.tcp.last_index == TCP_SYN_SET)
+                       || (!test_bit(IPS_ASSURED_BIT, &ct->status)
+                           && ct->proto.tcp.last_index == TCP_ACK_SET))
+                   && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
                        /* RST sent to invalid SYN or ACK we had let through
                         * at a) and c) above:
                         *
@@ -960,74 +941,76 @@ static int tcp_packet(struct nf_conn *conntrack,
                break;
        }
 
-       if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
+       if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
                           skb, dataoff, th, pf)) {
                write_unlock_bh(&tcp_lock);
                return -NF_ACCEPT;
        }
      in_window:
        /* From now on we have got in-window packets */
-       conntrack->proto.tcp.last_index = index;
+       ct->proto.tcp.last_index = index;
+       ct->proto.tcp.last_dir = dir;
 
-       DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-              "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
-               NIPQUAD(iph->saddr), ntohs(th->source),
-               NIPQUAD(iph->daddr), ntohs(th->dest),
-               (th->syn ? 1 : 0), (th->ack ? 1 : 0),
-               (th->fin ? 1 : 0), (th->rst ? 1 : 0),
-               old_state, new_state);
+       pr_debug("tcp_conntracks: ");
+       nf_ct_dump_tuple(tuple);
+       pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+                (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+                (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+                old_state, new_state);
 
-       conntrack->proto.tcp.state = new_state;
+       ct->proto.tcp.state = new_state;
        if (old_state != new_state
-           && (new_state == TCP_CONNTRACK_FIN_WAIT
-               || new_state == TCP_CONNTRACK_CLOSE))
-               conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-       timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
-                 && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-                 ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+           && new_state == TCP_CONNTRACK_FIN_WAIT)
+               ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+
+       if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
+           tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
+               timeout = nf_ct_tcp_timeout_max_retrans;
+       else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
+                IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
+                tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
+               timeout = nf_ct_tcp_timeout_unacknowledged;
+       else
+               timeout = tcp_timeouts[new_state];
        write_unlock_bh(&tcp_lock);
 
-       nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+       nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
        if (new_state != old_state)
-               nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+               nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
-       if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+       if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                /* If only reply is a RST, we can consider ourselves not to
                   have an established connection: this is a fairly common
                   problem case, so we can delete the conntrack
                   immediately.  --RR */
                if (th->rst) {
-                       if (del_timer(&conntrack->timeout))
-                               conntrack->timeout.function((unsigned long)
-                                                           conntrack);
+                       nf_ct_kill_acct(ct, ctinfo, skb);
                        return NF_ACCEPT;
                }
-       } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+       } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
                   && (old_state == TCP_CONNTRACK_SYN_RECV
                       || old_state == TCP_CONNTRACK_ESTABLISHED)
                   && new_state == TCP_CONNTRACK_ESTABLISHED) {
                /* Set ASSURED if we see see valid ack in ESTABLISHED
                   after SYN_RECV or a valid answer for a picked up
                   connection. */
-               set_bit(IPS_ASSURED_BIT, &conntrack->status);
-               nf_conntrack_event_cache(IPCT_STATUS, skb);
+               set_bit(IPS_ASSURED_BIT, &ct->status);
+               nf_conntrack_event_cache(IPCT_STATUS, ct);
        }
-       nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+       nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
 
        return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
-static int tcp_new(struct nf_conn *conntrack,
-                  const struct sk_buff *skb,
-                  unsigned int dataoff)
+static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+                   unsigned int dataoff)
 {
        enum tcp_conntrack new_state;
-       struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
-       struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
-       struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
+       const struct tcphdr *th;
+       struct tcphdr _tcph;
+       const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
+       const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
 
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        BUG_ON(th == NULL);
@@ -1039,66 +1022,66 @@ static int tcp_new(struct nf_conn *conntrack,
 
        /* Invalid: delete conntrack */
        if (new_state >= TCP_CONNTRACK_MAX) {
-               DEBUGP("nf_ct_tcp: invalid new deleting.\n");
-               return 0;
+               pr_debug("nf_ct_tcp: invalid new deleting.\n");
+               return false;
        }
 
        if (new_state == TCP_CONNTRACK_SYN_SENT) {
                /* SYN packet */
-               conntrack->proto.tcp.seen[0].td_end =
+               ct->proto.tcp.seen[0].td_end =
                        segment_seq_plus_len(ntohl(th->seq), skb->len,
                                             dataoff, th);
-               conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-               if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-                       conntrack->proto.tcp.seen[0].td_maxwin = 1;
-               conntrack->proto.tcp.seen[0].td_maxend =
-                       conntrack->proto.tcp.seen[0].td_end;
-
-               tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]);
-               conntrack->proto.tcp.seen[1].flags = 0;
+               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (ct->proto.tcp.seen[0].td_maxwin == 0)
+                       ct->proto.tcp.seen[0].td_maxwin = 1;
+               ct->proto.tcp.seen[0].td_maxend =
+                       ct->proto.tcp.seen[0].td_end;
+
+               tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
+               ct->proto.tcp.seen[1].flags = 0;
        } else if (nf_ct_tcp_loose == 0) {
                /* Don't try to pick up connections. */
-               return 0;
+               return false;
        } else {
                /*
                 * We are in the middle of a connection,
                 * its history is lost for us.
                 * Let's try to use the data from the packet.
                 */
-               conntrack->proto.tcp.seen[0].td_end =
+               ct->proto.tcp.seen[0].td_end =
                        segment_seq_plus_len(ntohl(th->seq), skb->len,
                                             dataoff, th);
-               conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-               if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-                       conntrack->proto.tcp.seen[0].td_maxwin = 1;
-               conntrack->proto.tcp.seen[0].td_maxend =
-                       conntrack->proto.tcp.seen[0].td_end +
-                       conntrack->proto.tcp.seen[0].td_maxwin;
-               conntrack->proto.tcp.seen[0].td_scale = 0;
+               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (ct->proto.tcp.seen[0].td_maxwin == 0)
+                       ct->proto.tcp.seen[0].td_maxwin = 1;
+               ct->proto.tcp.seen[0].td_maxend =
+                       ct->proto.tcp.seen[0].td_end +
+                       ct->proto.tcp.seen[0].td_maxwin;
+               ct->proto.tcp.seen[0].td_scale = 0;
 
                /* We assume SACK and liberal window checking to handle
                 * window scaling */
-               conntrack->proto.tcp.seen[0].flags =
-               conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
-                                                    IP_CT_TCP_FLAG_BE_LIBERAL;
+               ct->proto.tcp.seen[0].flags =
+               ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+                                             IP_CT_TCP_FLAG_BE_LIBERAL;
        }
 
-       conntrack->proto.tcp.seen[1].td_end = 0;
-       conntrack->proto.tcp.seen[1].td_maxend = 0;
-       conntrack->proto.tcp.seen[1].td_maxwin = 1;
-       conntrack->proto.tcp.seen[1].td_scale = 0;
+       ct->proto.tcp.seen[1].td_end = 0;
+       ct->proto.tcp.seen[1].td_maxend = 0;
+       ct->proto.tcp.seen[1].td_maxwin = 1;
+       ct->proto.tcp.seen[1].td_scale = 0;
 
        /* tcp_packet will set them */
-       conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
-       conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
-       DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-               sender->td_end, sender->td_maxend, sender->td_maxwin,
-               sender->td_scale,
-               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-               receiver->td_scale);
-       return 1;
+       ct->proto.tcp.state = TCP_CONNTRACK_NONE;
+       ct->proto.tcp.last_index = TCP_NONE_SET;
+
+       pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+                sender->td_end, sender->td_maxend, sender->td_maxwin,
+                sender->td_scale,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+                receiver->td_scale);
+       return true;
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
@@ -1106,55 +1089,112 @@ static int tcp_new(struct nf_conn *conntrack,
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
                         const struct nf_conn *ct)
 {
-       struct nfattr *nest_parms;
+       struct nlattr *nest_parms;
+       struct nf_ct_tcp_flags tmp = {};
 
        read_lock_bh(&tcp_lock);
-       nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
-       NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
-               &ct->proto.tcp.state);
+       nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
+       if (!nest_parms)
+               goto nla_put_failure;
+
+       NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
+
+       NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+                  ct->proto.tcp.seen[0].td_scale);
+
+       NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+                  ct->proto.tcp.seen[1].td_scale);
+
+       tmp.flags = ct->proto.tcp.seen[0].flags;
+       NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+               sizeof(struct nf_ct_tcp_flags), &tmp);
+
+       tmp.flags = ct->proto.tcp.seen[1].flags;
+       NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+               sizeof(struct nf_ct_tcp_flags), &tmp);
        read_unlock_bh(&tcp_lock);
 
-       NFA_NEST_END(skb, nest_parms);
+       nla_nest_end(skb, nest_parms);
 
        return 0;
 
-nfattr_failure:
+nla_put_failure:
        read_unlock_bh(&tcp_lock);
        return -1;
 }
 
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-       [CTA_PROTOINFO_TCP_STATE-1]     = sizeof(u_int8_t),
+static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
+       [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
+       [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
+       [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
+       [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
+       [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
 };
 
-static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
+static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 {
-       struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
-       struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+       struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
+       struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
+       int err;
 
        /* updates could not contain anything about the private
         * protocol info, in that case skip the parsing */
-       if (!attr)
+       if (!pattr)
                return 0;
 
-       nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
+       err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
+       if (err < 0)
+               return err;
 
-       if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
-               return -EINVAL;
-
-       if (!tb[CTA_PROTOINFO_TCP_STATE-1])
+       if (tb[CTA_PROTOINFO_TCP_STATE] &&
+           nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
                return -EINVAL;
 
        write_lock_bh(&tcp_lock);
-       ct->proto.tcp.state =
-               *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+       if (tb[CTA_PROTOINFO_TCP_STATE])
+               ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
+
+       if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
+               struct nf_ct_tcp_flags *attr =
+                       nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
+               ct->proto.tcp.seen[0].flags &= ~attr->mask;
+               ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+       }
+
+       if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
+               struct nf_ct_tcp_flags *attr =
+                       nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
+               ct->proto.tcp.seen[1].flags &= ~attr->mask;
+               ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+       }
+
+       if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
+           tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
+           ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+           ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+               ct->proto.tcp.seen[0].td_scale =
+                       nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
+               ct->proto.tcp.seen[1].td_scale =
+                       nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
+       }
        write_unlock_bh(&tcp_lock);
 
        return 0;
 }
+
+static int tcp_nlattr_size(void)
+{
+       return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
+               + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
+}
+
+static int tcp_nlattr_tuple_size(void)
+{
+       return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+}
 #endif
 
 #ifdef CONFIG_SYSCTL
@@ -1162,76 +1202,74 @@ static unsigned int tcp_sysctl_table_users;
 static struct ctl_table_header *tcp_sysctl_header;
 static struct ctl_table tcp_sysctl_table[] = {
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
                .procname       = "nf_conntrack_tcp_timeout_syn_sent",
-               .data           = &nf_ct_tcp_timeout_syn_sent,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
                .procname       = "nf_conntrack_tcp_timeout_syn_recv",
-               .data           = &nf_ct_tcp_timeout_syn_recv,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
                .procname       = "nf_conntrack_tcp_timeout_established",
-               .data           = &nf_ct_tcp_timeout_established,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
                .procname       = "nf_conntrack_tcp_timeout_fin_wait",
-               .data           = &nf_ct_tcp_timeout_fin_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
                .procname       = "nf_conntrack_tcp_timeout_close_wait",
-               .data           = &nf_ct_tcp_timeout_close_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
                .procname       = "nf_conntrack_tcp_timeout_last_ack",
-               .data           = &nf_ct_tcp_timeout_last_ack,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
                .procname       = "nf_conntrack_tcp_timeout_time_wait",
-               .data           = &nf_ct_tcp_timeout_time_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
                .procname       = "nf_conntrack_tcp_timeout_close",
-               .data           = &nf_ct_tcp_timeout_close,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
                .procname       = "nf_conntrack_tcp_timeout_max_retrans",
                .data           = &nf_ct_tcp_timeout_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+       {
+               .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
+               .data           = &nf_ct_tcp_timeout_unacknowledged,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .ctl_name       = NET_NF_CONNTRACK_TCP_LOOSE,
@@ -1239,7 +1277,7 @@ static struct ctl_table tcp_sysctl_table[] = {
                .data           = &nf_ct_tcp_loose,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
@@ -1247,7 +1285,7 @@ static struct ctl_table tcp_sysctl_table[] = {
                .data           = &nf_ct_tcp_be_liberal,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
@@ -1255,7 +1293,7 @@ static struct ctl_table tcp_sysctl_table[] = {
                .data           = &nf_ct_tcp_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = 0
@@ -1265,76 +1303,67 @@ static struct ctl_table tcp_sysctl_table[] = {
 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
 static struct ctl_table tcp_compat_sysctl_table[] = {
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
                .procname       = "ip_conntrack_tcp_timeout_syn_sent",
-               .data           = &nf_ct_tcp_timeout_syn_sent,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
                .procname       = "ip_conntrack_tcp_timeout_syn_recv",
-               .data           = &nf_ct_tcp_timeout_syn_recv,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
                .procname       = "ip_conntrack_tcp_timeout_established",
-               .data           = &nf_ct_tcp_timeout_established,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
                .procname       = "ip_conntrack_tcp_timeout_fin_wait",
-               .data           = &nf_ct_tcp_timeout_fin_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
                .procname       = "ip_conntrack_tcp_timeout_close_wait",
-               .data           = &nf_ct_tcp_timeout_close_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
                .procname       = "ip_conntrack_tcp_timeout_last_ack",
-               .data           = &nf_ct_tcp_timeout_last_ack,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
                .procname       = "ip_conntrack_tcp_timeout_time_wait",
-               .data           = &nf_ct_tcp_timeout_time_wait,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
                .procname       = "ip_conntrack_tcp_timeout_close",
-               .data           = &nf_ct_tcp_timeout_close,
+               .data           = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
-               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
                .procname       = "ip_conntrack_tcp_timeout_max_retrans",
                .data           = &nf_ct_tcp_timeout_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
@@ -1342,7 +1371,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
                .data           = &nf_ct_tcp_loose,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
@@ -1350,7 +1379,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
                .data           = &nf_ct_tcp_be_liberal,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
@@ -1358,7 +1387,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
                .data           = &nf_ct_tcp_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
                .ctl_name       = 0
@@ -1367,7 +1396,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 {
        .l3proto                = PF_INET,
        .l4proto                = IPPROTO_TCP,
@@ -1380,10 +1409,13 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
        .new                    = tcp_new,
        .error                  = tcp_error,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-       .to_nfattr              = tcp_to_nfattr,
-       .from_nfattr            = nfattr_to_tcp,
-       .tuple_to_nfattr        = nf_ct_port_tuple_to_nfattr,
-       .nfattr_to_tuple        = nf_ct_port_nfattr_to_tuple,
+       .to_nlattr              = tcp_to_nlattr,
+       .nlattr_size            = tcp_nlattr_size,
+       .from_nlattr            = nlattr_to_tcp,
+       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
+       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
+       .nlattr_tuple_size      = tcp_nlattr_tuple_size,
+       .nla_policy             = nf_ct_port_nla_policy,
 #endif
 #ifdef CONFIG_SYSCTL
        .ctl_table_users        = &tcp_sysctl_table_users,
@@ -1396,7 +1428,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
 {
        .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_TCP,
@@ -1409,10 +1441,13 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
        .new                    = tcp_new,
        .error                  = tcp_error,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-       .to_nfattr              = tcp_to_nfattr,
-       .from_nfattr            = nfattr_to_tcp,
-       .tuple_to_nfattr        = nf_ct_port_tuple_to_nfattr,
-       .nfattr_to_tuple        = nf_ct_port_nfattr_to_tuple,
+       .to_nlattr              = tcp_to_nlattr,
+       .nlattr_size            = tcp_nlattr_size,
+       .from_nlattr            = nlattr_to_tcp,
+       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
+       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
+       .nlattr_tuple_size      = tcp_nlattr_tuple_size,
+       .nla_policy             = nf_ct_port_nla_policy,
 #endif
 #ifdef CONFIG_SYSCTL
        .ctl_table_users        = &tcp_sysctl_table_users,