}
}
+#ifdef CONFIG_NF_NAT_NEEDED
+static inline s16 nat_offset(const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ u32 seq)
+{
+ typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
+
+ return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
+}
+#define NAT_OFFSET(pf, ct, dir, seq) \
+ (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
+#else
+#define NAT_OFFSET(pf, ct, dir, seq) 0
+#endif
+
static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
+ s16 receiver_offset;
bool res;
/*
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(skb, dataoff, tcph, &sack);
+ /* Take into account NAT sequence number mangling */
+ receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
+ ack -= receiver_offset;
+ sack -= receiver_offset;
+
pr_debug("tcp_in_window: START\n");
pr_debug("tcp_in_window: ");
nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
- seq, ack, sack, win, end);
+ pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
+ seq, ack, receiver_offset, sack, receiver_offset, win, end);
pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
pr_debug("tcp_in_window: ");
nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
- seq, ack, sack, win, end);
+ pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
+ seq, ack, receiver_offset, sack, receiver_offset, win, end);
pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
before(seq, sender->td_maxend + 1) ?
after(end, sender->td_end - receiver->td_maxwin - 1) ?
before(sack, receiver->td_end + 1) ?
- after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+ after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
: "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
return res;
}
-#ifdef CONFIG_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-/* Caller must linearize skb at tcp header. */
-void nf_conntrack_tcp_update(const struct sk_buff *skb,
- unsigned int dataoff,
- struct nf_conn *ct,
- int dir)
-{
- const struct tcphdr *tcph = (const void *)skb->data + dataoff;
- const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
- const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
- __u32 end;
-
- end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
-
- spin_lock_bh(&ct->lock);
- /*
- * We have to worry for the ack in the reply packet only...
- */
- if (after(end, ct->proto.tcp.seen[dir].td_end))
- ct->proto.tcp.seen[dir].td_end = end;
- ct->proto.tcp.last_end = end;
- spin_unlock_bh(&ct->lock);
- pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
-#endif
-
#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
};
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct net *net,
+static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
/* b) This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is
- * not. We kill this session and block the SYN/ACK so
- * that the client cannot but retransmit its SYN and
- * thus initiate a clean new session.
+ * not. We get in sync from the previously annotated
+ * values.
*/
- spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: killing out of sync session ");
- nf_ct_kill(ct);
- return NF_DROP;
+ old_state = TCP_CONNTRACK_SYN_SENT;
+ new_state = TCP_CONNTRACK_SYN_RECV;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
+ ct->proto.tcp.last_end;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
+ ct->proto.tcp.last_end;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
+ ct->proto.tcp.last_win == 0 ?
+ 1 : ct->proto.tcp.last_win;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
+ ct->proto.tcp.last_wscale;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
+ ct->proto.tcp.last_flags;
+ memset(&ct->proto.tcp.seen[dir], 0,
+ sizeof(struct ip_ct_tcp_state));
+ break;
}
ct->proto.tcp.last_index = index;
ct->proto.tcp.last_dir = dir;
ct->proto.tcp.last_seq = ntohl(th->seq);
ct->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
-
+ ct->proto.tcp.last_win = ntohs(th->window);
+
+ /* a) This is a SYN in ORIGINAL. The client and the server
+ * may be in sync but we are not. In that case, we annotate
+ * the TCP options and let the packet go through. If it is a
+ * valid SYN packet, the server will reply with a SYN/ACK, and
+ * then we'll get in sync. Otherwise, the server ignores it. */
+ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
+ struct ip_ct_tcp_state seen = {};
+
+ ct->proto.tcp.last_flags =
+ ct->proto.tcp.last_wscale = 0;
+ tcp_options(skb, dataoff, th, &seen);
+ if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+ ct->proto.tcp.last_flags |=
+ IP_CT_TCP_FLAG_WINDOW_SCALE;
+ ct->proto.tcp.last_wscale = seen.td_scale;
+ }
+ if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
+ ct->proto.tcp.last_flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ }
+ }
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
after SYN_RECV or a valid answer for a picked up
connection. */
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
}
nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
.proc_handler = proc_dointvec_jiffies,
},
{
- .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
.procname = "nf_conntrack_tcp_loose",
.data = &nf_ct_tcp_loose,
.maxlen = sizeof(unsigned int),
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
.procname = "nf_conntrack_tcp_be_liberal",
.data = &nf_ct_tcp_be_liberal,
.maxlen = sizeof(unsigned int),
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
.procname = "nf_conntrack_tcp_max_retrans",
.data = &nf_ct_tcp_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .ctl_name = 0
- }
+ { }
};
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
.proc_handler = proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
.procname = "ip_conntrack_tcp_loose",
.data = &nf_ct_tcp_loose,
.maxlen = sizeof(unsigned int),
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
.procname = "ip_conntrack_tcp_be_liberal",
.data = &nf_ct_tcp_be_liberal,
.maxlen = sizeof(unsigned int),
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
.procname = "ip_conntrack_tcp_max_retrans",
.data = &nf_ct_tcp_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .ctl_name = 0
- }
+ { }
};
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */