int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;
#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */
+#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
int fack_count;
int dup_sack = (found_dup_sack && (i == first_sack_index));
- if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq))
+ if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
+ if (dup_sack) {
+ if (!tp->undo_marker)
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
+ else
+ NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
+ } else
+ NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
continue;
+ }
skb = cached_skb;
fack_count = cached_fack_count;
tp->undo_retrans = 0;
skb = tcp_write_queue_head(sk);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+ tp->undo_marker = 0;
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
/* ...enter this if branch just for the first segment */
flag |= FLAG_DATA_ACKED;
} else {
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+ tp->undo_marker = 0;
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
}
tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
- tp->undo_marker = 0;
tp->frto_counter = 0;
tp->reordering = min_t(unsigned int, tp->reordering,
tp->high_seq = tp->frto_highmark;
TCP_ECN_queue_cwr(tp);
- clear_all_retrans_hints(tp);
+ tcp_clear_retrans_hints_partial(tp);
}
void tcp_clear_retrans(struct tcp_sock *tp)
tp->bytes_acked = 0;
tcp_clear_retrans(tp);
- /* Push undo marker, if it was plain RTO and nothing
- * was retransmitted. */
- if (!how)
+ if (!how) {
+ /* Push undo marker, if it was plain RTO and nothing
+ * was retransmitted. */
tp->undo_marker = tp->snd_una;
+ tcp_clear_retrans_hints_partial(tp);
+ } else {
+ tcp_clear_all_retrans_hints(tp);
+ }
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
TCP_ECN_queue_cwr(tp);
/* Abort FRTO algorithm if one is in progress */
tp->frto_counter = 0;
-
- clear_all_retrans_hints(tp);
}
static int tcp_check_sack_reneging(struct sock *sk)
/* There is something screwy going on with the retrans hints after
an undo */
- clear_all_retrans_hints(tp);
+ tcp_clear_all_retrans_hints(tp);
}
static inline int tcp_may_undo(struct tcp_sock *tp)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
- clear_all_retrans_hints(tp);
+ tcp_clear_all_retrans_hints(tp);
DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
* 1. Reno does not count dupacks (sacked_out) automatically. */
if (!tp->packets_out)
tp->sacked_out = 0;
- /* 2. SACK counts snd_fack in packets inaccurately. */
- if (tp->sacked_out == 0)
+
+ if (WARN_ON(!tp->sacked_out && tp->fackets_out))
tp->fackets_out = 0;
/* Now state machine starts.
}
}
-static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
- __u32 now, __s32 *seq_rtt)
+/* If we get here, the whole TSO packet has not been acked: the BUG_ON
+ * below insists that the skb's end_seq is still after tp->snd_una.
+ *
+ * Trims the already-acked head (tp->snd_una minus the skb's seq, in
+ * sequence space) off @skb with tcp_trim_head() and returns how many
+ * packets that removed, computed as tcp_skb_pcount() before the trim
+ * minus tcp_skb_pcount() after it.
+ *
+ * Returns 0 when tcp_trim_head() returns non-zero, or when the trim did
+ * not reduce the packet count. This function no longer adjusts any of
+ * the tp-> counters or ACK flags itself; the caller applies the returned
+ * packet count to its own accounting.
+ */
+static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- __u32 seq = tp->snd_una;
- __u32 packets_acked;
- int acked = 0;
+ u32 packets_acked;
- /* If we get here, the whole TSO packet has not been
- * acked.
- */
- BUG_ON(!after(scb->end_seq, seq));
+ BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
packets_acked = tcp_skb_pcount(skb);
- if (tcp_trim_head(sk, skb, seq - scb->seq))
+ if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
return 0;
packets_acked -= tcp_skb_pcount(skb);
if (packets_acked) {
- __u8 sacked = scb->sacked;
-
- acked |= FLAG_DATA_ACKED;
- if (sacked) {
- if (sacked & TCPCB_RETRANS) {
- if (sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= packets_acked;
- acked |= FLAG_RETRANS_DATA_ACKED;
- *seq_rtt = -1;
- } else if (*seq_rtt < 0)
- *seq_rtt = now - scb->when;
- if (sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out -= packets_acked;
- if (sacked & TCPCB_LOST)
- tp->lost_out -= packets_acked;
- if (sacked & TCPCB_URG) {
- if (tp->urg_mode &&
- !before(seq, tp->snd_up))
- tp->urg_mode = 0;
- }
- } else if (*seq_rtt < 0)
- *seq_rtt = now - scb->when;
-
- if (tp->fackets_out) {
- __u32 dval = min(tp->fackets_out, packets_acked);
- tp->fackets_out -= dval;
- }
- /* hint's skb might be NULL but we don't need to care */
- tp->fastpath_cnt_hint -= min_t(u32, packets_acked,
- tp->fastpath_cnt_hint);
- tp->packets_out -= packets_acked;
-
BUG_ON(tcp_skb_pcount(skb) == 0);
- BUG_ON(!before(scb->seq, scb->end_seq));
+ BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
}
- return acked;
+ return packets_acked;
}
-/* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
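+/* Remove acknowledged frames from the retransmission queue. An skb whose
+ * end_seq is not after tp->snd_una is confirmed to have arrived at the
+ * other end, so it is unlinked from the write queue and freed. A
+ * multi-packet (TSO) skb that snd_una only partly covers is trimmed via
+ * tcp_tso_acked() and left at the head of the queue, which ends the walk.
+ * Returns the accumulated FLAG_* bits and stores the seq_rtt value
+ * (-1 if none was taken) in *seq_rtt_p.
+ */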
+static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
- __u32 now = tcp_time_stamp;
- int acked = 0;
+ u32 now = tcp_time_stamp;
+ int fully_acked = 1;
+ int flag = 0;
int prior_packets = tp->packets_out;
- __s32 seq_rtt = -1;
+ s32 seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
- while ((skb = tcp_write_queue_head(sk)) &&
- skb != tcp_send_head(sk)) {
+ while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- __u8 sacked = scb->sacked;
+ u32 end_seq;
+ u32 packets_acked;
+ u8 sacked = scb->sacked;
- /* If our packet is before the ack sequence we can
- * discard it as it's confirmed to have arrived at
- * the other end.
- */
if (after(scb->end_seq, tp->snd_una)) {
- if (tcp_skb_pcount(skb) > 1 &&
- after(tp->snd_una, scb->seq))
- acked |= tcp_tso_acked(sk, skb,
- now, &seq_rtt);
- break;
- }
+ if (tcp_skb_pcount(skb) == 1 ||
+ !after(tp->snd_una, scb->seq))
+ break;
- /* Initial outgoing SYN's get put onto the write_queue
- * just like anything else we transmit. It is not
- * true data, and if we misinform our callers that
- * this ACK acks real data, we will erroneously exit
- * connection startup slow start one packet too
- * quickly. This is severely frowned upon behavior.
- */
- if (!(scb->flags & TCPCB_FLAG_SYN)) {
- acked |= FLAG_DATA_ACKED;
+ packets_acked = tcp_tso_acked(sk, skb);
+ if (!packets_acked)
+ break;
+
+ fully_acked = 0;
+ end_seq = tp->snd_una;
} else {
- acked |= FLAG_SYN_ACKED;
- tp->retrans_stamp = 0;
+ packets_acked = tcp_skb_pcount(skb);
+ end_seq = scb->end_seq;
}
/* MTU probing checks */
- if (icsk->icsk_mtup.probe_size) {
- if (!after(tp->mtu_probe.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) {
- tcp_mtup_probe_success(sk, skb);
- }
+ if (fully_acked && icsk->icsk_mtup.probe_size &&
+ !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) {
+ tcp_mtup_probe_success(sk, skb);
}
if (sacked) {
if (sacked & TCPCB_RETRANS) {
if (sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= tcp_skb_pcount(skb);
- acked |= FLAG_RETRANS_DATA_ACKED;
+ tp->retrans_out -= packets_acked;
+ flag |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
+ if ((flag & FLAG_DATA_ACKED) ||
+ (packets_acked > 1))
+ flag |= FLAG_NONHEAD_RETRANS_ACKED;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- last_ackt = skb->tstamp;
+ if (fully_acked)
+ last_ackt = skb->tstamp;
}
+
if (sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out -= tcp_skb_pcount(skb);
+ tp->sacked_out -= packets_acked;
if (sacked & TCPCB_LOST)
- tp->lost_out -= tcp_skb_pcount(skb);
- if (sacked & TCPCB_URG) {
- if (tp->urg_mode &&
- !before(scb->end_seq, tp->snd_up))
- tp->urg_mode = 0;
- }
+ tp->lost_out -= packets_acked;
+
+ if ((sacked & TCPCB_URG) && tp->urg_mode &&
+ !before(end_seq, tp->snd_up))
+ tp->urg_mode = 0;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- last_ackt = skb->tstamp;
+ if (fully_acked)
+ last_ackt = skb->tstamp;
}
- tcp_dec_pcount_approx(&tp->fackets_out, skb);
- tp->packets_out -= tcp_skb_pcount(skb);
+ tp->packets_out -= packets_acked;
+
+ /* Initial outgoing SYN's get put onto the write_queue
+ * just like anything else we transmit. It is not
+ * true data, and if we misinform our callers that
+ * this ACK acks real data, we will erroneously exit
+ * connection startup slow start one packet too
+ * quickly. This is severely frowned upon behavior.
+ */
+ if (!(scb->flags & TCPCB_FLAG_SYN)) {
+ flag |= FLAG_DATA_ACKED;
+ } else {
+ flag |= FLAG_SYN_ACKED;
+ tp->retrans_stamp = 0;
+ }
+
+ if (!fully_acked)
+ break;
+
tcp_unlink_write_queue(skb, sk);
sk_stream_free_skb(sk, skb);
- clear_all_retrans_hints(tp);
+ tcp_clear_all_retrans_hints(tp);
}
- if (acked&FLAG_ACKED) {
+ if (flag & FLAG_ACKED) {
u32 pkts_acked = prior_packets - tp->packets_out;
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
- tcp_ack_update_rtt(sk, acked, seq_rtt);
+ tcp_ack_update_rtt(sk, flag, seq_rtt);
tcp_rearm_rto(sk);
+ tp->fackets_out -= min(pkts_acked, tp->fackets_out);
+ /* hint's skb might be NULL but we don't need to care */
+ tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
+ tp->fastpath_cnt_hint);
if (tcp_is_reno(tp))
tcp_remove_reno_sacks(sk, pkts_acked);
s32 rtt_us = -1;
/* Is the ACK triggering packet unambiguous? */
- if (!(acked & FLAG_RETRANS_DATA_ACKED)) {
+ if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
/* High resolution needed and available? */
if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
!ktime_equal(last_ackt,
}
#endif
*seq_rtt_p = seq_rtt;
- return acked;
+ return flag;
}
static void tcp_ack_probe(struct sock *sk)
if (flag&FLAG_DATA_ACKED)
inet_csk(sk)->icsk_retransmits = 0;
+ if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
+ ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
+ tp->undo_marker = 0;
+
if (!before(tp->snd_una, tp->frto_highmark)) {
tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
return 1;
break;
}
tp->frto_counter = 0;
+ tp->undo_marker = 0;
}
return 0;
}