* all the algo is pure shit and should be replaced
* with correct one. It is exactly, which we pretend to do.
*/
-}
-/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
- * guarantees that rto is higher.
- */
-static inline void tcp_bound_rto(struct sock *sk)
-{
+ /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
+ */
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
- tcp_bound_rto(sk);
if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
goto reset;
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
return dup_sack;
}
+struct tcp_sacktag_state {	/* scratch state shared by the tcp_sacktag_*() helpers, replacing the separate reord/fack_count/flag pointer arguments */
+ int reord;	/* starts at tp->packets_out and is only lowered via min(fack_count, reord); compared against tp->fackets_out to call tcp_update_reordering() */
+ int fack_count;	/* running sum of skb pcounts passed while walking/skipping the write queue; reset to 0 or tp->fackets_out when the walk is repositioned */
+ int flag;	/* accumulated FLAG_* bits (e.g. FLAG_DATA_SACKED, FLAG_DSACKING_ACK); returned to the caller as state.flag */
+};
+
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
* the incoming SACK may not exactly match but we can find smaller MSS
* aligned portion of it that matches. Therefore we might need to fragment
return in_sack;
}
-static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
- int *reord, int dup_sack, int fack_count,
- u8 *sackedto, int pcount)
+static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+ struct tcp_sacktag_state *state,
+ int dup_sack, int pcount)
{
struct tcp_sock *tp = tcp_sk(sk);
u8 sacked = TCP_SKB_CB(skb)->sacked;
- int flag = 0;
+ int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
- *reord = min(fack_count, *reord);
+ state->reord = min(fack_count, state->reord);
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
- return flag;
+ return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
if (sacked & TCPCB_SACKED_RETRANS) {
* that retransmission is still in flight.
*/
if (sacked & TCPCB_LOST) {
- *sackedto &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
tp->lost_out -= pcount;
tp->retrans_out -= pcount;
}
*/
if (before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
- *reord = min(fack_count, *reord);
+ state->reord = min(fack_count,
+ state->reord);
/* SACK enhanced F-RTO (RFC4138; Appendix B) */
if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
- flag |= FLAG_ONLY_ORIG_SACKED;
+ state->flag |= FLAG_ONLY_ORIG_SACKED;
}
if (sacked & TCPCB_LOST) {
- *sackedto &= ~TCPCB_LOST;
+ sacked &= ~TCPCB_LOST;
tp->lost_out -= pcount;
}
}
- *sackedto |= TCPCB_SACKED_ACKED;
- flag |= FLAG_DATA_SACKED;
+ sacked |= TCPCB_SACKED_ACKED;
+ state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount;
fack_count += pcount;
* frames and clear it. undo_retrans is decreased above, L|R frames
* are accounted above as well.
*/
- if (dup_sack && (*sackedto & TCPCB_SACKED_RETRANS)) {
- *sackedto &= ~TCPCB_SACKED_RETRANS;
+ if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
+ sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= pcount;
}
- return flag;
+ return sacked;
}
-static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
- struct sk_buff *skb, unsigned int pcount,
- int shifted, int fack_count, int *reord,
- int *flag, int mss)
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_sacktag_state *state,
+ unsigned int pcount, int shifted, int mss)
{
struct tcp_sock *tp = tcp_sk(sk);
- u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */
+ struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
BUG_ON(!pcount);
skb_shinfo(skb)->gso_type = 0;
}
- *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked,
- pcount);
+ /* We discard results */
+ tcp_sacktag_one(skb, sk, state, 0, pcount);
/* Difference in this won't matter, both ACKed by the same cumul. ACK */
TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
if (skb->len > 0) {
BUG_ON(!tcp_skb_pcount(skb));
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
return 0;
}
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
+
return 1;
}
/* I wish gso_size would have a bit more sane initialization than
* something-or-zero which complicates things
*/
-static int tcp_shift_mss(struct sk_buff *skb)
+static int tcp_skb_seglen(struct sk_buff *skb)	/* per-segment length of skb, used to compare skbs before shifting/merging */
{
- int mss = tcp_skb_mss(skb);
-
- if (!mss)
- mss = skb->len;
-
- return mss;
+ return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);	/* pcount of 1: use the skb's own length; otherwise use tcp_skb_mss() -- unlike the removed code, no longer keyed on mss being zero */
}
/* Shifting pages past head area doesn't work */
* skb.
*/
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+ struct tcp_sacktag_state *state,
u32 start_seq, u32 end_seq,
- int dup_sack, int *fack_count,
- int *reord, int *flag)
+ int dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *prev;
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
- (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS)
+ (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
goto fallback;
if (!skb_can_shift(skb))
goto fallback;
if (in_sack) {
len = skb->len;
pcount = tcp_skb_pcount(skb);
- mss = tcp_shift_mss(skb);
+ mss = tcp_skb_seglen(skb);
/* TODO: Fix DSACKs to not fragment already SACKed and we can
* drop this restriction as unnecessary
*/
- if (mss != tcp_shift_mss(prev))
+ if (mss != tcp_skb_seglen(prev))
goto fallback;
} else {
if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
/* TODO: Fix DSACKs to not fragment already SACKed and we can
* drop this restriction as unnecessary
*/
- if (mss != tcp_shift_mss(prev))
+ if (mss != tcp_skb_seglen(prev))
goto fallback;
if (len == mss) {
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord,
- flag, mss))
+ if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
goto out;
skb = tcp_write_queue_next(sk, prev);
- if (!skb_can_shift(skb))
- goto out;
- if (skb == tcp_send_head(sk))
- goto out;
- if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+ if (!skb_can_shift(skb) ||
+ (skb == tcp_send_head(sk)) ||
+ ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
+ (mss != tcp_skb_seglen(skb)))
goto out;
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len,
- *fack_count, reord, flag, mss);
+ tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss);
}
out:
- *fack_count += pcount;
+ state->fack_count += pcount;
return prev;
noop:
return skb;
fallback:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
return NULL;
}
static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
struct tcp_sack_block *next_dup,
+ struct tcp_sacktag_state *state,
u32 start_seq, u32 end_seq,
- int dup_sack_in, int *fack_count,
- int *reord, int *flag)
+ int dup_sack_in)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
* so not even _safe variant of the loop is enough.
*/
if (in_sack <= 0) {
- tmp = tcp_shift_skb_data(sk, skb, start_seq,
- end_seq, dup_sack,
- fack_count, reord, flag);
+ tmp = tcp_shift_skb_data(sk, skb, state,
+ start_seq, end_seq, dup_sack);
if (tmp != NULL) {
if (tmp != skb) {
skb = tmp;
break;
if (in_sack) {
- *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
- *fack_count,
- &(TCP_SKB_CB(skb)->sacked),
- tcp_skb_pcount(skb));
+ TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
+ state,
+ dup_sack,
+ tcp_skb_pcount(skb));
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
- *fack_count += tcp_skb_pcount(skb);
+ state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
* a normal way
*/
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
- u32 skip_to_seq, int *fack_count)
+ struct tcp_sacktag_state *state,
+ u32 skip_to_seq)
{
tcp_for_write_queue_from(skb, sk) {
if (skb == tcp_send_head(sk))
if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
break;
- *fack_count += tcp_skb_pcount(skb);
+ state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
struct sock *sk,
struct tcp_sack_block *next_dup,
- u32 skip_to_seq,
- int *fack_count, int *reord,
- int *flag)
+ struct tcp_sacktag_state *state,
+ u32 skip_to_seq)
{
if (next_dup == NULL)
return skb;
if (before(next_dup->start_seq, skip_to_seq)) {
- skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
- skb = tcp_sacktag_walk(skb, sk, NULL,
- next_dup->start_seq, next_dup->end_seq,
- 1, fack_count, reord, flag);
+ skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
+ skb = tcp_sacktag_walk(skb, sk, NULL, state,
+ next_dup->start_seq, next_dup->end_seq,
+ 1);
}
return skb;
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
+ struct tcp_sacktag_state state;
struct sk_buff *skb;
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
- int reord = tp->packets_out;
- int flag = 0;
int found_dup_sack = 0;
- int fack_count;
int i, j;
int first_sack_index;
+ state.flag = 0;
+ state.reord = tp->packets_out;
+
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
tp->fackets_out = 0;
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
- flag |= FLAG_DSACKING_ACK;
+ state.flag |= FLAG_DSACKING_ACK;
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
}
skb = tcp_write_queue_head(sk);
- fack_count = 0;
+ state.fack_count = 0;
i = 0;
if (!tp->sacked_out) {
/* Event "B" in the comment above. */
if (after(end_seq, tp->high_seq))
- flag |= FLAG_DATA_LOST;
+ state.flag |= FLAG_DATA_LOST;
/* Skip too early cached blocks */
while (tcp_sack_cache_ok(tp, cache) &&
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, start_seq,
- &fack_count);
+ skb = tcp_sacktag_skip(skb, sk, &state,
+ start_seq);
skb = tcp_sacktag_walk(skb, sk, next_dup,
+ &state,
start_seq,
cache->start_seq,
- dup_sack, &fack_count,
- &reord, &flag);
+ dup_sack);
}
/* Rest of the block already fully processed? */
goto advance_sp;
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
- cache->end_seq,
- &fack_count, &reord,
- &flag);
+ &state,
+ cache->end_seq);
/* ...tail remains todo... */
if (tcp_highest_sack_seq(tp) == cache->end_seq) {
skb = tcp_highest_sack(sk);
if (skb == NULL)
break;
- fack_count = tp->fackets_out;
+ state.fack_count = tp->fackets_out;
cache++;
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
- &fack_count);
+ skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
skb = tcp_highest_sack(sk);
if (skb == NULL)
break;
- fack_count = tp->fackets_out;
+ state.fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
+ skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
walk:
- skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
- dup_sack, &fack_count, &reord, &flag);
+ skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+ start_seq, end_seq, dup_sack);
advance_sp:
/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
* due to in-order walk
*/
if (after(end_seq, tp->frto_highmark))
- flag &= ~FLAG_ONLY_ORIG_SACKED;
+ state.flag &= ~FLAG_ONLY_ORIG_SACKED;
i++;
}
tcp_verify_left_out(tp);
- if ((reord < tp->fackets_out) &&
+ if ((state.reord < tp->fackets_out) &&
((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
(!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- return flag;
+ return state.flag;
}
/* Limits sacked_out so that sum with lost_out isn't ever larger than
* packets_out. Returns zero if sacked_out adjustement wasn't necessary.
*/
-int tcp_limit_reno_sacked(struct tcp_sock *tp)
+static int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
u32 holes;
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
break;
- if (skb->len > mss &&
+ if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tcp_xmit_retransmit_queue(sk);
}
+static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)	/* common tail for RTT samples: update the estimator, recompute the RTO, clear the backoff */
+{
+ tcp_rtt_estimator(sk, seq_rtt);	/* feed the new sample seq_rtt into the RTT estimator */
+ tcp_set_rto(sk);	/* NOTE(review): callers used to follow this with tcp_bound_rto(); the TCP_RTO_MAX clamp appears to have been folded into tcp_set_rto() by this patch -- confirm */
+ inet_csk(sk)->icsk_backoff = 0;	/* reset icsk_backoff to zero after taking the sample */
+}
+
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Supersedes RFC1323)
*/
* in window is lost... Voila. --ANK (010210)
*/
struct tcp_sock *tp = tcp_sk(sk);
- const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
- tcp_rtt_estimator(sk, seq_rtt);
- tcp_set_rto(sk);
- inet_csk(sk)->icsk_backoff = 0;
- tcp_bound_rto(sk);
+
+ tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
}
static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
if (flag & FLAG_RETRANS_DATA_ACKED)
return;
- tcp_rtt_estimator(sk, seq_rtt);
- tcp_set_rto(sk);
- inet_csk(sk)->icsk_backoff = 0;
- tcp_bound_rto(sk);
+ tcp_valid_rtt_meas(sk, seq_rtt);
}
static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
return 0;
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = get_softnet_dma();
+ tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {