X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_vegas.c;h=c6743eec9b7d0c555db5e32fdf73c82d6fa7422e;hb=2e8e18ef52e7dd1af0a3bd1f7d990a1d0b249586;hp=8f06a479305bb9ae27d74579e900aa54a2087eac;hpb=5b4956138173cb8b58d83d3173360e8e681a2b66;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 8f06a47..c6743ee 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -31,7 +31,6 @@ * assumed senders never went idle. */ -#include #include #include #include @@ -39,33 +38,20 @@ #include -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 -static int alpha = 1<doing_vegas_now = 0; } -static void tcp_vegas_init(struct sock *sk) +void tcp_vegas_init(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; vegas_enable(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_init); /* Do RTT sampling needed for Vegas. * Basically we: @@ -121,10 +108,16 @@ static void tcp_vegas_init(struct sock *sk) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) +void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) { struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ + u32 vrtt; + + if (rtt_us < 0) + return; + + /* Never allow zero rtt or baseRTT */ + vrtt = rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) @@ -136,8 +129,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) vegas->minRTT = min(vegas->minRTT, vrtt); vegas->cntRTT++; } +EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); -static void tcp_vegas_state(struct sock *sk, u8 ca_state) +void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) @@ -145,6 +139,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) else vegas_disable(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_state); /* * If the connection is idle and we are restarting, @@ -155,73 +150,36 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) * packets, _then_ we can make Vegas calculations * again. */ -static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) +void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) tcp_vegas_init(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); + +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); - if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); - - /* The key players are v_beg_snd_una and v_beg_snd_nxt. - * - * These are so named because they represent the approximate values - * of snd_una and snd_nxt at the beginning of the current RTT. More - * precisely, they represent the amount of data sent during the RTT. - * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, - * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding - * bytes of data have been ACKed during the course of the RTT, giving - * an "actual" rate of: - * - * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) - * - * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, - * because delayed ACKs can cover more than one segment, so they - * don't line up nicely with the boundaries of RTTs. - * - * Another unfortunate fact of life is that delayed ACKs delay the - * advance of the left edge of our send window, so that the number - * of bytes we send in an RTT is often less than our cwnd will allow. - * So we keep track of our cwnd separately, in v_beg_snd_cwnd. - */ + if (!vegas->doing_vegas_now) { + tcp_reno_cong_avoid(sk, ack, in_flight); + return; + } if (after(ack, vegas->beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; - - /* Take into account the current RTT sample too, to - * decrease the impact of delayed acks. This double counts - * this sample since we count it for the next window as well, - * but that's not too awful, since we're taking the min, - * rather than averaging. - */ - tcp_vegas_rtt_calc(sk, seq_rtt * 1000); /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -236,9 +194,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight); } else { - u32 rtt, target_cwnd, diff; + u32 rtt, diff; + u64 target_cwnd; /* We have enough RTT samples, so, using the Vegas * algorithm, we determine if we should increase or @@ -258,46 +217,35 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, * * This is: * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((old_wnd * vegas->baseRTT) - << V_PARAM_SHIFT) / rtt; + target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. */ - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* Slow start. */ - if (diff > gamma) { - /* Going too fast. Time to slow down - * and switch to congestion avoidance. - */ - tp->snd_ssthresh = 2; - - /* Set cwnd to match the actual rate - * exactly: - * cwnd = (actual rate) * baseRTT - * Then we add 1 because the integer - * truncation robs us of full link - * utilization. - */ - tp->snd_cwnd = min(tp->snd_cwnd, - (target_cwnd >> - V_PARAM_SHIFT)+1); + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { + /* Going too fast. Time to slow down + * and switch to congestion avoidance. + */ - } + /* Set cwnd to match the actual rate + * exactly: + * cwnd = (actual rate) * baseRTT + * Then we add 1 because the integer + * truncation robs us of full link + * utilization. + */ + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); + + } else if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ - u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -306,65 +254,63 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* The old window was too fast, so * we slow down. */ - next_snd_cwnd = old_snd_cwnd - 1; + tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - next_snd_cwnd = old_snd_cwnd + 1; + tp->snd_cwnd++; } else { /* Sending just as fast as we * should be. */ - next_snd_cwnd = old_snd_cwnd; } - - /* Adjust cwnd upward or downward, toward the - * desired value. - */ - if (next_snd_cwnd > tp->snd_cwnd) - tp->snd_cwnd++; - else if (next_snd_cwnd < tp->snd_cwnd) - tp->snd_cwnd--; } if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) tp->snd_cwnd = tp->snd_cwnd_clamp; + + tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } + /* Use normal slow start */ + else if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info *info; - - info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, - sizeof(*info))); - - info->tcpv_enabled = ca->doing_vegas_now; - info->tcpv_rttcnt = ca->cntRTT; - info->tcpv_rtt = ca->baseRTT; - info->tcpv_minrtt = ca->minRTT; - rtattr_failure: ; + struct tcpvegas_info info = { + .tcpv_enabled = ca->doing_vegas_now, + .tcpv_rttcnt = ca->cntRTT, + .tcpv_rtt = ca->baseRTT, + .tcpv_minrtt = ca->minRTT, + }; + + nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } } +EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_vegas_rtt_calc, + .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, @@ -375,7 +321,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; }