[PATCH] tcp: documents timewait refcnt tricks
[safe/jmp/linux-2.6] / net / ipv4 / tcp_yeah.c
index 815e020..a0f2403 100644 (file)
@@ -6,13 +6,14 @@
  *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
  *
  */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
 
-#include "tcp_yeah.h"
+#include <net/tcp.h>
 
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
+#include "tcp_vegas.h"
 
 #define TCP_YEAH_ALPHA       80 //lin number of packets queued at the bottleneck
 #define TCP_YEAH_GAMMA        1 //lin fraction of queue to be removed per rtt
 
 /* YeAH variables */
 struct yeah {
-       /* Vegas */
-       u32     beg_snd_nxt;    /* right edge during last RTT */
-       u32     beg_snd_una;    /* left edge  during last RTT */
-       u32     beg_snd_cwnd;   /* saves the size of the cwnd */
-       u8      doing_vegas_now;/* if true, do vegas for this RTT */
-       u16     cntRTT;         /* # of RTTs measured within last RTT */
-       u32     minRTT;         /* min of RTTs measured within last RTT (in usec) */
-       u32     baseRTT;        /* the min of all Vegas RTT measurements seen (in usec) */
+       struct vegas vegas;     /* must be first */
 
        /* YeAH */
        u32 lastQ;
@@ -64,38 +58,18 @@ static void tcp_yeah_init(struct sock *sk)
 }
 
 
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct yeah *yeah = inet_csk_ca(sk);
 
        if (icsk->icsk_ca_state == TCP_CA_Open)
                yeah->pkts_acked = pkts_acked;
-}
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u64 div64_64(u64 dividend, u64 divisor)
-{
-       u32 d = divisor;
-
-       if (divisor > 0xffffffffULL) {
-               unsigned int shift = fls(divisor >> 32);
 
-               d = divisor >> shift;
-               dividend >>= shift;
-       }
-
-       /* avoid 64 bit division if possible */
-       if (dividend >> 32)
-               do_div(dividend, d);
-       else
-               dividend = (u32) dividend / d;
-
-       return dividend;
+       tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
-                                u32 seq_rtt, u32 in_flight, int flag)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct yeah *yeah = inet_csk_ca(sk);
@@ -103,12 +77,13 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
        if (!tcp_is_cwnd_limited(sk, in_flight))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh) {
+       if (tp->snd_cwnd <= tp->snd_ssthresh)
                tcp_slow_start(tp);
-       } else if (!yeah->doing_reno_now) {
+
+       else if (!yeah->doing_reno_now) {
                /* Scalable */
 
-               tp->snd_cwnd_cnt+=yeah->pkts_acked;
+               tp->snd_cwnd_cnt += yeah->pkts_acked;
                if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                                tp->snd_cwnd++;
@@ -119,29 +94,22 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 
        } else {
                /* Reno */
-
-               if (tp->snd_cwnd_cnt < tp->snd_cwnd)
-                       tp->snd_cwnd_cnt++;
-
-               if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-                       tp->snd_cwnd++;
-                       tp->snd_cwnd_cnt = 0;
-               }
+               tcp_cong_avoid_ai(tp, tp->snd_cwnd);
        }
 
-       /* The key players are v_beg_snd_una and v_beg_snd_nxt.
+       /* The key players are vegas.beg_snd_una and vegas.beg_snd_nxt.
         *
         * These are so named because they represent the approximate values
         * of snd_una and snd_nxt at the beginning of the current RTT. More
         * precisely, they represent the amount of data sent during the RTT.
         * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
-        * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
+        * we will calculate that (vegas.beg_snd_nxt - vegas.beg_snd_una) outstanding
         * bytes of data have been ACKed during the course of the RTT, giving
         * an "actual" rate of:
         *
-        *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
+        *     (vegas.beg_snd_nxt - vegas.beg_snd_una) / (rtt duration)
         *
-        * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
+        * Unfortunately, vegas.beg_snd_una is not exactly equal to snd_una,
         * because delayed ACKs can cover more than one segment, so they
         * don't line up nicely with the boundaries of RTTs.
         *
@@ -151,7 +119,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
         * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
         */
 
-       if (after(ack, yeah->beg_snd_nxt)) {
+       if (after(ack, yeah->vegas.beg_snd_nxt)) {
 
                /* We do the Vegas calculations only if we got enough RTT
                 * samples that we can be reasonably sure that we got
@@ -162,9 +130,9 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
                 * If  we have 3 samples, we should be OK.
                 */
 
-               if (yeah->cntRTT > 2) {
-                       u32 rtt;
-                       u32 queue, maxqueue;
+               if (yeah->vegas.cntRTT > 2) {
+                       u32 rtt, queue;
+                       u64 bw;
 
                        /* We have enough RTT samples, so, using the Vegas
                         * algorithm, we determine if we should increase or
@@ -177,34 +145,38 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
                         * of delayed ACKs, at the cost of noticing congestion
                         * a bit later.
                         */
-                       rtt = yeah->minRTT;
-
-                       queue = (u32)div64_64((u64)tp->snd_cwnd * (rtt - yeah->baseRTT), rtt);
-
-                       maxqueue = TCP_YEAH_ALPHA;
+                       rtt = yeah->vegas.minRTT;
 
-                       if (queue > maxqueue ||
-                                   rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
-
-                               if (queue > maxqueue && tp->snd_cwnd > yeah->reno_count) {
-                                       u32 reduction = min( queue / TCP_YEAH_GAMMA ,
-                                                        tp->snd_cwnd >> TCP_YEAH_EPSILON );
+                       /* Compute excess number of packets above bandwidth.
+                        * Avoid doing full 64 bit divide.
+                        */
+                       bw = tp->snd_cwnd;
+                       bw *= rtt - yeah->vegas.baseRTT;
+                       do_div(bw, rtt);
+                       queue = bw;
+
+                       if (queue > TCP_YEAH_ALPHA ||
+                           rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
+                               if (queue > TCP_YEAH_ALPHA &&
+                                   tp->snd_cwnd > yeah->reno_count) {
+                                       u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+                                                           tp->snd_cwnd >> TCP_YEAH_EPSILON);
 
                                        tp->snd_cwnd -= reduction;
 
-                                       tp->snd_cwnd = max( tp->snd_cwnd, yeah->reno_count);
+                                       tp->snd_cwnd = max(tp->snd_cwnd,
+                                                          yeah->reno_count);
 
                                        tp->snd_ssthresh = tp->snd_cwnd;
-                       }
+                               }
 
                                if (yeah->reno_count <= 2)
-                                       yeah->reno_count = max( tp->snd_cwnd>>1, 2U);
+                                       yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
                                else
                                        yeah->reno_count++;
 
-                               yeah->doing_reno_now =
-                                                  min_t( u32, yeah->doing_reno_now + 1 , 0xffffff);
-
+                               yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
+                                                          0xffffffU);
                        } else {
                                yeah->fast_count++;
 
@@ -223,13 +195,13 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
                /* Save the extent of the current window so we can use this
                 * at the end of the next RTT.
                 */
-               yeah->beg_snd_una  = yeah->beg_snd_nxt;
-               yeah->beg_snd_nxt  = tp->snd_nxt;
-               yeah->beg_snd_cwnd = tp->snd_cwnd;
+               yeah->vegas.beg_snd_una  = yeah->vegas.beg_snd_nxt;
+               yeah->vegas.beg_snd_nxt  = tp->snd_nxt;
+               yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
 
                /* Wipe the slate clean for the next RTT. */
-               yeah->cntRTT = 0;
-               yeah->minRTT = 0x7fffffff;
+               yeah->vegas.cntRTT = 0;
+               yeah->vegas.minRTT = 0x7fffffff;
        }
 }
 
@@ -245,7 +217,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 
                reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
        } else
-               reduction = max(tp->snd_cwnd>>1,2U);
+               reduction = max(tp->snd_cwnd>>1, 2U);
 
        yeah->fast_count = 0;
        yeah->reno_count = max(yeah->reno_count>>1, 2U);
@@ -254,11 +226,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah = {
+       .flags          = TCP_CONG_RTT_STAMP,
        .init           = tcp_yeah_init,
        .ssthresh       = tcp_yeah_ssthresh,
        .cong_avoid     = tcp_yeah_cong_avoid,
        .min_cwnd       = tcp_reno_min_cwnd,
-       .rtt_sample     = tcp_vegas_rtt_calc,
        .set_state      = tcp_vegas_state,
        .cwnd_event     = tcp_vegas_cwnd_event,
        .get_info       = tcp_vegas_get_info,