X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_cong.c;h=3a6be23d222f382c32dcd1e3acc5e55b97594797;hb=29e75252da20f3ab9e132c68c9aed156b87beae6;hp=c1b34f1edb32740fa38dc724f6792f5998118a54;hpb=e905a9edab7f4f14f9213b52234e4a346c690911;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index c1b34f1..3a6be23 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -12,6 +12,8 @@ #include #include +int sysctl_tcp_max_ssthresh = 0; + static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); @@ -29,7 +31,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) } /* - * Attach new congestion control algorthim to the list + * Attach new congestion control algorithm to the list * of available options. */ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) @@ -77,18 +79,19 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) - return; + /* if no choice made yet assign the current value set as default */ + if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) { + rcu_read_lock(); + list_for_each_entry_rcu(ca, &tcp_cong_list, list) { + if (try_module_get(ca->owner)) { + icsk->icsk_ca_ops = ca; + break; + } - rcu_read_lock(); - list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (try_module_get(ca->owner)) { - icsk->icsk_ca_ops = ca; - break; + /* fallback to next available */ } - + rcu_read_unlock(); } - rcu_read_unlock(); if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); @@ -123,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name) #endif if (ca) { - ca->non_restricted = 1; /* default is always allowed */ + ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ list_move(&ca->list, &tcp_cong_list); ret = 0; } @@ -178,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (!ca->non_restricted) + if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", @@ -209,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val) } } - /* pass 2 clear */ + /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) - ca->non_restricted = 0; + ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) - ca->non_restricted = 1; + ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); @@ -236,6 +239,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = tcp_ca_find(name); + /* no change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; @@ -252,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (!ca) err = -ENOENT; - else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) + else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) err = -EPERM; else if (!try_module_get(ca->owner)) @@ -261,7 +265,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) else { tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; - if (icsk->icsk_ca_ops->init) + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); } out: @@ -269,31 +274,67 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } +/* RFC2861 Check whether we are limited by application or congestion window + * This is the inverse of cwnd check in tcp_tso_should_defer + */ +int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 left; + + if (in_flight >= tp->snd_cwnd) + return 1; + + if (!sk_can_gso(sk)) + return 0; + + left = tp->snd_cwnd - in_flight; + if (sysctl_tcp_tso_win_divisor) + return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd; + else + return left <= tcp_max_burst(tp); +} +EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); /* - * Linear increase during slow start + * Slow start is used when congestion window is less than slow start + * threshold. This version implements the basic RFC2581 version + * and optionally supports: + * RFC3742 Limited Slow Start - growth limited to max_ssthresh + * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged */ void tcp_slow_start(struct tcp_sock *tp) { - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; + int cnt; /* increase in packets */ + + /* RFC3465: ABC Slow start + * Increase only after a full MSS of bytes is acked + * + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) + return; - /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } + if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) + cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ + else + cnt = tp->snd_cwnd; /* exponential increase */ + + /* RFC3465: ABC + * We MAY increase by 2 if discovered delayed ack + */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) + cnt <<= 1; tp->bytes_acked = 0; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; + tp->snd_cwnd_cnt += cnt; + while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd_cnt -= tp->snd_cwnd; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } } EXPORT_SYMBOL_GPL(tcp_slow_start); @@ -304,8 +345,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -355,8 +395,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk) EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); struct tcp_congestion_ops tcp_reno = { + .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", - .non_restricted = 1, .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid,