X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Fnet%2Ftcp.h;h=aa04b9a5093b2ceed25c672cf5abaefa9b75b7fb;hb=3374cd1abd478f767aaedf2c21d109596ff0fe72;hp=4a99a8e391212b98572500ea03658a3635e3acef;hpb=da5c78c82629a167794436e4306b4cf1faddea90;p=safe%2Fjmp%2Flinux-2.6 diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a99a8e..aa04b9a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -164,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ /* * TCP option lengths @@ -174,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ +#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ +#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) +#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -190,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ +/* TCP thin-stream limits */ +#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -234,6 +243,9 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; +extern int sysctl_tcp_cookie_size; +extern int sysctl_tcp_thin_linear_timeouts; +extern int sysctl_tcp_thin_dupack; extern atomic_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -340,11 +352,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void tcp_clear_options(struct tcp_options_received *rx_opt) -{ - rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; -} - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 @@ -398,6 +405,8 @@ extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); extern void tcp_set_keepalive(struct sock *sk, int val); +extern void tcp_syn_ack_timeout(struct sock *sk, + struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, @@ -405,8 +414,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, - struct dst_entry *dst); + u8 **hvpp, + int estab); extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); @@ -854,13 +863,6 @@ static inline void tcp_check_probe_timer(struct sock *sk) icsk->icsk_rto, TCP_RTO_MAX); } -static inline void tcp_push_pending_frames(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); -} - static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; @@ -937,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -970,7 +972,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale); + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd); static inline int tcp_win_from_space(int space) { @@ -1191,33 +1194,18 @@ extern int tcp_v4_md5_do_del(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); +extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); -extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); -extern void __tcp_put_md5sig_pool(void); +extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +extern void tcp_put_md5sig_pool(void); + extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key); -static inline -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) -{ - int cpu = get_cpu(); - struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); - if (!ret) - put_cpu(); - return ret; -} - -static inline void tcp_put_md5sig_pool(void) -{ - __tcp_put_md5sig_pool(); - put_cpu(); -} - /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { @@ -1226,6 +1214,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); + tcp_clear_all_retrans_hints(tcp_sk(sk)); } static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) @@ -1257,29 +1246,6 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) -/* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN. - */ -static inline bool retransmits_timed_out(const struct sock *sk, - unsigned int boundary) -{ - unsigned int timeout, linear_backoff_thresh; - - if (!inet_csk(sk)->icsk_retransmits) - return false; - - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - - return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= timeout; -} - static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; @@ -1362,6 +1328,15 @@ static inline int tcp_write_queue_empty(struct sock *sk) return skb_queue_empty(&sk->sk_write_queue); } +static inline void tcp_push_pending_frames(struct sock *sk) +{ + if (tcp_send_head(sk)) { + struct tcp_sock *tp = tcp_sk(sk); + + __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); + } +} + /* Start sequence of the highest skb with SACKed bit, valid only if * sacked > 0 or when the caller has ensured validity by itself. */ @@ -1401,6 +1376,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk, tcp_sk(sk)->highest_sack = new; } +/* Determines whether this is a thin stream (which may suffer from + * increased latency). Used to trigger latency-reducing mechanisms. + */ +static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp) +{ + return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, @@ -1486,6 +1469,83 @@ struct tcp_request_sock_ops { extern int tcp_cookie_generator(u32 *bakery); +/** + * struct tcp_cookie_values - each socket needs extra space for the + * cookies, together with (optional) space for any SYN data. + * + * A tcp_sock contains a pointer to the current value, and this is + * cloned to the tcp_timewait_sock. + * + * @cookie_pair: variable data from the option exchange. + * + * @cookie_desired: user specified tcpct_cookie_desired. Zero + * indicates default (sysctl_tcp_cookie_size). + * After cookie sent, remembers size of cookie. + * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. + * + * @s_data_desired: user specified tcpct_s_data_desired. When the + * constant payload is specified (@s_data_constant), + * holds its length instead. + * Range 0 to TCP_MSS_DESIRED. + * + * @s_data_payload: constant data that is to be included in the + * payload of SYN or SYNACK segments when the + * cookie option is present. + */ +struct tcp_cookie_values { + struct kref kref; + u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; + u8 cookie_pair_size; + u8 cookie_desired; + u16 s_data_desired:11, + s_data_constant:1, + s_data_in:1, + s_data_out:1, + s_data_unused:2; + u8 s_data_payload[0]; +}; + +static inline void tcp_cookie_values_release(struct kref *kref) +{ + kfree(container_of(kref, struct tcp_cookie_values, kref)); +} + +/* The length of constant payload data. Note that s_data_desired is + * overloaded, depending on s_data_constant: either the length of constant + * data (returned here) or the limit on variable data. + */ +static inline int tcp_s_data_size(const struct tcp_sock *tp) +{ + return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) + ? tp->cookie_values->s_data_desired + : 0; +} + +/** + * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. + * + * As tcp_request_sock has already been extended in other places, the + * only remaining method is to pass stack values along as function + * parameters. These parameters are not needed after sending SYNACK. + * + * @cookie_bakery: cryptographic secret and message workspace. + * + * @cookie_plus: bytes in authenticator/cookie option, copied from + * struct tcp_options_received (above). + */ +struct tcp_extend_values { + struct request_values rv; + u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; + u8 cookie_plus:6, + cookie_out_never:1, + cookie_in_always:1; +}; + +static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) +{ + return (struct tcp_extend_values *)rvp; +} + extern void tcp_v4_init(void); extern void tcp_init(void);