diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2653924..34f5cc2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -41,12 +42,13 @@
 #include <net/ip.h>
 #include <net/tcp_states.h>
 #include <net/inet_ecn.h>
+#include <net/dst.h>
 
 #include <linux/seq_file.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
-extern atomic_t tcp_orphan_count;
+extern struct percpu_counter tcp_orphan_count;
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
@@ -61,9 +63,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
 
-/* Minimal RCV_MSS. */
-#define TCP_MIN_RCVMSS		536U
-
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		512
 
@@ -166,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 
 /*
  *     TCP option lengths
@@ -176,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_COOKIE_BASE    2	/* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR    3	/* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MIN     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
+#define TCPOLEN_COOKIE_MAX     (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -236,9 +240,10 @@ extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_cookie_size;
 
 extern atomic_t tcp_memory_allocated;
-extern atomic_t tcp_sockets_allocated;
+extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
 /*
@@ -265,6 +270,19 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num)
 		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
 }
 
+/* syncookies: remember time of last synqueue overflow */
+static inline void tcp_synq_overflow(struct sock *sk)
+{
+	tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
+}
+
+/* syncookies: no recent synqueue overflow on this listening socket? */
+static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
+{
+	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
+}
+
 extern struct proto tcp_prot;
 
 #define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
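
The two helpers added above reuse a listener's rx_opt.ts_recent_stamp (otherwise unused on a listening socket) to remember when the SYN queue last overflowed; syncookies are then only honoured within TCP_TIMEOUT_INIT of that event. Below is a rough userspace sketch of the same logic, not kernel code: time_after() is written out as in linux/jiffies.h, and the HZ/clock values are illustrative stand-ins.

/* Userspace sketch of the synqueue-overflow bookkeeping above. */
#include <stdio.h>

/* Wraparound-safe "a is later than b", as in linux/jiffies.h: the
 * signed cast keeps the comparison correct across jiffies wraparound. */
#define time_after(a, b)	((long)((b) - (a)) < 0)

#define HZ			1000UL
#define TCP_TIMEOUT_INIT	(3 * HZ)	/* 3 s, as in this era's tcp.h */

static unsigned long jiffies;			/* simulated clock */
static unsigned long last_overflow_stamp;	/* plays rx_opt.ts_recent_stamp */

static void synq_overflow(void)
{
	last_overflow_stamp = jiffies;
}

static int synq_no_recent_overflow(void)
{
	return time_after(jiffies, last_overflow_stamp + TCP_TIMEOUT_INIT);
}

int main(void)
{
	jiffies = 1000;
	synq_overflow();		/* overflow happens at t = 1000 */
	jiffies += HZ;			/* 1 s later: still "recent" */
	printf("after 1s: no_recent=%d\n", synq_no_recent_overflow()); /* 0 */
	jiffies += 3 * HZ;		/* 4 s later: window expired */
	printf("after 4s: no_recent=%d\n", synq_no_recent_overflow()); /* 1 */
	return 0;
}
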
@@ -329,11 +347,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
 
 extern void tcp_enter_quickack_mode(struct sock *sk);
 
-static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
-{
-	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-}
-
 #define	TCP_ECN_OK		1
 #define	TCP_ECN_QUEUE_CWR	2
 #define	TCP_ECN_DEMAND_CWR	4
@@ -345,8 +358,7 @@ TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th)
 		inet_rsk(req)->ecn_ok = 1;
 }
 
-enum tcp_tw_status
-{
+enum tcp_tw_status {
 	TCP_TW_SUCCESS = 0,
 	TCP_TW_RST = 1,
 	TCP_TW_ACK = 2,
@@ -380,13 +392,13 @@ extern int tcp_getsockopt(struct sock *sk, int level,
 					  int __user *optlen);
 extern int			tcp_setsockopt(struct sock *sk, int level,
 					       int optname, char __user *optval,
-					       int optlen);
+					       unsigned int optlen);
 extern int			compat_tcp_getsockopt(struct sock *sk,
 					int level, int optname,
 					char __user *optval, int __user *optlen);
 extern int			compat_tcp_setsockopt(struct sock *sk,
 					int level, int optname,
-					char __user *optval, int optlen);
+					char __user *optval, unsigned int optlen);
 extern void			tcp_set_keepalive(struct sock *sk, int val);
 extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 					    struct msghdr *msg,
@@ -395,6 +407,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 
 extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
+						  u8 **hvpp,
 						  int estab);
 
 extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
@@ -429,7 +442,8 @@ extern int tcp_connect(struct sock *sk);
 
 extern struct sk_buff *		tcp_make_synack(struct sock *sk,
 						struct dst_entry *dst,
-						struct request_sock *req);
+						struct request_sock *req,
+						struct request_values *rvp);
 
 extern int			tcp_disconnect(struct sock *sk, int flags);
 
@@ -455,6 +469,7 @@ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 				      int nonagle);
 extern int tcp_may_send_now(struct sock *sk);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
+extern void tcp_retransmit_timer(struct sock *sk);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
 extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
@@ -481,7 +496,16 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)
 }
 
 extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
-extern unsigned int tcp_current_mss(struct sock *sk, int large);
+extern unsigned int tcp_current_mss(struct sock *sk);
+
+/* Bound MSS / TSO packet size with the half of the window */
+static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+	if (tp->max_window && pktsize > (tp->max_window >> 1))
+		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	else
+		return pktsize;
+}
 
 /* tcp.c */
 extern void tcp_get_info(struct sock *, struct tcp_info *);
@@ -498,6 +522,17 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
 
+static inline void tcp_bound_rto(const struct sock *sk)
+{
+	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
+		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+}
+
+static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
+{
+	return (tp->srtt >> 3) + tp->rttvar;
+}
+
 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 {
 	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
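
__tcp_set_rto() above condenses the RFC 2988 formula RTO = SRTT + 4*RTTVAR: tp->srtt is kept left-shifted by 3 (in eighths of a tick), so srtt >> 3 recovers the estimate, while tp->rttvar is stored pre-scaled and can be added directly. A minimal userspace sketch of the arithmetic, with illustrative values:

/* Sketch of the RTO computation behind __tcp_set_rto(); not kernel code. */
#include <stdio.h>

typedef unsigned int u32;

static u32 set_rto(u32 srtt_scaled, u32 rttvar)
{
	/* srtt_scaled holds SRTT << 3; rttvar already carries the
	 * variance contribution in final units. */
	return (srtt_scaled >> 3) + rttvar;
}

int main(void)
{
	u32 srtt = 100 << 3;	/* smoothed RTT of 100 ticks, stored x8 */
	u32 rttvar = 50;	/* pre-scaled variance term */
	printf("rto = %u ticks\n", set_rto(srtt, rttvar));	/* 150 */
	return 0;
}
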
@@ -521,6 +556,17 @@ static inline void tcp_fast_path_check(struct sock *sk)
 		tcp_fast_path_on(tp);
 }
 
+/* Compute the actual rto_min value */
+static inline u32 tcp_rto_min(struct sock *sk)
+{
+	struct dst_entry *dst = __sk_dst_get(sk);
+	u32 rto_min = TCP_RTO_MIN;
+
+	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
+		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
+	return rto_min;
+}
+
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
@@ -607,21 +653,6 @@ static inline int tcp_skb_mss(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_size;
 }
 
-static inline void tcp_dec_pcount_approx_int(__u32 *count, const int decr)
-{
-	if (*count) {
-		*count -= decr;
-		if ((int)*count < 0)
-			*count = 0;
-	}
-}
-
-static inline void tcp_dec_pcount_approx(__u32 *count,
-					 const struct sk_buff *skb)
-{
-	tcp_dec_pcount_approx_int(count, tcp_skb_pcount(skb));
-}
-
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
 	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
@@ -685,6 +716,7 @@ extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
 extern int tcp_set_allowed_congestion_control(char *allowed);
 extern int tcp_set_congestion_control(struct sock *sk, const char *name);
 extern void tcp_slow_start(struct tcp_sock *tp);
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
 
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 extern u32 tcp_reno_ssthresh(struct sock *sk);
@@ -761,7 +793,12 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
 }
 
-extern int tcp_limit_reno_sacked(struct tcp_sock *tp);
+#define TCP_INFINITE_SSTHRESH	0x7fffffff
+
+static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
+{
+	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
 
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is rate halving phase, when cwnd is decreasing towards
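
tcp_in_initial_slowstart() works because a new connection starts with snd_ssthresh at TCP_INFINITE_SSTHRESH and only a loss or ECN event ever lowers it, so comparing against the constant answers "has this flow ever left initial slow start?". A small stand-alone illustration, where struct flow is a stand-in for tcp_sock:

/* Sketch of the TCP_INFINITE_SSTHRESH convention; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define TCP_INFINITE_SSTHRESH	0x7fffffff

struct flow {
	unsigned int snd_ssthresh;
};

static bool in_initial_slowstart(const struct flow *f)
{
	return f->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}

int main(void)
{
	struct flow f = { .snd_ssthresh = TCP_INFINITE_SSTHRESH };
	printf("fresh flow: %d\n", in_initial_slowstart(&f));	/* 1 */
	f.snd_ssthresh = 42;	/* a loss event set a real threshold */
	printf("after loss: %d\n", in_initial_slowstart(&f));	/* 0 */
	return 0;
}
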
@@ -823,15 +860,15 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
 }
 
-static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
 {
 	tp->snd_wl1 = seq;
 }
 
-static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
 {
 	tp->snd_wl1 = seq;
 }
@@ -884,30 +921,32 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!sysctl_tcp_low_latency && tp->ucopy.task) {
-		__skb_queue_tail(&tp->ucopy.prequeue, skb);
-		tp->ucopy.memory += skb->truesize;
-		if (tp->ucopy.memory > sk->sk_rcvbuf) {
-			struct sk_buff *skb1;
-
-			BUG_ON(sock_owned_by_user(sk));
-
-			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
-				sk_backlog_rcv(sk, skb1);
-				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED);
-			}
-
-			tp->ucopy.memory = 0;
-		} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-			wake_up_interruptible(sk->sk_sleep);
-			if (!inet_csk_ack_scheduled(sk))
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-							  (3 * TCP_RTO_MIN) / 4,
-							  TCP_RTO_MAX);
+	if (sysctl_tcp_low_latency || !tp->ucopy.task)
+		return 0;
+
+	__skb_queue_tail(&tp->ucopy.prequeue, skb);
+	tp->ucopy.memory += skb->truesize;
+	if (tp->ucopy.memory > sk->sk_rcvbuf) {
+		struct sk_buff *skb1;
+
+		BUG_ON(sock_owned_by_user(sk));
+
+		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+			sk_backlog_rcv(sk, skb1);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPPREQUEUEDROPPED);
 		}
-		return 1;
+
+		tp->ucopy.memory = 0;
+	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
+		wake_up_interruptible_poll(sk->sk_sleep,
+					   POLLIN | POLLRDNORM | POLLRDBAND);
+		if (!inet_csk_ack_scheduled(sk))
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+						  (3 * tcp_rto_min(sk)) / 4,
+						  TCP_RTO_MAX);
 	}
-	return 0;
+	return 1;
 }
 
@@ -927,7 +966,6 @@ extern void tcp_done(struct sock *sk);
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
-	rx_opt->eff_sacks = 0;
 	rx_opt->num_sacks = 0;
 }
 
@@ -988,6 +1026,11 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
 	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
 }
 
+static inline int keepalive_probes(const struct tcp_sock *tp)
+{
+	return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+}
+
 static inline int tcp_fin_time(const struct sock *sk)
 {
 	int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
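
keepalive_probes() above relies on the GNU `a ?: b` extension: the per-socket value wins when non-zero, otherwise the system-wide sysctl default applies. A compact userspace sketch of that fallback pattern, with hypothetical names and defaults (compile with gcc, which accepts the extension):

/* Sketch of the "per-socket override, else sysctl" pattern; not kernel code. */
#include <stdio.h>

static int sysctl_keepalive_probes = 9;	/* illustrative system default */

struct sock_opts {
	int keepalive_probes;	/* 0 means "not overridden" */
};

static int keepalive_probes(const struct sock_opts *s)
{
	/* GNU extension: (a ?: b) yields a when a is non-zero, else b. */
	return s->keepalive_probes ? : sysctl_keepalive_probes;
}

int main(void)
{
	struct sock_opts a = { 0 }, b = { 5 };
	printf("default:  %d\n", keepalive_probes(&a));	/* 9 */
	printf("override: %d\n", keepalive_probes(&b));	/* 5 */
	return 0;
}
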
@@ -999,11 +1042,21 @@ static inline int tcp_fin_time(const struct sock *sk)
 	return fin_timeout;
 }
 
-static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
+static inline int tcp_paws_check(const struct tcp_options_received *rx_opt,
+				 int paws_win)
 {
-	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
-		return 0;
-	if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
+	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
+		return 1;
+	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+		return 1;
+
+	return 0;
+}
+
+static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
+				  int rst)
+{
+	if (tcp_paws_check(rx_opt, 0))
 		return 0;
 
 	/* RST segments are not recommended to carry timestamp,
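
Note the inversion in the hunk above: the old tcp_paws_check() returned non-zero when a segment failed the PAWS test, while the new one returns 1 when the timestamp is acceptable, and paws_win lets callers tolerate a bounded backward step. A userspace rendition of the core comparison (the TCP_PAWS_24DAYS escape hatch is noted but omitted), with illustrative timestamp values:

/* Sketch of the new tcp_paws_check() predicate; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct opts {
	uint32_t ts_recent;	/* newest timestamp seen so far */
	uint32_t rcv_tsval;	/* timestamp on the arriving segment */
};

static int paws_check(const struct opts *o, int paws_win)
{
	/* Acceptable if tsval is not (more than paws_win) older than
	 * ts_recent; the signed cast keeps this correct across 32-bit
	 * timestamp wraparound.  The kernel version additionally accepts
	 * anything once ts_recent is over TCP_PAWS_24DAYS old. */
	return (int32_t)(o->ts_recent - o->rcv_tsval) <= paws_win;
}

int main(void)
{
	struct opts fresh = { .ts_recent = 100, .rcv_tsval = 105 };
	struct opts stale = { .ts_recent = 100, .rcv_tsval =  90 };
	printf("fresh: %d\n", paws_check(&fresh, 0));		/* 1: accept */
	printf("stale: %d\n", paws_check(&stale, 0));		/* 0: reject */
	printf("stale, win=10: %d\n", paws_check(&stale, 10));	/* 1 */
	return 0;
}
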
+ */ +struct tcp_cookie_values { + struct kref kref; + u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; + u8 cookie_pair_size; + u8 cookie_desired; + u16 s_data_desired:11, + s_data_constant:1, + s_data_in:1, + s_data_out:1, + s_data_unused:2; + u8 s_data_payload[0]; +}; + +static inline void tcp_cookie_values_release(struct kref *kref) +{ + kfree(container_of(kref, struct tcp_cookie_values, kref)); +} + +/* The length of constant payload data. Note that s_data_desired is + * overloaded, depending on s_data_constant: either the length of constant + * data (returned here) or the limit on variable data. + */ +static inline int tcp_s_data_size(const struct tcp_sock *tp) +{ + return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) + ? tp->cookie_values->s_data_desired + : 0; +} + +/** + * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. + * + * As tcp_request_sock has already been extended in other places, the + * only remaining method is to pass stack values along as function + * parameters. These parameters are not needed after sending SYNACK. + * + * @cookie_bakery: cryptographic secret and message workspace. + * + * @cookie_plus: bytes in authenticator/cookie option, copied from + * struct tcp_options_received (above). + */ +struct tcp_extend_values { + struct request_values rv; + u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; + u8 cookie_plus:6, + cookie_out_never:1, + cookie_in_always:1; +}; + +static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) +{ + return (struct tcp_extend_values *)rvp; +} + extern void tcp_v4_init(void); extern void tcp_init(void);