X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Fnet%2Ftcp.h;h=646dbe3962eab7ae3832c9e8641549ef9e9d95f9;hb=e0a94c2a63f2644826069044649669b5e7ca75d3;hp=2ab350eca02ee87fe49be39e90dec29a91d428fe;hpb=7de6c033367ab86f39c7723392caf73325cbf286;p=safe%2Fjmp%2Flinux-2.6 diff --git a/include/net/tcp.h b/include/net/tcp.h index 2ab350e..646dbe3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,15 +41,17 @@ #include #include #include +#include #include extern struct inet_hashinfo tcp_hashinfo; -extern atomic_t tcp_orphan_count; +extern struct percpu_counter tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_OPTION_SPACE 40 /* * Never offer a window over 32767 without using window scaling. Some @@ -139,7 +141,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_KEEPINTVL 32767 #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 -#define MAX_TCP_ACCEPT_DEFERRED 65535 #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ @@ -185,6 +186,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_MD5SIG_ALIGNED 20 +#define TCPOLEN_MSS_ALIGNED 4 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -237,7 +239,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; -extern atomic_t tcp_sockets_allocated; +extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; /* @@ -266,13 +268,10 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num) extern struct proto tcp_prot; -DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); -#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field) -#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field) -#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field) -#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field) -#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) -#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) +#define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) +#define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) +#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) +#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) extern void tcp_v4_err(struct sk_buff *skb, u32); @@ -399,6 +398,8 @@ extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab); +extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); + /* * TCP v4 functions exported for the inet6 API */ @@ -433,7 +434,6 @@ extern struct sk_buff * tcp_make_synack(struct sock *sk, extern int tcp_disconnect(struct sock *sk, int flags); -extern void tcp_unhash(struct sock *sk); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; @@ -482,7 +482,16 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) } extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); -extern unsigned int tcp_current_mss(struct sock *sk, int large); +extern unsigned int tcp_current_mss(struct sock *sk); + +/* Bound MSS / TSO packet size with the half of the window */ +static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) +{ + if (tp->max_window && pktsize > (tp->max_window >> 1)) + return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + else + return pktsize; +} /* tcp.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); @@ -522,6 +531,17 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +/* Compute the actual rto_min value */ +static inline u32 tcp_rto_min(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + u32 rto_min = TCP_RTO_MIN; + + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) + rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); + return rto_min; +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -589,7 +609,6 @@ struct tcp_skb_cb { #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) - __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ }; @@ -609,21 +628,6 @@ static inline int tcp_skb_mss(const struct sk_buff *skb) return skb_shinfo(skb)->gso_size; } -static inline void tcp_dec_pcount_approx_int(__u32 *count, const int decr) -{ - if (*count) { - *count -= decr; - if ((int)*count < 0) - *count = 0; - } -} - -static inline void tcp_dec_pcount_approx(__u32 *count, - const struct sk_buff *skb) -{ - tcp_dec_pcount_approx_int(count, tcp_skb_pcount(skb)); -} - /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ @@ -687,6 +691,7 @@ extern void tcp_get_allowed_congestion_control(char *buf, size_t len); extern int tcp_set_allowed_congestion_control(char *allowed); extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern void tcp_slow_start(struct tcp_sock *tp); +extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); @@ -785,11 +790,14 @@ extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that - * it is safe "de facto". + * it is safe "de facto". This will be the default - same as + * the default reordering threshold - but if reordering increases, + * we must be able to allow cwnd to burst at least this much in order + * to not pull it back when holes are filled. */ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) { - return 3; + return tp->reordering; } /* Returns end sequence number of the receiver's advertised window */ @@ -820,15 +828,15 @@ static inline void tcp_push_pending_frames(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - __tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); } -static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; } -static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; } @@ -890,8 +898,8 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) BUG_ON(sock_owned_by_user(sk)); while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk->sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED); + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED); } tp->ucopy.memory = 0; @@ -899,7 +907,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) wake_up_interruptible(sk->sk_sleep); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4, + (3 * tcp_rto_min(sk)) / 4, TCP_RTO_MAX); } return 1; @@ -924,7 +932,6 @@ extern void tcp_done(struct sock *sk); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; - rx_opt->eff_sacks = 0; rx_opt->num_sacks = 0; } @@ -970,9 +977,10 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->acked = 0; ireq->ecn_ok = 0; ireq->rmt_port = tcp_hdr(skb)->source; + ireq->loc_port = tcp_hdr(skb)->dest; } -extern void tcp_enter_memory_pressure(void); +extern void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { @@ -995,11 +1003,21 @@ static inline int tcp_fin_time(const struct sock *sk) return fin_timeout; } -static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst) +static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, + int paws_win) { - if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0) - return 0; - if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS) + if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) + return 1; + if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) + return 1; + + return 0; +} + +static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt, + int rst) +{ + if (tcp_paws_check(rx_opt, 0)) return 0; /* RST segments are not recommended to carry timestamp, @@ -1021,13 +1039,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int #define TCP_CHECK_TIMER(sk) do { } while (0) -static inline void tcp_mib_init(void) +static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ - TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1); - TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); - TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); - TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1); + TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1); + TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); + TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); + TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1); } /* from STCP */ @@ -1035,13 +1053,12 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; - tp->retransmit_skb_hint = NULL; - tp->forward_skb_hint = NULL; } static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { tcp_clear_retrans_hints_partial(tp); + tp->retransmit_skb_hint = NULL; } /* MD5 Signature */ @@ -1110,14 +1127,12 @@ struct tcp_md5sig_pool { #define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */ /* - functions */ -extern int tcp_v4_calc_md5_hash(char *md5_hash, - struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, - int protocol, - unsigned int tcplen); +extern int tcp_v4_md5_hash_skb(char *md5_hash, + struct tcp_md5sig_key *key, + struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); + extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk); @@ -1129,11 +1144,26 @@ extern int tcp_v4_md5_do_add(struct sock *sk, extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr); +#ifdef CONFIG_TCP_MD5SIG +#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_keylen ? \ + &(struct tcp_md5sig_key) { \ + .key = (twsk)->tw_md5_key, \ + .keylen = (twsk)->tw_md5_keylen, \ + } : NULL) +#else +#define tcp_twsk_md5_key(twsk) NULL +#endif + extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void); extern void tcp_free_md5sig_pool(void); extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); extern void __tcp_put_md5sig_pool(void); +extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); +extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, + unsigned header_len); +extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, + struct tcp_md5sig_key *key); static inline struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) @@ -1163,49 +1193,50 @@ static inline void tcp_write_queue_purge(struct sock *sk) static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.next; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.prev; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek_tail(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb) { - return skb->next; + return skb_queue_next(&sk->sk_write_queue, skb); +} + +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb_queue_prev(&sk->sk_write_queue, skb); } #define tcp_for_write_queue(skb, sk) \ - for (skb = (sk)->sk_write_queue.next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = skb->next) + skb_queue_walk(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from(skb, sk) \ - for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\ - skb = skb->next) + skb_queue_walk_from(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ - for (tmp = skb->next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = tmp, tmp = skb->next) + skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; } +static inline bool tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb_queue_is_last(&sk->sk_write_queue, skb); +} + static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { - sk->sk_send_head = skb->next; - if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) + if (tcp_skb_is_last(sk, skb)) sk->sk_send_head = NULL; + else + sk->sk_send_head = tcp_write_queue_next(sk, skb); } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) @@ -1250,12 +1281,12 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb, __skb_queue_after(&sk->sk_write_queue, skb, buff); } -/* Insert skb between prev and next on the write queue of sk. */ +/* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { - __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); + __skb_queue_before(&sk->sk_write_queue, skb, new); if (sk->sk_send_head == skb) sk->sk_send_head = new; @@ -1266,12 +1297,6 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) __skb_unlink(skb, &sk->sk_write_queue); } -static inline int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb->next == (struct sk_buff *)&sk->sk_write_queue; -} - static inline int tcp_write_queue_empty(struct sock *sk) { return skb_queue_empty(&sk->sk_write_queue); @@ -1345,10 +1370,16 @@ extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); extern struct request_sock_ops tcp_request_sock_ops; extern struct request_sock_ops tcp6_request_sock_ops; -extern int tcp_v4_destroy_sock(struct sock *sk); +extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern int tcp_gro_complete(struct sk_buff *skb); +extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); @@ -1363,11 +1394,8 @@ struct tcp_sock_af_ops { int (*calc_md5_hash) (char *location, struct tcp_md5sig_key *md5, struct sock *sk, - struct dst_entry *dst, struct request_sock *req, - struct tcphdr *th, - int protocol, - unsigned int len); + struct sk_buff *skb); int (*md5_add) (struct sock *sk, struct sock *addr_sk, u8 *newkey,