3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/module.h>
27 #include <linux/errno.h>
28 #include <linux/types.h>
29 #include <linux/socket.h>
30 #include <linux/sockios.h>
31 #include <linux/net.h>
32 #include <linux/jiffies.h>
34 #include <linux/in6.h>
35 #include <linux/netdevice.h>
36 #include <linux/init.h>
37 #include <linux/jhash.h>
38 #include <linux/ipsec.h>
39 #include <linux/times.h>
41 #include <linux/ipv6.h>
42 #include <linux/icmpv6.h>
43 #include <linux/random.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/netdma.h>
61 #include <net/inet_common.h>
63 #include <asm/uaccess.h>
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
68 #include <linux/crypto.h>
69 #include <linux/scatterlist.h>
/*
 * Forward declarations and tentative definitions for helpers and ops
 * tables that are referenced before the point where they are defined.
 */
71 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
73 static void tcp_v6_send_check(struct sock *sk, int len,
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static struct inet_connection_sock_af_ops ipv6_mapped;
79 static struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
85 struct in6_addr *addr)
/*
 * tcp_v6_hash - hash hook for a TCPv6 socket.
 * @sk: socket to hash.
 *
 * The visible tests show that nothing is done for a socket in TCP_CLOSE
 * and that sockets whose icsk_af_ops is &ipv6_mapped take a separate
 * branch. NOTE(review): the branch bodies are not part of this listing.
 */
91 static void tcp_v6_hash(struct sock *sk)
93 if (sk->sk_state != TCP_CLOSE) {
94 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
/*
 * tcp_v6_check - fold the IPv6 pseudo-header into a TCP checksum.
 * @th:    TCP header (not referenced by the visible return statement).
 * @len:   length passed to csum_ipv6_magic().
 * @saddr: IPv6 source address.
 * @daddr: IPv6 destination address.
 *
 * Returns csum_ipv6_magic() over saddr, daddr, len and IPPROTO_TCP,
 * seeded with base. NOTE(review): base is declared on a signature line
 * that is not shown here; presumably a partial checksum - confirm.
 */
104 static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
105 struct in6_addr *saddr,
106 struct in6_addr *daddr,
109 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * tcp_v6_init_sequence - pick an initial sequence number for a received skb.
 * @skb: incoming segment.
 *
 * Returns secure_tcpv6_sequence_number() computed from the IPv6
 * destination and source addresses of skb and its TCP ports (only the
 * source-port argument is visible in this listing).
 */
112 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
114 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
115 ipv6_hdr(skb)->saddr.s6_addr32,
117 tcp_hdr(skb)->source);
/*
 * tcp_v6_connect - start an active open on a TCPv6 socket.
 * @sk:    socket being connected.
 * @uaddr: destination, interpreted as struct sockaddr_in6.
 *
 * Steps visible in this listing, in order:
 *  - addr_len is checked against SIN6_LEN_RFC2133 and sin6_family must be
 *    AF_INET6 (otherwise -EAFNOSUPPORT is returned);
 *  - a non-zero flow label is resolved with fl6_sock_lookup() and the
 *    destination is replaced by the address bound to that label;
 *  - an all-zero destination gets s6_addr[15] = 1 (loopback);
 *  - for link-local destinations sin6_scope_id is reconciled with
 *    sk->sk_bound_dev_if, and an interface is required;
 *  - cached timestamp state is cleared when the destination changes;
 *  - v4-mapped destinations are handed to tcp_v4_connect();
 *  - otherwise a route is looked up (ip6_dst_lookup, __xfrm_lookup),
 *    stored on the socket, the socket moves to TCP_SYN_SENT, is hashed
 *    with inet6_hash_connect(), gets a write_seq and tcp_connect() runs.
 *
 * NOTE(review): most error returns and the cleanup labels are on lines
 * that are not part of this listing.
 */
120 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
123 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
124 struct inet_sock *inet = inet_sk(sk);
125 struct inet_connection_sock *icsk = inet_csk(sk);
126 struct ipv6_pinfo *np = inet6_sk(sk);
127 struct tcp_sock *tp = tcp_sk(sk);
128 struct in6_addr *saddr = NULL, *final_p = NULL, final;
130 struct dst_entry *dst;
134 if (addr_len < SIN6_LEN_RFC2133)
137 if (usin->sin6_family != AF_INET6)
138 return(-EAFNOSUPPORT);
140 memset(&fl, 0, sizeof(fl));
143 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
144 IP6_ECN_flow_init(fl.fl6_flowlabel);
145 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
146 struct ip6_flowlabel *flowlabel;
147 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
148 if (flowlabel == NULL)
150 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
151 fl6_sock_release(flowlabel);
156 * connect() to INADDR_ANY means loopback (BSD'ism).
159 if(ipv6_addr_any(&usin->sin6_addr))
160 usin->sin6_addr.s6_addr[15] = 0x1;
162 addr_type = ipv6_addr_type(&usin->sin6_addr);
164 if(addr_type & IPV6_ADDR_MULTICAST)
167 if (addr_type&IPV6_ADDR_LINKLOCAL) {
168 if (addr_len >= sizeof(struct sockaddr_in6) &&
169 usin->sin6_scope_id) {
170 /* If interface is set while binding, indices
173 if (sk->sk_bound_dev_if &&
174 sk->sk_bound_dev_if != usin->sin6_scope_id)
177 sk->sk_bound_dev_if = usin->sin6_scope_id;
180 /* Connect to link-local address requires an interface */
181 if (!sk->sk_bound_dev_if)
185 if (tp->rx_opt.ts_recent_stamp &&
186 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
187 tp->rx_opt.ts_recent = 0;
188 tp->rx_opt.ts_recent_stamp = 0;
192 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
193 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: build a sockaddr_in and use the IPv4 connect path. */
199 if (addr_type == IPV6_ADDR_MAPPED) {
200 u32 exthdrlen = icsk->icsk_ext_hdr_len;
201 struct sockaddr_in sin;
203 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
205 if (__ipv6_only_sock(sk))
208 sin.sin_family = AF_INET;
209 sin.sin_port = usin->sin6_port;
210 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
212 icsk->icsk_af_ops = &ipv6_mapped;
213 sk->sk_backlog_rcv = tcp_v4_do_rcv;
214 #ifdef CONFIG_TCP_MD5SIG
215 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
218 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/*
 * Put the IPv6 ops and the saved ext header length back.
 * NOTE(review): presumably only on tcp_v4_connect() failure; the guarding
 * condition is not shown in this listing.
 */
221 icsk->icsk_ext_hdr_len = exthdrlen;
222 icsk->icsk_af_ops = &ipv6_specific;
223 sk->sk_backlog_rcv = tcp_v6_do_rcv;
224 #ifdef CONFIG_TCP_MD5SIG
225 tp->af_specific = &tcp_sock_ipv6_specific;
229 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
231 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
238 if (!ipv6_addr_any(&np->rcv_saddr))
239 saddr = &np->rcv_saddr;
241 fl.proto = IPPROTO_TCP;
242 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
243 ipv6_addr_copy(&fl.fl6_src,
244 (saddr ? saddr : &np->saddr));
245 fl.oif = sk->sk_bound_dev_if;
246 fl.fl_ip_dport = usin->sin6_port;
247 fl.fl_ip_sport = inet->sport;
/* With a routing header, route towards its first hop and keep the real destination in final. */
249 if (np->opt && np->opt->srcrt) {
250 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
251 ipv6_addr_copy(&final, &fl.fl6_dst);
252 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
256 security_sk_classify_flow(sk, &fl);
258 err = ip6_dst_lookup(sk, &dst, &fl);
262 ipv6_addr_copy(&fl.fl6_dst, final_p);
264 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
266 err = ip6_dst_blackhole(sk, &dst, &fl);
273 ipv6_addr_copy(&np->rcv_saddr, saddr);
276 /* set the source address */
277 ipv6_addr_copy(&np->saddr, saddr);
278 inet->rcv_saddr = LOOPBACK4_IPV6;
280 sk->sk_gso_type = SKB_GSO_TCPV6;
281 __ip6_dst_store(sk, dst, NULL, NULL);
283 icsk->icsk_ext_hdr_len = 0;
285 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
288 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
290 inet->dport = usin->sin6_port;
292 tcp_set_state(sk, TCP_SYN_SENT);
293 err = inet6_hash_connect(&tcp_death_row, sk);
298 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
303 err = tcp_connect(sk);
310 tcp_set_state(sk, TCP_CLOSE);
314 sk->sk_route_caps = 0;
/*
 * tcp_v6_err - ICMPv6 error handler for TCP.
 * @skb:    ICMPv6 payload; skb->data is read as the offending IPv6 header.
 * @offset: offset of the embedded TCP header within skb->data.
 * @type, @code, @info: ICMPv6 error description.
 *
 * Visible behaviour:
 *  - the socket is found with inet6_lookup() using the addresses and
 *    ports of the embedded packet; a time-wait socket is only released;
 *  - outside TCP_LISTEN, the embedded sequence number must lie between
 *    snd_una and snd_nxt, otherwise LINUX_MIB_OUTOFWINDOWICMPS is bumped;
 *  - ICMPV6_PKT_TOOBIG re-resolves the route if needed and, when the
 *    cached icsk_pmtu_cookie exceeds the route MTU, calls tcp_sync_mss()
 *    and tcp_simple_retransmit();
 *  - other errors are converted with icmpv6_err_convert() and handled per
 *    socket state, including dropping a matching request sock.
 *
 * NOTE(review): the locking calls, case labels and exit labels are on
 * lines that are not part of this listing.
 */
318 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
319 int type, int code, int offset, __be32 info)
321 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
322 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
323 struct ipv6_pinfo *np;
329 sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr,
330 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
333 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
337 if (sk->sk_state == TCP_TIME_WAIT) {
338 inet_twsk_put(inet_twsk(sk));
343 if (sock_owned_by_user(sk))
344 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
346 if (sk->sk_state == TCP_CLOSE)
350 seq = ntohl(th->seq);
351 if (sk->sk_state != TCP_LISTEN &&
352 !between(seq, tp->snd_una, tp->snd_nxt)) {
353 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
359 if (type == ICMPV6_PKT_TOOBIG) {
360 struct dst_entry *dst = NULL;
362 if (sock_owned_by_user(sk))
364 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
367 /* icmp should have updated the destination cache entry */
368 dst = __sk_dst_check(sk, np->dst_cookie);
371 struct inet_sock *inet = inet_sk(sk);
374 /* BUGGG_FUTURE: Again, it is not clear how
375 to handle rthdr case. Ignore this complexity
378 memset(&fl, 0, sizeof(fl));
379 fl.proto = IPPROTO_TCP;
380 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
381 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
382 fl.oif = sk->sk_bound_dev_if;
383 fl.fl_ip_dport = inet->dport;
384 fl.fl_ip_sport = inet->sport;
385 security_skb_classify_flow(skb, &fl);
387 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
388 sk->sk_err_soft = -err;
392 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
393 sk->sk_err_soft = -err;
400 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
401 tcp_sync_mss(sk, dst_mtu(dst));
402 tcp_simple_retransmit(sk);
403 } /* else let the usual retransmit timer handle it */
408 icmpv6_err_convert(type, code, &err);
410 /* Might be for an request_sock */
411 switch (sk->sk_state) {
412 struct request_sock *req, **prev;
414 if (sock_owned_by_user(sk))
417 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
418 &hdr->saddr, inet6_iif(skb));
422 /* ICMPs are not backlogged, hence we cannot get
423 * an established socket here.
425 BUG_TRAP(req->sk == NULL);
427 if (seq != tcp_rsk(req)->snt_isn) {
428 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
432 inet_csk_reqsk_queue_drop(sk, req, prev);
436 case TCP_SYN_RECV: /* Cannot happen.
437 It can, it SYNs are crossed. --ANK */
438 if (!sock_owned_by_user(sk)) {
440 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
444 sk->sk_err_soft = err;
448 if (!sock_owned_by_user(sk) && np->recverr) {
450 sk->sk_error_report(sk);
452 sk->sk_err_soft = err;
/*
 * tcp_v6_send_synack - build and transmit a SYN-ACK for a request sock.
 * @sk:  listening socket.
 * @req: pending connection request.
 *
 * Builds a flow from the addresses and ports stored in req, applies a
 * type 0 routing header from opt when present, resolves the route
 * (ip6_dst_lookup, xfrm_lookup), creates the segment with
 * tcp_make_synack(), fills in the checksum over the request addresses
 * and sends it with ip6_xmit(). The ip6_xmit() result is passed through
 * net_xmit_eval(). An opt that differs from np->opt is freed with
 * sock_kfree_s() before leaving.
 *
 * NOTE(review): how opt is chosen and the final return are on lines that
 * are not part of this listing.
 */
460 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
462 struct inet6_request_sock *treq = inet6_rsk(req);
463 struct ipv6_pinfo *np = inet6_sk(sk);
464 struct sk_buff * skb;
465 struct ipv6_txoptions *opt = NULL;
466 struct in6_addr * final_p = NULL, final;
468 struct dst_entry *dst;
471 memset(&fl, 0, sizeof(fl));
472 fl.proto = IPPROTO_TCP;
473 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
474 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
475 fl.fl6_flowlabel = 0;
477 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
478 fl.fl_ip_sport = inet_sk(sk)->sport;
479 security_req_classify_flow(req, &fl);
482 if (opt && opt->srcrt) {
483 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
484 ipv6_addr_copy(&final, &fl.fl6_dst);
485 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
489 err = ip6_dst_lookup(sk, &dst, &fl);
493 ipv6_addr_copy(&fl.fl6_dst, final_p);
494 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
497 skb = tcp_make_synack(sk, dst, req);
499 struct tcphdr *th = tcp_hdr(skb);
501 th->check = tcp_v6_check(th, skb->len,
502 &treq->loc_addr, &treq->rmt_addr,
503 csum_partial((char *)th, skb->len, skb->csum));
505 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
506 err = ip6_xmit(sk, skb, &fl, opt, 0);
507 err = net_xmit_eval(err);
511 if (opt && opt != np->opt)
512 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * syn_flood_warning - log a possible SYN flood on the destination port of skb.
 * @skb: the SYN that triggered the warning.
 *
 * With CONFIG_SYN_COOKIES and sysctl_tcp_syncookies set, the message says
 * cookies are being sent; the other message says the request is dropped.
 */
517 static inline void syn_flood_warning(struct sk_buff *skb)
519 #ifdef CONFIG_SYN_COOKIES
520 if (sysctl_tcp_syncookies)
522 "TCPv6: Possible SYN flooding on port %d. "
523 "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
527 "TCPv6: Possible SYN flooding on port %d. "
528 "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
/*
 * tcp_v6_reqsk_destructor - release the packet-options skb held by req.
 * @req: request sock being destroyed.
 *
 * Drops the reference stored in inet6_rsk(req)->pktopts, if any.
 */
531 static void tcp_v6_reqsk_destructor(struct request_sock *req)
533 if (inet6_rsk(req)->pktopts)
534 kfree_skb(inet6_rsk(req)->pktopts);
537 #ifdef CONFIG_TCP_MD5SIG
/*
 * tcp_v6_md5_do_lookup - find the MD5 key configured for an IPv6 peer.
 * @sk:   socket whose key list is searched.
 * @addr: peer address to match.
 *
 * Scans tp->md5sig_info->keys6[0..entries6) linearly and returns the
 * address of the base key of the first entry whose addr equals @addr.
 * A missing md5sig_info or an empty list is tested up front.
 * NOTE(review): the not-found return values are on lines not shown here.
 */
538 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
539 struct in6_addr *addr)
541 struct tcp_sock *tp = tcp_sk(sk);
546 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
549 for (i = 0; i < tp->md5sig_info->entries6; i++) {
550 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
551 return &tp->md5sig_info->keys6[i].base;
/*
 * tcp_v6_md5_lookup - look up the MD5 key on @sk for the peer of @addr_sk.
 * The peer address is taken from inet6_sk(addr_sk)->daddr.
 */
556 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
557 struct sock *addr_sk)
559 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/*
 * tcp_v6_reqsk_md5_lookup - look up the MD5 key on @sk for a request sock.
 * The peer address is taken from inet6_rsk(req)->rmt_addr.
 */
562 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
563 struct request_sock *req)
565 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/*
 * tcp_v6_md5_do_add - add or update the MD5 key for an IPv6 peer.
 * @sk:        socket owning the key list.
 * @peer:      peer address the key applies to.
 * @newkey:    key buffer; the pointer itself is stored in the list entry
 *             (no copy is made in the visible code).
 * @newkeylen: length of @newkey.
 *
 * If a key for @peer already exists its keylen is updated in place.
 * Otherwise md5sig_info is allocated on demand (kzalloc, GFP_ATOMIC) and
 * NETIF_F_GSO_MASK is cleared from sk_route_caps, the md5sig pool is
 * obtained with tcp_alloc_md5sig_pool(), and keys6 is grown by one
 * element when alloced6 == entries6: a new array is allocated, existing
 * entries are copied over and the old array is freed. The new entry is
 * then appended and entries6 incremented.
 *
 * NOTE(review): the error returns and the handling of the old key buffer
 * on update are on lines that are not part of this listing.
 */
568 static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
569 char *newkey, u8 newkeylen)
571 /* Add key to the list */
572 struct tcp_md5sig_key *key;
573 struct tcp_sock *tp = tcp_sk(sk);
574 struct tcp6_md5sig_key *keys;
576 key = tcp_v6_md5_do_lookup(sk, peer);
578 /* modify existing entry - just update that one */
581 key->keylen = newkeylen;
583 /* reallocate new list if current one is full. */
584 if (!tp->md5sig_info) {
585 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
586 if (!tp->md5sig_info) {
590 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
592 if (tcp_alloc_md5sig_pool() == NULL) {
596 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
597 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
598 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
601 tcp_free_md5sig_pool();
606 if (tp->md5sig_info->entries6)
607 memmove(keys, tp->md5sig_info->keys6,
608 (sizeof (tp->md5sig_info->keys6[0]) *
609 tp->md5sig_info->entries6));
611 kfree(tp->md5sig_info->keys6);
612 tp->md5sig_info->keys6 = keys;
613 tp->md5sig_info->alloced6++;
616 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
618 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
619 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
621 tp->md5sig_info->entries6++;
/*
 * tcp_v6_md5_add_func - add an MD5 key on @sk for the peer of @addr_sk.
 * Thin wrapper around tcp_v6_md5_do_add() that passes
 * inet6_sk(addr_sk)->daddr as the peer address.
 */
626 static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
627 u8 *newkey, __u8 newkeylen)
629 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
/*
 * tcp_v6_md5_do_del - remove the MD5 key configured for an IPv6 peer.
 * @sk:   socket owning the key list.
 * @peer: peer address whose key is removed.
 *
 * On a match the key buffer is freed and entries6 is decremented. When
 * the list becomes empty keys6 is freed and reset; otherwise the
 * following entries are moved down over the removed slot. The md5sig
 * pool reference is dropped with tcp_free_md5sig_pool().
 *
 * The loop dereferences tp->md5sig_info with no visible NULL check; the
 * caller visible in this file, tcp_v6_parse_md5_keys(), tests
 * md5sig_info before calling.
 */
633 static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
635 struct tcp_sock *tp = tcp_sk(sk);
638 for (i = 0; i < tp->md5sig_info->entries6; i++) {
639 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
641 kfree(tp->md5sig_info->keys6[i].base.key);
642 tp->md5sig_info->entries6--;
644 if (tp->md5sig_info->entries6 == 0) {
645 kfree(tp->md5sig_info->keys6);
646 tp->md5sig_info->keys6 = NULL;
647 tp->md5sig_info->alloced6 = 0;
649 /* shrink the database */
650 if (tp->md5sig_info->entries6 != i)
651 memmove(&tp->md5sig_info->keys6[i],
652 &tp->md5sig_info->keys6[i+1],
653 (tp->md5sig_info->entries6 - i)
654 * sizeof (tp->md5sig_info->keys6[0]));
656 tcp_free_md5sig_pool();
/*
 * tcp_v6_clear_md5_list - free every MD5 key attached to @sk.
 * @sk: socket whose key lists are emptied.
 *
 * Handles the IPv6 list (keys6) and then the IPv4 list (keys4) the same
 * way: each key buffer is freed, the entry count is zeroed, the pool
 * reference is dropped with tcp_free_md5sig_pool(), and the array is
 * freed with its pointer and allocation count reset.
 */
663 static void tcp_v6_clear_md5_list (struct sock *sk)
665 struct tcp_sock *tp = tcp_sk(sk);
668 if (tp->md5sig_info->entries6) {
669 for (i = 0; i < tp->md5sig_info->entries6; i++)
670 kfree(tp->md5sig_info->keys6[i].base.key);
671 tp->md5sig_info->entries6 = 0;
672 tcp_free_md5sig_pool();
675 kfree(tp->md5sig_info->keys6);
676 tp->md5sig_info->keys6 = NULL;
677 tp->md5sig_info->alloced6 = 0;
679 if (tp->md5sig_info->entries4) {
680 for (i = 0; i < tp->md5sig_info->entries4; i++)
681 kfree(tp->md5sig_info->keys4[i].base.key);
682 tp->md5sig_info->entries4 = 0;
683 tcp_free_md5sig_pool();
686 kfree(tp->md5sig_info->keys4);
687 tp->md5sig_info->keys4 = NULL;
688 tp->md5sig_info->alloced4 = 0;
/*
 * tcp_v6_parse_md5_keys - handle a struct tcp_md5sig passed from user space.
 * @sk:     socket being configured.
 * @optval: user pointer to a struct tcp_md5sig.
 *
 * The option must be at least sizeof(struct tcp_md5sig) long and is
 * copied in with copy_from_user(). The embedded address must be
 * AF_INET6. A zero tcpm_keylen requests deletion; a key length above
 * TCP_MD5SIG_MAXKEYLEN is rejected. For additions md5sig_info is
 * allocated if missing (GFP_KERNEL) and the key is duplicated with
 * kmemdup() before being handed to the add helper. In both directions a
 * v4-mapped address is routed to the IPv4 helper using s6_addr32[3].
 *
 * NOTE(review): the error codes returned on the failing checks are on
 * lines that are not part of this listing.
 */
691 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
694 struct tcp_md5sig cmd;
695 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
698 if (optlen < sizeof(cmd))
701 if (copy_from_user(&cmd, optval, sizeof(cmd)))
704 if (sin6->sin6_family != AF_INET6)
707 if (!cmd.tcpm_keylen) {
708 if (!tcp_sk(sk)->md5sig_info)
710 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
711 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
712 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
715 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
718 if (!tcp_sk(sk)->md5sig_info) {
719 struct tcp_sock *tp = tcp_sk(sk);
720 struct tcp_md5sig_info *p;
722 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
727 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
730 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
733 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
734 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
735 newkey, cmd.tcpm_keylen);
737 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
/*
 * tcp_v6_do_calc_md5_hash - compute the TCP MD5 signature for a segment.
 * @md5_hash: output buffer; 16 bytes are zeroed on the failure path.
 * @key:      MD5 key to sign with.
 * @saddr:    IPv6 source address for the pseudo-header.
 * @daddr:    IPv6 destination address for the pseudo-header.
 * @th:       TCP header of the segment.
 * @tcplen:   TCP length placed in the pseudo-header.
 *
 * Takes a pool entry with tcp_get_md5sig_pool(), fills the IPv6
 * pseudo-header held in that entry (addresses, htonl(tcplen),
 * htonl(IPPROTO_TCP)), runs tcp_calc_md5_hash() and releases the pool
 * with tcp_put_md5sig_pool() on both the success and the error path.
 */
740 static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
741 struct in6_addr *saddr,
742 struct in6_addr *daddr,
743 struct tcphdr *th, unsigned int tcplen)
745 struct tcp_md5sig_pool *hp;
746 struct tcp6_pseudohdr *bp;
749 hp = tcp_get_md5sig_pool();
751 printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
752 goto clear_hash_noput;
755 bp = &hp->md5_blk.ip6;
757 /* 1. TCP pseudo-header (RFC2460) */
758 ipv6_addr_copy(&bp->saddr, saddr);
759 ipv6_addr_copy(&bp->daddr, daddr);
760 bp->len = htonl(tcplen);
761 bp->protocol = htonl(IPPROTO_TCP);
763 err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
769 /* Free up the crypto pool */
770 tcp_put_md5sig_pool();
774 tcp_put_md5sig_pool();
776 memset(md5_hash, 0, 16);
/*
 * tcp_v6_calc_md5_hash - compute the MD5 signature for an outgoing segment.
 *
 * Picks the address pair either from the socket (inet6_sk(sk)->saddr and
 * daddr) or from the request sock (loc_addr and rmt_addr) and forwards to
 * tcp_v6_do_calc_md5_hash().
 * NOTE(review): the condition selecting between the two sources is on a
 * line that is not part of this listing.
 */
780 static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
782 struct dst_entry *dst,
783 struct request_sock *req,
784 struct tcphdr *th, unsigned int tcplen)
786 struct in6_addr *saddr, *daddr;
789 saddr = &inet6_sk(sk)->saddr;
790 daddr = &inet6_sk(sk)->daddr;
792 saddr = &inet6_rsk(req)->loc_addr;
793 daddr = &inet6_rsk(req)->rmt_addr;
795 return tcp_v6_do_calc_md5_hash(md5_hash, key,
/*
 * tcp_v6_inbound_md5_hash - validate the MD5 option of a received segment.
 * @sk:  receiving socket.
 * @skb: received segment.
 *
 * Looks up the key expected for the source address of the packet and
 * locates the MD5 option with tcp_parse_md5sig_option(). Three mismatch
 * cases are logged (rate limited): a hash found but none expected, a
 * hash expected but none found, and a hash that fails to compute or
 * does not match the 16 bytes carried in the packet.
 *
 * NOTE(review): the return values for each case are on lines that are
 * not part of this listing.
 */
800 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
802 __u8 *hash_location = NULL;
803 struct tcp_md5sig_key *hash_expected;
804 struct ipv6hdr *ip6h = ipv6_hdr(skb);
805 struct tcphdr *th = tcp_hdr(skb);
809 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
810 hash_location = tcp_parse_md5sig_option(th);
812 /* do we have a hash as expected? */
813 if (!hash_expected) {
816 if (net_ratelimit()) {
817 printk(KERN_INFO "MD5 Hash NOT expected but found "
818 "(" NIP6_FMT ", %u)->"
819 "(" NIP6_FMT ", %u)\n",
820 NIP6(ip6h->saddr), ntohs(th->source),
821 NIP6(ip6h->daddr), ntohs(th->dest));
826 if (!hash_location) {
827 if (net_ratelimit()) {
828 printk(KERN_INFO "MD5 Hash expected but NOT found "
829 "(" NIP6_FMT ", %u)->"
830 "(" NIP6_FMT ", %u)\n",
831 NIP6(ip6h->saddr), ntohs(th->source),
832 NIP6(ip6h->daddr), ntohs(th->dest));
837 /* check the signature */
838 genhash = tcp_v6_do_calc_md5_hash(newhash,
840 &ip6h->saddr, &ip6h->daddr,
842 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
843 if (net_ratelimit()) {
844 printk(KERN_INFO "MD5 Hash %s for "
845 "(" NIP6_FMT ", %u)->"
846 "(" NIP6_FMT ", %u)\n",
847 genhash ? "failed" : "mismatch",
848 NIP6(ip6h->saddr), ntohs(th->source),
849 NIP6(ip6h->daddr), ntohs(th->dest));
/*
 * Request-sock operations for TCPv6: object size plus the SYN-ACK
 * retransmit, ACK, destructor and reset callbacks defined in this file.
 */
857 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
859 .obj_size = sizeof(struct tcp6_request_sock),
860 .rtx_syn_ack = tcp_v6_send_synack,
861 .send_ack = tcp_v6_reqsk_send_ack,
862 .destructor = tcp_v6_reqsk_destructor,
863 .send_reset = tcp_v6_send_reset
866 #ifdef CONFIG_TCP_MD5SIG
/* TCP-specific request-sock ops for IPv6: the MD5 key lookup for a request sock. */
867 static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
868 .md5_lookup = tcp_v6_reqsk_md5_lookup,
/*
 * Time-wait sock operations for TCPv6: object size of
 * struct tcp6_timewait_sock plus the shared tcp_twsk_* callbacks.
 */
872 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
873 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
874 .twsk_unique = tcp_twsk_unique,
875 .twsk_destructor= tcp_twsk_destructor,
/*
 * tcp_v6_send_check - fill in th->check for an outgoing segment.
 * @sk:  sending socket; supplies the IPv6 source and destination.
 * @len: TCP length used in the pseudo-header.
 * @skb: segment being sent.
 *
 * For CHECKSUM_PARTIAL only the complemented pseudo-header sum is stored
 * and csum_start/csum_offset are set to point at the check field.
 * Otherwise the full checksum is computed here, with the header part
 * covering th->doff << 2 bytes.
 */
878 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
880 struct ipv6_pinfo *np = inet6_sk(sk);
881 struct tcphdr *th = tcp_hdr(skb);
883 if (skb->ip_summed == CHECKSUM_PARTIAL) {
884 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
885 skb->csum_start = skb_transport_header(skb) - skb->head;
886 skb->csum_offset = offsetof(struct tcphdr, check);
888 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
889 csum_partial((char *)th, th->doff<<2,
/*
 * tcp_v6_gso_send_check - prepare the checksum fields of a GSO skb.
 * @skb: segment to prepare.
 *
 * Requires a full TCP header to be pullable, then stores the
 * complemented pseudo-header sum (addresses taken from the IPv6 header,
 * length skb->len) in th->check and marks the skb CHECKSUM_PARTIAL with
 * csum_start/csum_offset pointing at the check field.
 * NOTE(review): the return values are on lines not shown in this listing.
 */
894 static int tcp_v6_gso_send_check(struct sk_buff *skb)
896 struct ipv6hdr *ipv6h;
899 if (!pskb_may_pull(skb, sizeof(*th)))
902 ipv6h = ipv6_hdr(skb);
906 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
908 skb->csum_start = skb_transport_header(skb) - skb->head;
909 skb->csum_offset = offsetof(struct tcphdr, check);
910 skb->ip_summed = CHECKSUM_PARTIAL;
/*
 * tcp_v6_send_reset - answer a received segment with a RST.
 * @sk:  socket the segment was matched to; used for the MD5 key lookup.
 * @skb: the offending segment.
 *
 * Only unicast-destined packets are answered. A new skb is allocated,
 * a TCP header of tot_len bytes is pushed (tot_len grows by
 * TCPOLEN_MD5SIG_ALIGNED when an MD5 key applies), ports are swapped and
 * seq/ack_seq are derived from the incoming header. The reply is routed
 * with the per-namespace control socket (net->ipv6.tcp_sk) and sent with
 * ip6_xmit(); TCP_MIB_OUTSEGS and TCP_MIB_OUTRSTS are incremented.
 *
 * NOTE(review): doff is set from tot_len, but both csum_partial() and
 * csum_ipv6_magic() are given sizeof(*t1). When the MD5 option is
 * appended the header is longer than that; tcp_v6_send_ack() passes
 * tot_len in the same two places. Confirm the checksum length here.
 */
914 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
916 struct tcphdr *th = tcp_hdr(skb), *t1;
917 struct sk_buff *buff;
919 struct net *net = dev_net(skb->dst->dev);
920 struct sock *ctl_sk = net->ipv6.tcp_sk;
921 unsigned int tot_len = sizeof(*th);
922 #ifdef CONFIG_TCP_MD5SIG
923 struct tcp_md5sig_key *key;
929 if (!ipv6_unicast_destination(skb))
932 #ifdef CONFIG_TCP_MD5SIG
934 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
939 tot_len += TCPOLEN_MD5SIG_ALIGNED;
943 * We need to grab some memory, and put together an RST,
944 * and then put it into the queue to be sent.
947 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
952 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
954 t1 = (struct tcphdr *) skb_push(buff, tot_len);
956 /* Swap the send and the receive. */
957 memset(t1, 0, sizeof(*t1));
958 t1->dest = th->source;
959 t1->source = th->dest;
960 t1->doff = tot_len / 4;
964 t1->seq = th->ack_seq;
967 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
968 + skb->len - (th->doff<<2));
971 #ifdef CONFIG_TCP_MD5SIG
973 __be32 *opt = (__be32*)(t1 + 1);
974 opt[0] = htonl((TCPOPT_NOP << 24) |
976 (TCPOPT_MD5SIG << 8) |
978 tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
979 &ipv6_hdr(skb)->daddr,
980 &ipv6_hdr(skb)->saddr,
985 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
987 memset(&fl, 0, sizeof(fl));
988 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
989 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
991 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
992 sizeof(*t1), IPPROTO_TCP,
995 fl.proto = IPPROTO_TCP;
996 fl.oif = inet6_iif(skb);
997 fl.fl_ip_dport = t1->dest;
998 fl.fl_ip_sport = t1->source;
999 security_skb_classify_flow(skb, &fl);
1001 /* Pass a socket to ip6_dst_lookup either it is for RST
1002 * Underlying function will use this to retrieve the network
1005 if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
1007 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1008 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1009 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1010 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
// tcp_v6_send_ack - build and send a bare ACK in reply to skb.
// @skb: segment being answered; its addresses and ports are swapped.
// @seq, @ack, @win: values written (network order) into the new header.
// @ts:  timestamp to echo; a timestamp option is emitted in the visible
//       code, but the guarding condition is on a line not shown here.
// @key: MD5 key; when used, an MD5 option is appended and signed.
//
// The header is tot_len bytes long (base header plus the optional
// timestamp and MD5 options) and the checksum is computed over tot_len.
// The reply is routed with the per-namespace control socket and sent
// with ip6_xmit(); TCP_MIB_OUTSEGS is incremented.
1018 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
1019 struct tcp_md5sig_key *key)
1021 struct tcphdr *th = tcp_hdr(skb), *t1;
1022 struct sk_buff *buff;
1024 struct net *net = dev_net(skb->dev);
1025 struct sock *ctl_sk = net->ipv6.tcp_sk;
1026 unsigned int tot_len = sizeof(struct tcphdr);
1030 tot_len += TCPOLEN_TSTAMP_ALIGNED;
1031 #ifdef CONFIG_TCP_MD5SIG
1033 tot_len += TCPOLEN_MD5SIG_ALIGNED;
1036 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1041 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1043 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1045 /* Swap the send and the receive. */
1046 memset(t1, 0, sizeof(*t1));
1047 t1->dest = th->source;
1048 t1->source = th->dest;
1049 t1->doff = tot_len/4;
1050 t1->seq = htonl(seq);
1051 t1->ack_seq = htonl(ack);
1053 t1->window = htons(win);
1055 topt = (__be32 *)(t1 + 1);
1058 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1059 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1060 *topt++ = htonl(tcp_time_stamp);
1064 #ifdef CONFIG_TCP_MD5SIG
1066 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1067 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1068 tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
1069 &ipv6_hdr(skb)->daddr,
1070 &ipv6_hdr(skb)->saddr,
1075 buff->csum = csum_partial((char *)t1, tot_len, 0);
1077 memset(&fl, 0, sizeof(fl));
1078 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1079 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1081 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1082 tot_len, IPPROTO_TCP,
1085 fl.proto = IPPROTO_TCP;
1086 fl.oif = inet6_iif(skb);
1087 fl.fl_ip_dport = t1->dest;
1088 fl.fl_ip_sport = t1->source;
1089 security_skb_classify_flow(skb, &fl);
1091 if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
1092 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1093 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1094 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/*
 * tcp_v6_timewait_ack - ACK a segment received for a time-wait socket.
 * @sk:  the time-wait socket.
 * @skb: segment being answered.
 *
 * Sends an ACK built from the stored tw_snd_nxt and tw_rcv_nxt, the
 * receive window scaled down by tw_rcv_wscale, the stored tw_ts_recent
 * and the MD5 key of the time-wait socket.
 */
1102 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1104 struct inet_timewait_sock *tw = inet_twsk(sk);
1105 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1107 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1108 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1109 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
/*
 * tcp_v6_reqsk_send_ack - ACK a segment on behalf of a request sock.
 * @skb: segment being answered.
 * @req: request sock supplying the sequence numbers, window and timestamp.
 *
 * Uses snt_isn + 1 and rcv_isn + 1 as seq and ack. The MD5 key is looked
 * up on skb->sk by the destination address of the packet.
 */
1114 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1116 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
1117 tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr));
/*
 * tcp_v6_hnd_req - match a segment arriving on a listener to pending state.
 * @sk:  listening socket.
 * @skb: received segment.
 *
 * First searches the request-sock queue of the listener for the source
 * port and address pair of the packet and, on a hit, returns
 * tcp_check_req(). Otherwise looks for an established socket with
 * __inet6_lookup_established(); a time-wait hit is released with
 * inet_twsk_put(). With CONFIG_SYN_COOKIES, a segment with ACK set and
 * neither RST nor SYN is passed to cookie_v6_check().
 *
 * NOTE(review): the remaining return statements are on lines that are
 * not part of this listing.
 */
1121 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1123 struct request_sock *req, **prev;
1124 const struct tcphdr *th = tcp_hdr(skb);
1127 /* Find possible connection requests. */
1128 req = inet6_csk_search_req(sk, &prev, th->source,
1129 &ipv6_hdr(skb)->saddr,
1130 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1132 return tcp_check_req(sk, skb, req, prev);
1134 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1135 &ipv6_hdr(skb)->saddr, th->source,
1136 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1139 if (nsk->sk_state != TCP_TIME_WAIT) {
1143 inet_twsk_put(inet_twsk(nsk));
1147 #ifdef CONFIG_SYN_COOKIES
1148 if (!th->rst && !th->syn && th->ack)
1149 sk = cookie_v6_check(sk, skb);
/*
 * tcp_v6_conn_request - handle an incoming SYN on a listening socket.
 * @sk:  listening socket.
 * @skb: the SYN segment.
 *
 * Visible behaviour:
 *  - IPv4 packets (ETH_P_IP) are delegated to tcp_v4_conn_request();
 *  - the destination must be unicast;
 *  - a full request queue with no preset isn triggers the rate-limited
 *    flood warning and, with CONFIG_SYN_COOKIES, the syncookie test;
 *  - a full accept queue with more than one young request is tested;
 *  - a request sock is allocated, TCP options are parsed with mss_clamp
 *    preset to the IPv6 minimum MTU less the TCP and IPv6 header sizes,
 *    and the peer and local addresses are recorded;
 *  - the SYN skb is retained in treq->pktopts when packet options are
 *    wanted, and the incoming interface is recorded for link-local peers
 *    when the listener is not bound to a device;
 *  - the initial sequence number is chosen, the SYN-ACK is sent and the
 *    request is queued with TCP_TIMEOUT_INIT.
 *
 * Returns 0 on the visible exit path so that no reset is sent.
 */
1154 /* FIXME: this is substantially similar to the ipv4 code.
1155 * Can some kind of merge be done? -- erics
1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1159 struct inet6_request_sock *treq;
1160 struct ipv6_pinfo *np = inet6_sk(sk);
1161 struct tcp_options_received tmp_opt;
1162 struct tcp_sock *tp = tcp_sk(sk);
1163 struct request_sock *req = NULL;
1164 __u32 isn = TCP_SKB_CB(skb)->when;
1165 #ifdef CONFIG_SYN_COOKIES
1166 int want_cookie = 0;
1168 #define want_cookie 0
1171 if (skb->protocol == htons(ETH_P_IP))
1172 return tcp_v4_conn_request(sk, skb);
1174 if (!ipv6_unicast_destination(skb))
1177 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1178 if (net_ratelimit())
1179 syn_flood_warning(skb);
1180 #ifdef CONFIG_SYN_COOKIES
1181 if (sysctl_tcp_syncookies)
1188 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1191 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1195 #ifdef CONFIG_TCP_MD5SIG
1196 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1199 tcp_clear_options(&tmp_opt);
1200 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1201 tmp_opt.user_mss = tp->rx_opt.user_mss;
1203 tcp_parse_options(skb, &tmp_opt, 0);
1205 if (want_cookie && !tmp_opt.saw_tstamp)
1206 tcp_clear_options(&tmp_opt);
1208 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1209 tcp_openreq_init(req, &tmp_opt, skb);
1211 treq = inet6_rsk(req);
1212 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1213 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1215 TCP_ECN_create_request(req, tcp_hdr(skb));
1218 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1219 req->cookie_ts = tmp_opt.tstamp_ok;
1221 if (ipv6_opt_accepted(sk, skb) ||
1222 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1223 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1224 atomic_inc(&skb->users);
1225 treq->pktopts = skb;
1227 treq->iif = sk->sk_bound_dev_if;
1229 /* So that link locals have meaning */
1230 if (!sk->sk_bound_dev_if &&
1231 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1232 treq->iif = inet6_iif(skb);
1234 isn = tcp_v6_init_sequence(skb);
1237 tcp_rsk(req)->snt_isn = isn;
1239 security_inet_conn_request(sk, skb, req);
1241 if (tcp_v6_send_synack(sk, req))
1245 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1253 return 0; /* don't send reset */
/*
 * tcp_v6_syn_recv_sock - create the child socket once the handshake completes.
 * @sk:  listening socket.
 * @skb: segment completing the handshake.
 * @req: request sock describing the connection.
 * @dst: route for the child.
 *
 * IPv4 packets (ETH_P_IP) are handled by tcp_v4_syn_recv_sock(); the
 * resulting socket is given an ipv6_pinfo copied from the listener,
 * v4-mapped daddr/saddr, the ipv6_mapped ops and tcp_v4_do_rcv as its
 * backlog handler.
 *
 * For native IPv6 the accept queue is checked, a route is resolved from
 * the request addresses (honouring a type 0 routing header in opt), and
 * tcp_create_openreq_child() builds the child. The child inherits the
 * ipv6_pinfo of the listener, takes its addresses and bound interface
 * from req, receives a clone of any packet options saved with the SYN,
 * a duplicate of the listener tx options, and has its MSS initialised
 * from the route. It is then hashed and inherits the listener port.
 * Failure paths bump LINUX_MIB_LISTENOVERFLOWS / LINUX_MIB_LISTENDROPS
 * and free an opt that differs from np->opt.
 *
 * NOTE(review): under CONFIG_TCP_MD5SIG the key is looked up with
 * &newnp->daddr but added to the child under &inet6_sk(sk)->daddr (the
 * address of the listener). Confirm which peer address is intended.
 */
1256 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1257 struct request_sock *req,
1258 struct dst_entry *dst)
1260 struct inet6_request_sock *treq = inet6_rsk(req);
1261 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1262 struct tcp6_sock *newtcp6sk;
1263 struct inet_sock *newinet;
1264 struct tcp_sock *newtp;
1266 struct ipv6_txoptions *opt;
1267 #ifdef CONFIG_TCP_MD5SIG
1268 struct tcp_md5sig_key *key;
1271 if (skb->protocol == htons(ETH_P_IP)) {
1276 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1281 newtcp6sk = (struct tcp6_sock *)newsk;
1282 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1284 newinet = inet_sk(newsk);
1285 newnp = inet6_sk(newsk);
1286 newtp = tcp_sk(newsk);
1288 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1290 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1293 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1296 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1298 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1299 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1300 #ifdef CONFIG_TCP_MD5SIG
1301 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1304 newnp->pktoptions = NULL;
1306 newnp->mcast_oif = inet6_iif(skb);
1307 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1310 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1311 * here, tcp_create_openreq_child now does this for us, see the comment in
1312 * that function for the gory details. -acme
1315 /* It is tricky place. Until this moment IPv4 tcp
1316 worked with IPv6 icsk.icsk_af_ops.
1319 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1326 if (sk_acceptq_is_full(sk))
1330 struct in6_addr *final_p = NULL, final;
1333 memset(&fl, 0, sizeof(fl));
1334 fl.proto = IPPROTO_TCP;
1335 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1336 if (opt && opt->srcrt) {
1337 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1338 ipv6_addr_copy(&final, &fl.fl6_dst);
1339 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1342 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1343 fl.oif = sk->sk_bound_dev_if;
1344 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1345 fl.fl_ip_sport = inet_sk(sk)->sport;
1346 security_req_classify_flow(req, &fl);
1348 if (ip6_dst_lookup(sk, &dst, &fl))
1352 ipv6_addr_copy(&fl.fl6_dst, final_p);
1354 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1358 newsk = tcp_create_openreq_child(sk, req, skb);
1363 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1364 * count here, tcp_create_openreq_child now does this for us, see the
1365 * comment in that function for the gory details. -acme
1368 newsk->sk_gso_type = SKB_GSO_TCPV6;
1369 __ip6_dst_store(newsk, dst, NULL, NULL);
1371 newtcp6sk = (struct tcp6_sock *)newsk;
1372 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1374 newtp = tcp_sk(newsk);
1375 newinet = inet_sk(newsk);
1376 newnp = inet6_sk(newsk);
1378 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1380 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1381 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1382 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1383 newsk->sk_bound_dev_if = treq->iif;
1385 /* Now IPv6 options...
1387 First: no IPv4 options.
1389 newinet->opt = NULL;
1390 newnp->ipv6_fl_list = NULL;
1393 newnp->rxopt.all = np->rxopt.all;
1395 /* Clone pktoptions received with SYN */
1396 newnp->pktoptions = NULL;
1397 if (treq->pktopts != NULL) {
1398 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1399 kfree_skb(treq->pktopts);
1400 treq->pktopts = NULL;
1401 if (newnp->pktoptions)
1402 skb_set_owner_r(newnp->pktoptions, newsk);
1405 newnp->mcast_oif = inet6_iif(skb);
1406 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1408 /* Clone native IPv6 options from listening socket (if any)
1410 Yes, keeping reference count would be much more clever,
1411 but we make one more one thing there: reattach optmem
1415 newnp->opt = ipv6_dup_options(newsk, opt);
1417 sock_kfree_s(sk, opt, opt->tot_len);
1420 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1422 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1423 newnp->opt->opt_flen);
1425 tcp_mtup_init(newsk);
1426 tcp_sync_mss(newsk, dst_mtu(dst));
1427 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1428 tcp_initialize_rcv_mss(newsk);
1430 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1432 #ifdef CONFIG_TCP_MD5SIG
1433 /* Copy over the MD5 key from the original socket */
1434 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1435 /* We're using one, so create a matching key
1436 * on the newsk structure. If we fail to get
1437 * memory, then we end up not copying the key
1440 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1442 tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr,
1443 newkey, key->keylen);
1447 __inet6_hash(newsk);
1448 __inet_inherit_port(sk, newsk);
1453 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1455 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1456 if (opt && opt != np->opt)
1457 sock_kfree_s(sk, opt, opt->tot_len);
1462 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1464 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1465 if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
1466 &ipv6_hdr(skb)->daddr, skb->csum)) {
1467 skb->ip_summed = CHECKSUM_UNNECESSARY;
1472 skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
1473 &ipv6_hdr(skb)->saddr,
1474 &ipv6_hdr(skb)->daddr, 0));
1476 if (skb->len <= 76) {
1477 return __skb_checksum_complete(skb);
/*
 * tcp_v6_do_rcv - process one TCP segment for a given IPv6 socket.
 * Also installed as tcpv6_prot.backlog_rcv.
 *
 * Steps visible in this excerpt:
 *  - frames with skb->protocol == ETH_P_IP are handed to
 *    tcp_v4_do_rcv();
 *  - with CONFIG_TCP_MD5SIG the segment is checked by
 *    tcp_v6_inbound_md5_hash();
 *  - the segment is run through sk_filter();
 *  - a clone of the skb (opt_skb) may be taken with skb_clone() for
 *    IPV6_PKTOPTIONS processing;
 *  - TCP_ESTABLISHED sockets use tcp_rcv_established(), TCP_LISTEN
 *    sockets use tcp_v6_hnd_req() and tcp_child_process(), and the
 *    remaining path uses tcp_rcv_state_process() after a header-length
 *    and checksum test;
 *  - tcp_v6_send_reset() and the TCP_MIB_INERRS counter are the visible
 *    error actions;
 *  - in the pktoptions tail, when the cloned skb ends exactly at
 *    tp->rcv_nxt and the socket is neither CLOSE nor LISTEN,
 *    np->mcast_oif / np->mcast_hops are refreshed according to
 *    np->rxopt and opt_skb is exchanged with np->pktoptions via xchg().
 *
 * NOTE(review): the labels, gotos, else branches and return statements
 * of this function are not visible in this excerpt; confirm the exact
 * branch targets against the complete file.
 */
1482 /* The socket must have it's spinlock held when we get
1485 * We have a potential double-lock case here, so even when
1486 * doing backlog processing we use the BH locking scheme.
1487 * This is because we cannot sleep with the original spinlock
1490 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1492 struct ipv6_pinfo *np = inet6_sk(sk);
1493 struct tcp_sock *tp;
1494 struct sk_buff *opt_skb = NULL;
1496 /* Imagine: socket is IPv6. IPv4 packet arrives,
1497 goes to IPv4 receive handler and backlogged.
1498 From backlog it always goes here. Kerboom...
1499 Fortunately, tcp_rcv_established and rcv_established
1500 handle them correctly, but it is not case with
1501 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1504 if (skb->protocol == htons(ETH_P_IP))
1505 return tcp_v4_do_rcv(sk, skb);
1507 #ifdef CONFIG_TCP_MD5SIG
1508 if (tcp_v6_inbound_md5_hash (sk, skb))
1512 if (sk_filter(sk, skb))
1516 * socket locking is here for SMP purposes as backlog rcv
1517 * is currently called with bh processing disabled.
1520 /* Do Stevens' IPV6_PKTOPTIONS.
1522 Yes, guys, it is the only place in our code, where we
1523 may make it not affecting IPv4.
1524 The rest of code is protocol independent,
1525 and I do not like idea to uglify IPv4.
1527 Actually, all the idea behind IPV6_PKTOPTIONS
1528 looks not very well thought. For now we latch
1529 options, received in the last packet, enqueued
1530 by tcp. Feel free to propose better solution.
1534 opt_skb = skb_clone(skb, GFP_ATOMIC);
1536 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1537 TCP_CHECK_TIMER(sk);
1538 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1540 TCP_CHECK_TIMER(sk);
1542 goto ipv6_pktoptions;
1546 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1549 if (sk->sk_state == TCP_LISTEN) {
1550 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1555 * Queue it on the new socket if the new socket is active,
1556 * otherwise we just shortcircuit this and continue with
1560 if (tcp_child_process(sk, nsk, skb))
1563 __kfree_skb(opt_skb);
1568 TCP_CHECK_TIMER(sk);
1569 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1571 TCP_CHECK_TIMER(sk);
1573 goto ipv6_pktoptions;
1577 tcp_v6_send_reset(sk, skb);
1580 __kfree_skb(opt_skb);
1584 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1589 /* Do you ask, what is it?
1591 1. skb was enqueued by tcp.
1592 2. skb is added to tail of read queue, rather than out of order.
1593 3. socket is not in passive state.
1594 4. Finally, it really contains options, which user wants to receive.
1597 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1598 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1599 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1600 np->mcast_oif = inet6_iif(opt_skb);
1601 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1602 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1603 if (ipv6_opt_accepted(sk, opt_skb)) {
1604 skb_set_owner_r(opt_skb, sk);
1605 opt_skb = xchg(&np->pktoptions, opt_skb);
1607 __kfree_skb(opt_skb);
1608 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - entry point for TCP segments received over IPv6.
 * Registered as tcpv6_protocol.handler.
 *
 * Steps visible in this excerpt:
 *  - skb->pkt_type is compared against PACKET_HOST;
 *  - TCP_MIB_INSEGS is incremented before validation, so bad segments
 *    are counted too;
 *  - the TCP header is pulled with pskb_may_pull() and th->doff is
 *    range-checked;
 *  - unless skb_csum_unnecessary() holds, tcp_v6_checksum_init() runs;
 *  - the TCP control block (seq, end_seq, ack_seq, when, flags, sacked)
 *    is filled in, flags taking the IPv6 DS field;
 *  - the socket is looked up with __inet6_lookup();
 *  - the segment must pass xfrm6_policy_check() and sk_filter();
 *  - under bh_lock_sock_nested(), if the socket is not owned by user
 *    context the segment goes to tcp_v6_do_rcv(), either directly or
 *    when tcp_prequeue() declines it; sk_add_backlog() is the other
 *    visible path;
 *  - with no usable socket, the policy, length and checksum are checked
 *    again and tcp_v6_send_reset(NULL, skb) is a visible action;
 *  - TIME_WAIT sockets go through tcp_timewait_state_process(), which
 *    may look for a listener with inet6_lookup_listener() or answer via
 *    tcp_v6_timewait_ack().
 *
 * The visible return statement yields -1 when ret is non-zero and 0
 * otherwise.
 *
 * NOTE(review): local declarations, labels, gotos, case labels and the
 * else branches are not visible in this excerpt; confirm the exact
 * control flow against the complete file.
 */
1617 static int tcp_v6_rcv(struct sk_buff *skb)
1623 if (skb->pkt_type != PACKET_HOST)
1627 * Count it even if it's bad.
1629 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1631 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1636 if (th->doff < sizeof(struct tcphdr)/4)
1638 if (!pskb_may_pull(skb, th->doff*4))
1641 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1645 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1646 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1647 skb->len - th->doff*4);
1648 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1649 TCP_SKB_CB(skb)->when = 0;
1650 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1651 TCP_SKB_CB(skb)->sacked = 0;
1653 sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo,
1654 &ipv6_hdr(skb)->saddr, th->source,
1655 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
1662 if (sk->sk_state == TCP_TIME_WAIT)
1665 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1666 goto discard_and_relse;
1668 if (sk_filter(sk, skb))
1669 goto discard_and_relse;
1673 bh_lock_sock_nested(sk);
1675 if (!sock_owned_by_user(sk)) {
1676 #ifdef CONFIG_NET_DMA
1677 struct tcp_sock *tp = tcp_sk(sk);
1678 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1679 tp->ucopy.dma_chan = get_softnet_dma();
1680 if (tp->ucopy.dma_chan)
1681 ret = tcp_v6_do_rcv(sk, skb);
1685 if (!tcp_prequeue(sk, skb))
1686 ret = tcp_v6_do_rcv(sk, skb);
1689 sk_add_backlog(sk, skb);
1693 return ret ? -1 : 0;
1696 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1699 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1701 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1703 tcp_v6_send_reset(NULL, skb);
1720 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1721 inet_twsk_put(inet_twsk(sk));
1725 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1726 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1727 inet_twsk_put(inet_twsk(sk));
1731 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1736 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1737 &ipv6_hdr(skb)->daddr,
1738 ntohs(th->dest), inet6_iif(skb));
1740 struct inet_timewait_sock *tw = inet_twsk(sk);
1741 inet_twsk_deschedule(tw, &tcp_death_row);
1746 /* Fall through to ACK */
1749 tcp_v6_timewait_ack(sk, skb);
1753 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_remember_stamp - placeholder for the .remember_stamp hook of
 * ipv6_specific; the comment in the body marks it as not implemented.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1758 static int tcp_v6_remember_stamp(struct sock *sk)
1760 /* Alas, not yet... */
/*
 * Address-family operations for TCP sockets running over native IPv6.
 * tcp_v6_init_sock() installs this table as icsk->icsk_af_ops.
 * Transmit, checksum, header rebuild and timestamp hooks are the IPv6
 * variants and net_header_len is sizeof(struct ipv6hdr).
 */
1764 static struct inet_connection_sock_af_ops ipv6_specific = {
1765 .queue_xmit = inet6_csk_xmit,
1766 .send_check = tcp_v6_send_check,
1767 .rebuild_header = inet6_sk_rebuild_header,
1768 .conn_request = tcp_v6_conn_request,
1769 .syn_recv_sock = tcp_v6_syn_recv_sock,
1770 .remember_stamp = tcp_v6_remember_stamp,
1771 .net_header_len = sizeof(struct ipv6hdr),
1772 .setsockopt = ipv6_setsockopt,
1773 .getsockopt = ipv6_getsockopt,
1774 .addr2sockaddr = inet6_csk_addr2sockaddr,
1775 .sockaddr_len = sizeof(struct sockaddr_in6),
1776 .bind_conflict = inet6_csk_bind_conflict,
1777 #ifdef CONFIG_COMPAT
1778 .compat_setsockopt = compat_ipv6_setsockopt,
1779 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature operations for native IPv6 sockets, built only with
 * CONFIG_TCP_MD5SIG.  tcp_v6_init_sock() installs this table as
 * tp->af_specific.
 */
1783 #ifdef CONFIG_TCP_MD5SIG
1784 static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1785 .md5_lookup = tcp_v6_md5_lookup,
1786 .calc_md5_hash = tcp_v6_calc_md5_hash,
1787 .md5_add = tcp_v6_md5_add_func,
1788 .md5_parse = tcp_v6_parse_md5_keys,
/*
 * Address-family operations for an INET6 socket that carries TCP over
 * IPv4.  Transmit, checksum, header rebuild and timestamp hooks are the
 * IPv4 routines and net_header_len is sizeof(struct iphdr).  Connection
 * request handling, socket options, sockaddr conversion/length and
 * bind-conflict checks are the same entries as in ipv6_specific.
 */
1793 * TCP over IPv4 via INET6 API
1796 static struct inet_connection_sock_af_ops ipv6_mapped = {
1797 .queue_xmit = ip_queue_xmit,
1798 .send_check = tcp_v4_send_check,
1799 .rebuild_header = inet_sk_rebuild_header,
1800 .conn_request = tcp_v6_conn_request,
1801 .syn_recv_sock = tcp_v6_syn_recv_sock,
1802 .remember_stamp = tcp_v4_remember_stamp,
1803 .net_header_len = sizeof(struct iphdr),
1804 .setsockopt = ipv6_setsockopt,
1805 .getsockopt = ipv6_getsockopt,
1806 .addr2sockaddr = inet6_csk_addr2sockaddr,
1807 .sockaddr_len = sizeof(struct sockaddr_in6),
1808 .bind_conflict = inet6_csk_bind_conflict,
1809 #ifdef CONFIG_COMPAT
1810 .compat_setsockopt = compat_ipv6_setsockopt,
1811 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature operations for the TCP-over-IPv4 case of an INET6
 * socket, built only with CONFIG_TCP_MD5SIG.  Key lookup and hash
 * calculation use the IPv4 routines; key add and key parsing use the
 * same IPv6 entry points as tcp_sock_ipv6_specific.
 */
1815 #ifdef CONFIG_TCP_MD5SIG
1816 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1817 .md5_lookup = tcp_v4_md5_lookup,
1818 .calc_md5_hash = tcp_v4_calc_md5_hash,
1819 .md5_add = tcp_v6_md5_add_func,
1820 .md5_parse = tcp_v6_parse_md5_keys,
/*
 * tcp_v6_init_sock - initialise the TCP state of a new IPv6 socket.
 * Installed as tcpv6_prot.init.
 *
 * Visible initialisation: the out-of-order queue, transmit timers and
 * prequeue; icsk_rto and mdev set to TCP_TIMEOUT_INIT; snd_ssthresh
 * 0x7fffffff, snd_cwnd_clamp ~0, mss_cache 536 and reordering from
 * sysctl_tcp_reordering; state TCP_CLOSE; ipv6_specific as the
 * address-family ops (and tcp_sock_ipv6_specific with
 * CONFIG_TCP_MD5SIG); the initial congestion ops; send/receive buffer
 * sizes from element [1] of sysctl_tcp_wmem / sysctl_tcp_rmem.  Finally
 * tcp_sockets_allocated is incremented.
 *
 * NOTE(review): some short assignments and the return statement are not
 * visible in this excerpt.
 */
1824 /* NOTE: A lot of things set to zero explicitly by call to
1825 * sk_alloc() so need not be done here.
1827 static int tcp_v6_init_sock(struct sock *sk)
1829 struct inet_connection_sock *icsk = inet_csk(sk);
1830 struct tcp_sock *tp = tcp_sk(sk);
1832 skb_queue_head_init(&tp->out_of_order_queue);
1833 tcp_init_xmit_timers(sk);
1834 tcp_prequeue_init(tp);
1836 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1837 tp->mdev = TCP_TIMEOUT_INIT;
1839 /* So many TCP implementations out there (incorrectly) count the
1840 * initial SYN frame in their delayed-ACK and congestion control
1841 * algorithms that we must have the following bandaid to talk
1842 * efficiently to them. -DaveM
1846 /* See draft-stevens-tcpca-spec-01 for discussion of the
1847 * initialization of these values.
1849 tp->snd_ssthresh = 0x7fffffff;
1850 tp->snd_cwnd_clamp = ~0;
1851 tp->mss_cache = 536;
1853 tp->reordering = sysctl_tcp_reordering;
1855 sk->sk_state = TCP_CLOSE;
1857 icsk->icsk_af_ops = &ipv6_specific;
1858 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1859 icsk->icsk_sync_mss = tcp_sync_mss;
1860 sk->sk_write_space = sk_stream_write_space;
1861 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1863 #ifdef CONFIG_TCP_MD5SIG
1864 tp->af_specific = &tcp_sock_ipv6_specific;
1867 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1868 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1870 atomic_inc(&tcp_sockets_allocated);
/*
 * tcp_v6_destroy_sock - tear down a TCP/IPv6 socket.
 * Installed as tcpv6_prot.destroy.
 *
 * With CONFIG_TCP_MD5SIG the MD5 key list is cleared when md5sig_info
 * is set.  The shared TCP teardown in tcp_v4_destroy_sock() runs next,
 * and the function returns the result of inet6_destroy_sock().
 */
1875 static int tcp_v6_destroy_sock(struct sock *sk)
1877 #ifdef CONFIG_TCP_MD5SIG
1878 /* Clean up the MD5 key list */
1879 if (tcp_sk(sk)->md5sig_info)
1880 tcp_v6_clear_md5_list(sk);
1882 tcp_v4_destroy_sock(sk);
1883 return inet6_destroy_sock(sk);
/*
 * get_openreq6 - format one line describing a pending connection
 * request (request_sock) into a seq_file.  Built only with
 * CONFIG_PROC_FS; called from tcp6_seq_show() for
 * TCP_SEQ_STATE_OPENREQ.
 *
 * Local and remote addresses come from inet6_rsk(req); the local port
 * is taken from the socket sk passed in and the remote port from
 * inet_rsk(req).  ttd is req->expires minus the current jiffies and is
 * printed through jiffies_to_clock_t().
 *
 * NOTE(review): several short argument lines of the print call are not
 * visible in this excerpt.
 */
1886 #ifdef CONFIG_PROC_FS
1887 /* Proc filesystem TCPv6 sock list dumping. */
1888 static void get_openreq6(struct seq_file *seq,
1889 struct sock *sk, struct request_sock *req, int i, int uid)
1891 int ttd = req->expires - jiffies;
1892 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1893 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1899 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1900 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1902 src->s6_addr32[0], src->s6_addr32[1],
1903 src->s6_addr32[2], src->s6_addr32[3],
1904 ntohs(inet_sk(sk)->sport),
1905 dest->s6_addr32[0], dest->s6_addr32[1],
1906 dest->s6_addr32[2], dest->s6_addr32[3],
1907 ntohs(inet_rsk(req)->rmt_port),
1909 0,0, /* could print option size, but that is af dependent. */
1910 1, /* timers active (only the expire timer) */
1911 jiffies_to_clock_t(ttd),
1914 0, /* non standard timer */
1915 0, /* open_requests have no inode */
/*
 * get_tcp6_sock - format one line describing a TCP/IPv6 socket into a
 * seq_file.  Called from tcp6_seq_show() for listening and established
 * entries.
 *
 * The expiry shown is icsk->icsk_timeout while a retransmit or
 * zero-window-probe timer is pending, sp->sk_timer.expires while that
 * timer is pending, and the current jiffies otherwise.  The receive
 * queue column is sk_ack_backlog for a listening socket and
 * rcv_nxt - copied_seq for any other.  The last column prints -1 when
 * snd_ssthresh is at least 0xFFFF.
 *
 * NOTE(review): some declarations, assignments and short argument lines
 * are not visible in this excerpt.
 */
1919 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1921 struct in6_addr *dest, *src;
1924 unsigned long timer_expires;
1925 struct inet_sock *inet = inet_sk(sp);
1926 struct tcp_sock *tp = tcp_sk(sp);
1927 const struct inet_connection_sock *icsk = inet_csk(sp);
1928 struct ipv6_pinfo *np = inet6_sk(sp);
1931 src = &np->rcv_saddr;
1932 destp = ntohs(inet->dport);
1933 srcp = ntohs(inet->sport);
1935 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1937 timer_expires = icsk->icsk_timeout;
1938 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1940 timer_expires = icsk->icsk_timeout;
1941 } else if (timer_pending(&sp->sk_timer)) {
1943 timer_expires = sp->sk_timer.expires;
1946 timer_expires = jiffies;
1950 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1951 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1953 src->s6_addr32[0], src->s6_addr32[1],
1954 src->s6_addr32[2], src->s6_addr32[3], srcp,
1955 dest->s6_addr32[0], dest->s6_addr32[1],
1956 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1958 tp->write_seq-tp->snd_una,
1959 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1961 jiffies_to_clock_t(timer_expires - jiffies),
1962 icsk->icsk_retransmits,
1964 icsk->icsk_probes_out,
1966 atomic_read(&sp->sk_refcnt), sp,
1969 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1970 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * get_timewait6_sock - format one line describing a TIME_WAIT socket
 * into a seq_file.  Called from tcp6_seq_show() for
 * TCP_SEQ_STATE_TIME_WAIT.
 *
 * Addresses come from the inet6_timewait_sock (tw_v6_daddr,
 * tw_v6_rcv_saddr) and ports from tw_dport / tw_sport.  ttd is
 * tw->tw_ttd minus the current jiffies.  The state column prints
 * tw_substate, the timer column is the constant 3, and the remaining
 * numeric columns are printed as 0 apart from the reference count.
 *
 * NOTE(review): some declarations and short lines are not visible in
 * this excerpt.
 */
1974 static void get_timewait6_sock(struct seq_file *seq,
1975 struct inet_timewait_sock *tw, int i)
1977 struct in6_addr *dest, *src;
1979 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1980 int ttd = tw->tw_ttd - jiffies;
1985 dest = &tw6->tw_v6_daddr;
1986 src = &tw6->tw_v6_rcv_saddr;
1987 destp = ntohs(tw->tw_dport);
1988 srcp = ntohs(tw->tw_sport);
1991 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1992 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1994 src->s6_addr32[0], src->s6_addr32[1],
1995 src->s6_addr32[2], src->s6_addr32[3], srcp,
1996 dest->s6_addr32[0], dest->s6_addr32[1],
1997 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1998 tw->tw_substate, 0, 0,
1999 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2000 atomic_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show - seq_file show callback, referenced from
 * tcp6_seq_afinfo.
 *
 * For SEQ_START_TOKEN the column header is emitted.  Otherwise the
 * iterator state st->state selects the formatter: get_tcp6_sock() for
 * listening and established entries, get_openreq6() for open requests
 * (with st->syn_wait_sk as the socket argument), and
 * get_timewait6_sock() for TIME_WAIT entries.
 *
 * NOTE(review): the assignment of st, the break statements and the
 * return are not visible in this excerpt.
 */
2003 static int tcp6_seq_show(struct seq_file *seq, void *v)
2005 struct tcp_iter_state *st;
2007 if (v == SEQ_START_TOKEN) {
2012 "st tx_queue rx_queue tr tm->when retrnsmt"
2013 " uid timeout inode\n");
2018 switch (st->state) {
2019 case TCP_SEQ_STATE_LISTENING:
2020 case TCP_SEQ_STATE_ESTABLISHED:
2021 get_tcp6_sock(seq, v, st->num);
2023 case TCP_SEQ_STATE_OPENREQ:
2024 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2026 case TCP_SEQ_STATE_TIME_WAIT:
2027 get_timewait6_sock(seq, v, st->num);
/*
 * Descriptor handed to tcp_proc_register() / tcp_proc_unregister() by
 * tcp6_proc_init() / tcp6_proc_exit().  It names this module as owner
 * and tcp6_seq_show() as the show callback.
 *
 * NOTE(review): the remaining fields are not visible in this excerpt.
 */
2034 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2038 .owner = THIS_MODULE,
2041 .show = tcp6_seq_show,
/*
 * tcp6_proc_init - register tcp6_seq_afinfo for the network namespace
 * net and return the result of tcp_proc_register().
 */
2045 int tcp6_proc_init(struct net *net)
2047 return tcp_proc_register(net, &tcp6_seq_afinfo);
/*
 * tcp6_proc_exit - counterpart of tcp6_proc_init(): unregister
 * tcp6_seq_afinfo for the network namespace net.
 */
2050 void tcp6_proc_exit(struct net *net)
2052 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * Protocol descriptor for TCP over IPv6; tcpv6_protosw.prot points
 * here.  The IPv6-specific entries are connect, init, destroy,
 * backlog_rcv and hash; the other visible handlers are the generic
 * tcp_* / inet_* functions.  Socket objects are sized as struct
 * tcp6_sock, and accounting fields point at the tcp_* counters and
 * sysctl arrays.
 *
 * NOTE(review): some short field initialisers are not visible in this
 * excerpt.
 */
2056 struct proto tcpv6_prot = {
2058 .owner = THIS_MODULE,
2060 .connect = tcp_v6_connect,
2061 .disconnect = tcp_disconnect,
2062 .accept = inet_csk_accept,
2064 .init = tcp_v6_init_sock,
2065 .destroy = tcp_v6_destroy_sock,
2066 .shutdown = tcp_shutdown,
2067 .setsockopt = tcp_setsockopt,
2068 .getsockopt = tcp_getsockopt,
2069 .recvmsg = tcp_recvmsg,
2070 .backlog_rcv = tcp_v6_do_rcv,
2071 .hash = tcp_v6_hash,
2072 .unhash = inet_unhash,
2073 .get_port = inet_csk_get_port,
2074 .enter_memory_pressure = tcp_enter_memory_pressure,
2075 .sockets_allocated = &tcp_sockets_allocated,
2076 .memory_allocated = &tcp_memory_allocated,
2077 .memory_pressure = &tcp_memory_pressure,
2078 .orphan_count = &tcp_orphan_count,
2079 .sysctl_mem = sysctl_tcp_mem,
2080 .sysctl_wmem = sysctl_tcp_wmem,
2081 .sysctl_rmem = sysctl_tcp_rmem,
2082 .max_header = MAX_TCP_HEADER,
2083 .obj_size = sizeof(struct tcp6_sock),
2084 .twsk_prot = &tcp6_timewait_sock_ops,
2085 .rsk_prot = &tcp6_request_sock_ops,
2086 .h.hashinfo = &tcp_hashinfo,
2087 #ifdef CONFIG_COMPAT
2088 .compat_setsockopt = compat_tcp_setsockopt,
2089 .compat_getsockopt = compat_tcp_getsockopt,
/*
 * IPv6 protocol hooks for TCP: receive handler, error handler and the
 * GSO check/segment callbacks.  tcpv6_init() registers this with
 * inet6_add_protocol() under IPPROTO_TCP.
 */
2093 static struct inet6_protocol tcpv6_protocol = {
2094 .handler = tcp_v6_rcv,
2095 .err_handler = tcp_v6_err,
2096 .gso_send_check = tcp_v6_gso_send_check,
2097 .gso_segment = tcp_tso_segment,
2098 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry binding SOCK_STREAM / IPPROTO_TCP to tcpv6_prot
 * and inet6_stream_ops.  tcpv6_init() registers it with
 * inet6_register_protosw().
 *
 * NOTE(review): only the first flag (INET_PROTOSW_PERMANENT) is visible
 * in this excerpt.
 */
2101 static struct inet_protosw tcpv6_protosw = {
2102 .type = SOCK_STREAM,
2103 .protocol = IPPROTO_TCP,
2104 .prot = &tcpv6_prot,
2105 .ops = &inet6_stream_ops,
2108 .flags = INET_PROTOSW_PERMANENT |
/*
 * tcpv6_net_init - per-network-namespace setup: create the control
 * socket net->ipv6.tcp_sk (PF_INET6, SOCK_RAW, IPPROTO_TCP) and return
 * the result of inet_ctl_sock_create().
 */
2112 static int tcpv6_net_init(struct net *net)
2114 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2115 SOCK_RAW, IPPROTO_TCP, net);
/*
 * tcpv6_net_exit - per-network-namespace teardown: destroy the control
 * socket created by tcpv6_net_init().
 */
2118 static void tcpv6_net_exit(struct net *net)
2120 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/*
 * Per-network-namespace init/exit callbacks; registered by tcpv6_init()
 * and unregistered by tcpv6_exit().
 */
2123 static struct pernet_operations tcpv6_net_ops = {
2124 .init = tcpv6_net_init,
2125 .exit = tcpv6_net_exit,
2128 int __init tcpv6_init(void)
2132 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2136 /* register inet6 protocol */
2137 ret = inet6_register_protosw(&tcpv6_protosw);
2139 goto out_tcpv6_protocol;
2141 ret = register_pernet_subsys(&tcpv6_net_ops);
2143 goto out_tcpv6_protosw;
2148 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2150 inet6_unregister_protosw(&tcpv6_protosw);
/*
 * tcpv6_exit - undo tcpv6_init(): unregister the per-namespace
 * operations, then the protosw entry, then the inet6 protocol handler,
 * which is the reverse of the registration order.
 */
2154 void tcpv6_exit(void)
2156 unregister_pernet_subsys(&tcpv6_net_ops);
2157 inet6_unregister_protosw(&tcpv6_protosw);
2158 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);