3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
42 #include <linux/ipv6.h>
43 #include <linux/icmpv6.h>
44 #include <linux/random.h>
47 #include <net/ndisc.h>
48 #include <net/inet6_hashtables.h>
49 #include <net/inet6_connection_sock.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
59 #include <net/dsfield.h>
60 #include <net/timewait_sock.h>
61 #include <net/netdma.h>
62 #include <net/inet_common.h>
64 #include <asm/uaccess.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <linux/crypto.h>
70 #include <linux/scatterlist.h>
72 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
85 struct in6_addr *addr)
91 static void tcp_v6_hash(struct sock *sk)
93 if (sk->sk_state != TCP_CLOSE) {
94 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
104 static __inline__ __sum16 tcp_v6_check(int len,
105 struct in6_addr *saddr,
106 struct in6_addr *daddr,
109 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
112 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
114 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
115 ipv6_hdr(skb)->saddr.s6_addr32,
117 tcp_hdr(skb)->source);
120 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
123 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
124 struct inet_sock *inet = inet_sk(sk);
125 struct inet_connection_sock *icsk = inet_csk(sk);
126 struct ipv6_pinfo *np = inet6_sk(sk);
127 struct tcp_sock *tp = tcp_sk(sk);
128 struct in6_addr *saddr = NULL, *final_p = NULL, final;
130 struct dst_entry *dst;
134 if (addr_len < SIN6_LEN_RFC2133)
137 if (usin->sin6_family != AF_INET6)
138 return(-EAFNOSUPPORT);
140 memset(&fl, 0, sizeof(fl));
143 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
144 IP6_ECN_flow_init(fl.fl6_flowlabel);
145 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
146 struct ip6_flowlabel *flowlabel;
147 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
148 if (flowlabel == NULL)
150 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
151 fl6_sock_release(flowlabel);
156 * connect() to INADDR_ANY means loopback (BSD'ism).
159 if(ipv6_addr_any(&usin->sin6_addr))
160 usin->sin6_addr.s6_addr[15] = 0x1;
162 addr_type = ipv6_addr_type(&usin->sin6_addr);
164 if(addr_type & IPV6_ADDR_MULTICAST)
167 if (addr_type&IPV6_ADDR_LINKLOCAL) {
168 if (addr_len >= sizeof(struct sockaddr_in6) &&
169 usin->sin6_scope_id) {
170 /* If interface is set while binding, indices
173 if (sk->sk_bound_dev_if &&
174 sk->sk_bound_dev_if != usin->sin6_scope_id)
177 sk->sk_bound_dev_if = usin->sin6_scope_id;
180 /* Connect to link-local address requires an interface */
181 if (!sk->sk_bound_dev_if)
185 if (tp->rx_opt.ts_recent_stamp &&
186 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
187 tp->rx_opt.ts_recent = 0;
188 tp->rx_opt.ts_recent_stamp = 0;
192 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
193 np->flow_label = fl.fl6_flowlabel;
199 if (addr_type == IPV6_ADDR_MAPPED) {
200 u32 exthdrlen = icsk->icsk_ext_hdr_len;
201 struct sockaddr_in sin;
203 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
205 if (__ipv6_only_sock(sk))
208 sin.sin_family = AF_INET;
209 sin.sin_port = usin->sin6_port;
210 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
212 icsk->icsk_af_ops = &ipv6_mapped;
213 sk->sk_backlog_rcv = tcp_v4_do_rcv;
214 #ifdef CONFIG_TCP_MD5SIG
215 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
218 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
221 icsk->icsk_ext_hdr_len = exthdrlen;
222 icsk->icsk_af_ops = &ipv6_specific;
223 sk->sk_backlog_rcv = tcp_v6_do_rcv;
224 #ifdef CONFIG_TCP_MD5SIG
225 tp->af_specific = &tcp_sock_ipv6_specific;
229 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
231 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
238 if (!ipv6_addr_any(&np->rcv_saddr))
239 saddr = &np->rcv_saddr;
241 fl.proto = IPPROTO_TCP;
242 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
243 ipv6_addr_copy(&fl.fl6_src,
244 (saddr ? saddr : &np->saddr));
245 fl.oif = sk->sk_bound_dev_if;
246 fl.mark = sk->sk_mark;
247 fl.fl_ip_dport = usin->sin6_port;
248 fl.fl_ip_sport = inet->sport;
250 if (np->opt && np->opt->srcrt) {
251 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
252 ipv6_addr_copy(&final, &fl.fl6_dst);
253 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
257 security_sk_classify_flow(sk, &fl);
259 err = ip6_dst_lookup(sk, &dst, &fl);
263 ipv6_addr_copy(&fl.fl6_dst, final_p);
265 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
268 err = ip6_dst_blackhole(sk, &dst, &fl);
275 ipv6_addr_copy(&np->rcv_saddr, saddr);
278 /* set the source address */
279 ipv6_addr_copy(&np->saddr, saddr);
280 inet->rcv_saddr = LOOPBACK4_IPV6;
282 sk->sk_gso_type = SKB_GSO_TCPV6;
283 __ip6_dst_store(sk, dst, NULL, NULL);
285 icsk->icsk_ext_hdr_len = 0;
287 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
290 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
292 inet->dport = usin->sin6_port;
294 tcp_set_state(sk, TCP_SYN_SENT);
295 err = inet6_hash_connect(&tcp_death_row, sk);
300 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
305 err = tcp_connect(sk);
312 tcp_set_state(sk, TCP_CLOSE);
316 sk->sk_route_caps = 0;
320 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
321 u8 type, u8 code, int offset, __be32 info)
323 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
324 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
325 struct ipv6_pinfo *np;
330 struct net *net = dev_net(skb->dev);
332 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
333 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
336 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
341 if (sk->sk_state == TCP_TIME_WAIT) {
342 inet_twsk_put(inet_twsk(sk));
347 if (sock_owned_by_user(sk))
348 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
350 if (sk->sk_state == TCP_CLOSE)
354 seq = ntohl(th->seq);
355 if (sk->sk_state != TCP_LISTEN &&
356 !between(seq, tp->snd_una, tp->snd_nxt)) {
357 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
363 if (type == ICMPV6_PKT_TOOBIG) {
364 struct dst_entry *dst = NULL;
366 if (sock_owned_by_user(sk))
368 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
371 /* icmp should have updated the destination cache entry */
372 dst = __sk_dst_check(sk, np->dst_cookie);
375 struct inet_sock *inet = inet_sk(sk);
378 /* BUGGG_FUTURE: Again, it is not clear how
379 to handle rthdr case. Ignore this complexity
382 memset(&fl, 0, sizeof(fl));
383 fl.proto = IPPROTO_TCP;
384 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
385 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
386 fl.oif = sk->sk_bound_dev_if;
387 fl.mark = sk->sk_mark;
388 fl.fl_ip_dport = inet->dport;
389 fl.fl_ip_sport = inet->sport;
390 security_skb_classify_flow(skb, &fl);
392 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
393 sk->sk_err_soft = -err;
397 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
398 sk->sk_err_soft = -err;
405 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
406 tcp_sync_mss(sk, dst_mtu(dst));
407 tcp_simple_retransmit(sk);
408 } /* else let the usual retransmit timer handle it */
413 icmpv6_err_convert(type, code, &err);
415 /* Might be for a request_sock */
416 switch (sk->sk_state) {
417 struct request_sock *req, **prev;
419 if (sock_owned_by_user(sk))
422 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
423 &hdr->saddr, inet6_iif(skb));
427 /* ICMPs are not backlogged, hence we cannot get
428 * an established socket here.
430 WARN_ON(req->sk != NULL);
432 if (seq != tcp_rsk(req)->snt_isn) {
433 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
437 inet_csk_reqsk_queue_drop(sk, req, prev);
441 case TCP_SYN_RECV: /* Cannot happen.
442 It can, if SYNs are crossed. --ANK */
443 if (!sock_owned_by_user(sk)) {
445 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
449 sk->sk_err_soft = err;
453 if (!sock_owned_by_user(sk) && np->recverr) {
455 sk->sk_error_report(sk);
457 sk->sk_err_soft = err;
465 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
467 struct inet6_request_sock *treq = inet6_rsk(req);
468 struct ipv6_pinfo *np = inet6_sk(sk);
469 struct sk_buff * skb;
470 struct ipv6_txoptions *opt = NULL;
471 struct in6_addr * final_p = NULL, final;
473 struct dst_entry *dst;
476 memset(&fl, 0, sizeof(fl));
477 fl.proto = IPPROTO_TCP;
478 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
479 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
480 fl.fl6_flowlabel = 0;
482 fl.mark = sk->sk_mark;
483 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
484 fl.fl_ip_sport = inet_rsk(req)->loc_port;
485 security_req_classify_flow(req, &fl);
488 if (opt && opt->srcrt) {
489 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
490 ipv6_addr_copy(&final, &fl.fl6_dst);
491 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
495 err = ip6_dst_lookup(sk, &dst, &fl);
499 ipv6_addr_copy(&fl.fl6_dst, final_p);
500 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
503 skb = tcp_make_synack(sk, dst, req);
505 struct tcphdr *th = tcp_hdr(skb);
507 th->check = tcp_v6_check(skb->len,
508 &treq->loc_addr, &treq->rmt_addr,
509 csum_partial(th, skb->len, skb->csum));
511 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
512 err = ip6_xmit(sk, skb, &fl, opt, 0);
513 err = net_xmit_eval(err);
517 if (opt && opt != np->opt)
518 sock_kfree_s(sk, opt, opt->tot_len);
523 static inline void syn_flood_warning(struct sk_buff *skb)
525 #ifdef CONFIG_SYN_COOKIES
526 if (sysctl_tcp_syncookies)
528 "TCPv6: Possible SYN flooding on port %d. "
529 "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
533 "TCPv6: Possible SYN flooding on port %d. "
534 "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
537 static void tcp_v6_reqsk_destructor(struct request_sock *req)
539 kfree_skb(inet6_rsk(req)->pktopts);
542 #ifdef CONFIG_TCP_MD5SIG
543 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
544 struct in6_addr *addr)
546 struct tcp_sock *tp = tcp_sk(sk);
551 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
554 for (i = 0; i < tp->md5sig_info->entries6; i++) {
555 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
556 return &tp->md5sig_info->keys6[i].base;
561 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
562 struct sock *addr_sk)
564 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
567 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
568 struct request_sock *req)
570 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
573 static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
574 char *newkey, u8 newkeylen)
576 /* Add key to the list */
577 struct tcp_md5sig_key *key;
578 struct tcp_sock *tp = tcp_sk(sk);
579 struct tcp6_md5sig_key *keys;
581 key = tcp_v6_md5_do_lookup(sk, peer);
583 /* modify existing entry - just update that one */
586 key->keylen = newkeylen;
588 /* reallocate new list if current one is full. */
589 if (!tp->md5sig_info) {
590 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
591 if (!tp->md5sig_info) {
595 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
597 if (tcp_alloc_md5sig_pool(sk) == NULL) {
601 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
602 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
603 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
606 tcp_free_md5sig_pool();
611 if (tp->md5sig_info->entries6)
612 memmove(keys, tp->md5sig_info->keys6,
613 (sizeof (tp->md5sig_info->keys6[0]) *
614 tp->md5sig_info->entries6));
616 kfree(tp->md5sig_info->keys6);
617 tp->md5sig_info->keys6 = keys;
618 tp->md5sig_info->alloced6++;
621 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
623 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
624 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
626 tp->md5sig_info->entries6++;
631 static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
632 u8 *newkey, __u8 newkeylen)
634 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
638 static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
640 struct tcp_sock *tp = tcp_sk(sk);
643 for (i = 0; i < tp->md5sig_info->entries6; i++) {
644 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
646 kfree(tp->md5sig_info->keys6[i].base.key);
647 tp->md5sig_info->entries6--;
649 if (tp->md5sig_info->entries6 == 0) {
650 kfree(tp->md5sig_info->keys6);
651 tp->md5sig_info->keys6 = NULL;
652 tp->md5sig_info->alloced6 = 0;
654 /* shrink the database */
655 if (tp->md5sig_info->entries6 != i)
656 memmove(&tp->md5sig_info->keys6[i],
657 &tp->md5sig_info->keys6[i+1],
658 (tp->md5sig_info->entries6 - i)
659 * sizeof (tp->md5sig_info->keys6[0]));
661 tcp_free_md5sig_pool();
668 static void tcp_v6_clear_md5_list (struct sock *sk)
670 struct tcp_sock *tp = tcp_sk(sk);
673 if (tp->md5sig_info->entries6) {
674 for (i = 0; i < tp->md5sig_info->entries6; i++)
675 kfree(tp->md5sig_info->keys6[i].base.key);
676 tp->md5sig_info->entries6 = 0;
677 tcp_free_md5sig_pool();
680 kfree(tp->md5sig_info->keys6);
681 tp->md5sig_info->keys6 = NULL;
682 tp->md5sig_info->alloced6 = 0;
684 if (tp->md5sig_info->entries4) {
685 for (i = 0; i < tp->md5sig_info->entries4; i++)
686 kfree(tp->md5sig_info->keys4[i].base.key);
687 tp->md5sig_info->entries4 = 0;
688 tcp_free_md5sig_pool();
691 kfree(tp->md5sig_info->keys4);
692 tp->md5sig_info->keys4 = NULL;
693 tp->md5sig_info->alloced4 = 0;
696 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
699 struct tcp_md5sig cmd;
700 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
703 if (optlen < sizeof(cmd))
706 if (copy_from_user(&cmd, optval, sizeof(cmd)))
709 if (sin6->sin6_family != AF_INET6)
712 if (!cmd.tcpm_keylen) {
713 if (!tcp_sk(sk)->md5sig_info)
715 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
716 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
717 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
720 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
723 if (!tcp_sk(sk)->md5sig_info) {
724 struct tcp_sock *tp = tcp_sk(sk);
725 struct tcp_md5sig_info *p;
727 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
732 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
735 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
738 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
739 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
740 newkey, cmd.tcpm_keylen);
742 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
745 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
746 struct in6_addr *daddr,
747 struct in6_addr *saddr, int nbytes)
749 struct tcp6_pseudohdr *bp;
750 struct scatterlist sg;
752 bp = &hp->md5_blk.ip6;
753 /* 1. TCP pseudo-header (RFC2460) */
754 ipv6_addr_copy(&bp->saddr, saddr);
755 ipv6_addr_copy(&bp->daddr, daddr);
756 bp->protocol = cpu_to_be32(IPPROTO_TCP);
757 bp->len = cpu_to_be32(nbytes);
759 sg_init_one(&sg, bp, sizeof(*bp));
760 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
763 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
764 struct in6_addr *daddr, struct in6_addr *saddr,
767 struct tcp_md5sig_pool *hp;
768 struct hash_desc *desc;
770 hp = tcp_get_md5sig_pool();
772 goto clear_hash_noput;
773 desc = &hp->md5_desc;
775 if (crypto_hash_init(desc))
777 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
779 if (tcp_md5_hash_header(hp, th))
781 if (tcp_md5_hash_key(hp, key))
783 if (crypto_hash_final(desc, md5_hash))
786 tcp_put_md5sig_pool();
790 tcp_put_md5sig_pool();
792 memset(md5_hash, 0, 16);
796 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
797 struct sock *sk, struct request_sock *req,
800 struct in6_addr *saddr, *daddr;
801 struct tcp_md5sig_pool *hp;
802 struct hash_desc *desc;
803 struct tcphdr *th = tcp_hdr(skb);
806 saddr = &inet6_sk(sk)->saddr;
807 daddr = &inet6_sk(sk)->daddr;
809 saddr = &inet6_rsk(req)->loc_addr;
810 daddr = &inet6_rsk(req)->rmt_addr;
812 struct ipv6hdr *ip6h = ipv6_hdr(skb);
813 saddr = &ip6h->saddr;
814 daddr = &ip6h->daddr;
817 hp = tcp_get_md5sig_pool();
819 goto clear_hash_noput;
820 desc = &hp->md5_desc;
822 if (crypto_hash_init(desc))
825 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
827 if (tcp_md5_hash_header(hp, th))
829 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
831 if (tcp_md5_hash_key(hp, key))
833 if (crypto_hash_final(desc, md5_hash))
836 tcp_put_md5sig_pool();
840 tcp_put_md5sig_pool();
842 memset(md5_hash, 0, 16);
846 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
848 __u8 *hash_location = NULL;
849 struct tcp_md5sig_key *hash_expected;
850 struct ipv6hdr *ip6h = ipv6_hdr(skb);
851 struct tcphdr *th = tcp_hdr(skb);
855 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
856 hash_location = tcp_parse_md5sig_option(th);
858 /* We've parsed the options - do we have a hash? */
859 if (!hash_expected && !hash_location)
862 if (hash_expected && !hash_location) {
863 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
867 if (!hash_expected && hash_location) {
868 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
872 /* check the signature */
873 genhash = tcp_v6_md5_hash_skb(newhash,
877 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
878 if (net_ratelimit()) {
879 printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n",
880 genhash ? "failed" : "mismatch",
881 &ip6h->saddr, ntohs(th->source),
882 &ip6h->daddr, ntohs(th->dest));
890 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
892 .obj_size = sizeof(struct tcp6_request_sock),
893 .rtx_syn_ack = tcp_v6_send_synack,
894 .send_ack = tcp_v6_reqsk_send_ack,
895 .destructor = tcp_v6_reqsk_destructor,
896 .send_reset = tcp_v6_send_reset
899 #ifdef CONFIG_TCP_MD5SIG
900 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
901 .md5_lookup = tcp_v6_reqsk_md5_lookup,
902 .calc_md5_hash = tcp_v6_md5_hash_skb,
906 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
907 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
908 .twsk_unique = tcp_twsk_unique,
909 .twsk_destructor= tcp_twsk_destructor,
912 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
914 struct ipv6_pinfo *np = inet6_sk(sk);
915 struct tcphdr *th = tcp_hdr(skb);
917 if (skb->ip_summed == CHECKSUM_PARTIAL) {
918 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
919 skb->csum_start = skb_transport_header(skb) - skb->head;
920 skb->csum_offset = offsetof(struct tcphdr, check);
922 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
923 csum_partial(th, th->doff<<2,
928 static int tcp_v6_gso_send_check(struct sk_buff *skb)
930 struct ipv6hdr *ipv6h;
933 if (!pskb_may_pull(skb, sizeof(*th)))
936 ipv6h = ipv6_hdr(skb);
940 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
942 skb->csum_start = skb_transport_header(skb) - skb->head;
943 skb->csum_offset = offsetof(struct tcphdr, check);
944 skb->ip_summed = CHECKSUM_PARTIAL;
948 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
951 struct ipv6hdr *iph = skb_gro_network_header(skb);
953 switch (skb->ip_summed) {
954 case CHECKSUM_COMPLETE:
955 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
957 skb->ip_summed = CHECKSUM_UNNECESSARY;
963 NAPI_GRO_CB(skb)->flush = 1;
967 return tcp_gro_receive(head, skb);
970 static int tcp6_gro_complete(struct sk_buff *skb)
972 struct ipv6hdr *iph = ipv6_hdr(skb);
973 struct tcphdr *th = tcp_hdr(skb);
975 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
976 &iph->saddr, &iph->daddr, 0);
977 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
979 return tcp_gro_complete(skb);
982 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
983 u32 ts, struct tcp_md5sig_key *key, int rst)
985 struct tcphdr *th = tcp_hdr(skb), *t1;
986 struct sk_buff *buff;
988 struct net *net = dev_net(skb_dst(skb)->dev);
989 struct sock *ctl_sk = net->ipv6.tcp_sk;
990 unsigned int tot_len = sizeof(struct tcphdr);
991 struct dst_entry *dst;
995 tot_len += TCPOLEN_TSTAMP_ALIGNED;
996 #ifdef CONFIG_TCP_MD5SIG
998 tot_len += TCPOLEN_MD5SIG_ALIGNED;
1001 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1006 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1008 t1 = (struct tcphdr *) skb_push(buff, tot_len);
1009 skb_reset_transport_header(skb);
1011 /* Swap the send and the receive. */
1012 memset(t1, 0, sizeof(*t1));
1013 t1->dest = th->source;
1014 t1->source = th->dest;
1015 t1->doff = tot_len / 4;
1016 t1->seq = htonl(seq);
1017 t1->ack_seq = htonl(ack);
1018 t1->ack = !rst || !th->ack;
1020 t1->window = htons(win);
1022 topt = (__be32 *)(t1 + 1);
1025 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1026 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1027 *topt++ = htonl(tcp_time_stamp);
1028 *topt++ = htonl(ts);
1031 #ifdef CONFIG_TCP_MD5SIG
1033 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1034 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1035 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
1036 &ipv6_hdr(skb)->saddr,
1037 &ipv6_hdr(skb)->daddr, t1);
1041 buff->csum = csum_partial(t1, tot_len, 0);
1043 memset(&fl, 0, sizeof(fl));
1044 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1045 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1047 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1048 tot_len, IPPROTO_TCP,
1051 fl.proto = IPPROTO_TCP;
1052 fl.oif = inet6_iif(skb);
1053 fl.fl_ip_dport = t1->dest;
1054 fl.fl_ip_sport = t1->source;
1055 security_skb_classify_flow(skb, &fl);
1057 /* Pass a socket to ip6_dst_lookup even if it is for a RST.
1058 * Underlying function will use this to retrieve the network
1061 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
1062 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
1063 skb_dst_set(buff, dst);
1064 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1065 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1067 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
1075 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
1077 struct tcphdr *th = tcp_hdr(skb);
1078 u32 seq = 0, ack_seq = 0;
1079 struct tcp_md5sig_key *key = NULL;
1084 if (!ipv6_unicast_destination(skb))
1087 #ifdef CONFIG_TCP_MD5SIG
1089 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
1093 seq = ntohl(th->ack_seq);
1095 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1098 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
1101 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
1102 struct tcp_md5sig_key *key)
1104 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
1107 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1109 struct inet_timewait_sock *tw = inet_twsk(sk);
1110 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1112 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1113 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1114 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
1119 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
1120 struct request_sock *req)
1122 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
1123 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
1127 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1129 struct request_sock *req, **prev;
1130 const struct tcphdr *th = tcp_hdr(skb);
1133 /* Find possible connection requests. */
1134 req = inet6_csk_search_req(sk, &prev, th->source,
1135 &ipv6_hdr(skb)->saddr,
1136 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1138 return tcp_check_req(sk, skb, req, prev);
1140 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1141 &ipv6_hdr(skb)->saddr, th->source,
1142 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1145 if (nsk->sk_state != TCP_TIME_WAIT) {
1149 inet_twsk_put(inet_twsk(nsk));
1153 #ifdef CONFIG_SYN_COOKIES
1154 if (!th->rst && !th->syn && th->ack)
1155 sk = cookie_v6_check(sk, skb);
1160 /* FIXME: this is substantially similar to the ipv4 code.
1161 * Can some kind of merge be done? -- erics
1163 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1165 struct inet6_request_sock *treq;
1166 struct ipv6_pinfo *np = inet6_sk(sk);
1167 struct tcp_options_received tmp_opt;
1168 struct tcp_sock *tp = tcp_sk(sk);
1169 struct request_sock *req = NULL;
1170 __u32 isn = TCP_SKB_CB(skb)->when;
1171 #ifdef CONFIG_SYN_COOKIES
1172 int want_cookie = 0;
1174 #define want_cookie 0
1177 if (skb->protocol == htons(ETH_P_IP))
1178 return tcp_v4_conn_request(sk, skb);
1180 if (!ipv6_unicast_destination(skb))
1183 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1184 if (net_ratelimit())
1185 syn_flood_warning(skb);
1186 #ifdef CONFIG_SYN_COOKIES
1187 if (sysctl_tcp_syncookies)
1194 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1197 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1201 #ifdef CONFIG_TCP_MD5SIG
1202 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1205 tcp_clear_options(&tmp_opt);
1206 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1207 tmp_opt.user_mss = tp->rx_opt.user_mss;
1209 tcp_parse_options(skb, &tmp_opt, 0);
1211 if (want_cookie && !tmp_opt.saw_tstamp)
1212 tcp_clear_options(&tmp_opt);
1214 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1215 tcp_openreq_init(req, &tmp_opt, skb);
1217 treq = inet6_rsk(req);
1218 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1219 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1221 TCP_ECN_create_request(req, tcp_hdr(skb));
1224 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1225 req->cookie_ts = tmp_opt.tstamp_ok;
1227 if (ipv6_opt_accepted(sk, skb) ||
1228 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1229 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1230 atomic_inc(&skb->users);
1231 treq->pktopts = skb;
1233 treq->iif = sk->sk_bound_dev_if;
1235 /* So that link locals have meaning */
1236 if (!sk->sk_bound_dev_if &&
1237 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1238 treq->iif = inet6_iif(skb);
1240 isn = tcp_v6_init_sequence(skb);
1243 tcp_rsk(req)->snt_isn = isn;
1245 security_inet_conn_request(sk, skb, req);
1247 if (tcp_v6_send_synack(sk, req))
1251 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1259 return 0; /* don't send reset */
1262 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1263 struct request_sock *req,
1264 struct dst_entry *dst)
1266 struct inet6_request_sock *treq;
1267 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1268 struct tcp6_sock *newtcp6sk;
1269 struct inet_sock *newinet;
1270 struct tcp_sock *newtp;
1272 struct ipv6_txoptions *opt;
1273 #ifdef CONFIG_TCP_MD5SIG
1274 struct tcp_md5sig_key *key;
1277 if (skb->protocol == htons(ETH_P_IP)) {
1282 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1287 newtcp6sk = (struct tcp6_sock *)newsk;
1288 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1290 newinet = inet_sk(newsk);
1291 newnp = inet6_sk(newsk);
1292 newtp = tcp_sk(newsk);
1294 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1296 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1299 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1302 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1304 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1305 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1306 #ifdef CONFIG_TCP_MD5SIG
1307 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1310 newnp->pktoptions = NULL;
1312 newnp->mcast_oif = inet6_iif(skb);
1313 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1316 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1317 * here, tcp_create_openreq_child now does this for us, see the comment in
1318 * that function for the gory details. -acme
1321 /* This is a tricky place. Until this moment IPv4 tcp
1322 worked with IPv6 icsk.icsk_af_ops.
1325 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1330 treq = inet6_rsk(req);
1333 if (sk_acceptq_is_full(sk))
1337 struct in6_addr *final_p = NULL, final;
1340 memset(&fl, 0, sizeof(fl));
1341 fl.proto = IPPROTO_TCP;
1342 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1343 if (opt && opt->srcrt) {
1344 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1345 ipv6_addr_copy(&final, &fl.fl6_dst);
1346 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1349 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1350 fl.oif = sk->sk_bound_dev_if;
1351 fl.mark = sk->sk_mark;
1352 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1353 fl.fl_ip_sport = inet_rsk(req)->loc_port;
1354 security_req_classify_flow(req, &fl);
1356 if (ip6_dst_lookup(sk, &dst, &fl))
1360 ipv6_addr_copy(&fl.fl6_dst, final_p);
1362 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
1366 newsk = tcp_create_openreq_child(sk, req, skb);
1371 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1372 * count here, tcp_create_openreq_child now does this for us, see the
1373 * comment in that function for the gory details. -acme
1376 newsk->sk_gso_type = SKB_GSO_TCPV6;
1377 __ip6_dst_store(newsk, dst, NULL, NULL);
1379 newtcp6sk = (struct tcp6_sock *)newsk;
1380 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1382 newtp = tcp_sk(newsk);
1383 newinet = inet_sk(newsk);
1384 newnp = inet6_sk(newsk);
1386 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1388 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1389 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1390 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1391 newsk->sk_bound_dev_if = treq->iif;
1393 /* Now IPv6 options...
1395 First: no IPv4 options.
1397 newinet->opt = NULL;
1398 newnp->ipv6_fl_list = NULL;
1401 newnp->rxopt.all = np->rxopt.all;
1403 /* Clone pktoptions received with SYN */
1404 newnp->pktoptions = NULL;
1405 if (treq->pktopts != NULL) {
1406 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1407 kfree_skb(treq->pktopts);
1408 treq->pktopts = NULL;
1409 if (newnp->pktoptions)
1410 skb_set_owner_r(newnp->pktoptions, newsk);
1413 newnp->mcast_oif = inet6_iif(skb);
1414 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1416 /* Clone native IPv6 options from listening socket (if any)
1418 Yes, keeping reference count would be much more clever,
1419 but we do one more thing there: reattach optmem
1423 newnp->opt = ipv6_dup_options(newsk, opt);
1425 sock_kfree_s(sk, opt, opt->tot_len);
1428 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1430 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1431 newnp->opt->opt_flen);
1433 tcp_mtup_init(newsk);
1434 tcp_sync_mss(newsk, dst_mtu(dst));
1435 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1436 tcp_initialize_rcv_mss(newsk);
1438 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1440 #ifdef CONFIG_TCP_MD5SIG
1441 /* Copy over the MD5 key from the original socket */
1442 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1443 /* We're using one, so create a matching key
1444 * on the newsk structure. If we fail to get
1445 * memory, then we end up not copying the key
1448 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1450 tcp_v6_md5_do_add(newsk, &newnp->daddr,
1451 newkey, key->keylen);
1455 __inet6_hash(newsk);
1456 __inet_inherit_port(sk, newsk);
1461 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1463 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1464 if (opt && opt != np->opt)
1465 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Prepare or verify the TCP checksum of an incoming IPv6 segment.
 *
 * For a CHECKSUM_COMPLETE skb, skb->csum is run through tcp_v6_check()
 * together with the segment length and the IPv6 source/destination
 * addresses; a zero result marks the skb CHECKSUM_UNNECESSARY.
 * Further down, skb->csum is seeded with the complemented, unfolded
 * tcp_v6_check() value computed with a zero base, and segments of at most
 * 76 bytes are verified on the spot via __skb_checksum_complete().
 *
 * NOTE(review): several lines of this function (including the return
 * statements outside the short-segment branch) are not present in this
 * listing, so the exact return values must be confirmed in the full file.
 */
1470 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1472 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1473 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1474 &ipv6_hdr(skb)->daddr, skb->csum)) {
/* The supplied sum checks out: no further verification is needed. */
1475 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Seed skb->csum so that a later full checksum pass can validate the data. */
1480 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1481 &ipv6_hdr(skb)->saddr,
1482 &ipv6_hdr(skb)->daddr, 0));
/* Short segments are checksummed immediately. */
1484 if (skb->len <= 76) {
1485 return __skb_checksum_complete(skb);
1490 /* The socket must have its spinlock held when we get
1493 * We have a potential double-lock case here, so even when
1494 * doing backlog processing we use the BH locking scheme.
1495 * This is because we cannot sleep with the original spinlock
/*
 * Process one received segment for socket sk.  This function is called
 * from tcp_v6_rcv() and is also installed as the backlog_rcv hook of
 * tcpv6_prot.
 *
 * Visible steps:
 *  - IPv4 packets (skb->protocol == htons(ETH_P_IP)) are handed over to
 *    tcp_v4_do_rcv();
 *  - with CONFIG_TCP_MD5SIG the segment is checked by
 *    tcp_v6_inbound_md5_hash(); sk_filter() is applied afterwards;
 *  - the skb may be cloned into opt_skb so that the IPv6 packet options
 *    can be latched later (the condition guarding the clone is not visible
 *    in this listing);
 *  - TCP_ESTABLISHED sockets go through tcp_rcv_established();
 *  - other states first check the header length and checksum; TCP_LISTEN
 *    sockets then call tcp_v6_hnd_req() and may pass the segment to
 *    tcp_child_process(); tcp_rcv_state_process() handles the rest;
 *  - tcp_v6_send_reset() and the TCP_MIB_INERRS counter are used on the
 *    failure paths;
 *  - in the packet-options tail, when opt_skb ends exactly at tp->rcv_nxt
 *    and the socket is neither closed nor listening, np->mcast_oif and
 *    np->mcast_hops are refreshed according to the rxopt bits, and opt_skb
 *    is exchanged into np->pktoptions if ipv6_opt_accepted() approves it.
 *
 * NOTE(review): labels, return statements and several conditions are
 * missing from this listing; confirm the branch structure and the return
 * values against the complete file before relying on this description.
 */
1498 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1500 struct ipv6_pinfo *np = inet6_sk(sk);
1501 struct tcp_sock *tp;
1502 struct sk_buff *opt_skb = NULL;
1504 /* Imagine: socket is IPv6. IPv4 packet arrives,
1505 goes to IPv4 receive handler and backlogged.
1506 From backlog it always goes here. Kerboom...
1507 Fortunately, tcp_rcv_established and rcv_established
1508 handle them correctly, but it is not case with
1509 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1512 if (skb->protocol == htons(ETH_P_IP))
1513 return tcp_v4_do_rcv(sk, skb);
1515 #ifdef CONFIG_TCP_MD5SIG
1516 if (tcp_v6_inbound_md5_hash (sk, skb))
1520 if (sk_filter(sk, skb))
1524 * socket locking is here for SMP purposes as backlog rcv
1525 * is currently called with bh processing disabled.
1528 /* Do Stevens' IPV6_PKTOPTIONS.
1530 Yes, guys, it is the only place in our code, where we
1531 may make it not affecting IPv4.
1532 The rest of code is protocol independent,
1533 and I do not like idea to uglify IPv4.
1535 Actually, all the idea behind IPV6_PKTOPTIONS
1536 looks not very well thought. For now we latch
1537 options, received in the last packet, enqueued
1538 by tcp. Feel free to propose better solution.
1542 opt_skb = skb_clone(skb, GFP_ATOMIC);
1544 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1545 TCP_CHECK_TIMER(sk);
1546 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1548 TCP_CHECK_TIMER(sk);
1550 goto ipv6_pktoptions;
1554 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1557 if (sk->sk_state == TCP_LISTEN) {
1558 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1563 * Queue it on the new socket if the new socket is active,
1564 * otherwise we just shortcircuit this and continue with
1568 if (tcp_child_process(sk, nsk, skb))
1571 __kfree_skb(opt_skb);
1576 TCP_CHECK_TIMER(sk);
1577 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1579 TCP_CHECK_TIMER(sk);
1581 goto ipv6_pktoptions;
1585 tcp_v6_send_reset(sk, skb);
1588 __kfree_skb(opt_skb);
1592 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1597 /* Do you ask, what is it?
1599 1. skb was enqueued by tcp.
1600 2. skb is added to tail of read queue, rather than out of order.
1601 3. socket is not in passive state.
1602 4. Finally, it really contains options, which user wants to receive.
1605 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1606 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1607 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1608 np->mcast_oif = inet6_iif(opt_skb);
1609 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1610 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1611 if (ipv6_opt_accepted(sk, opt_skb)) {
1612 skb_set_owner_r(opt_skb, sk);
1613 opt_skb = xchg(&np->pktoptions, opt_skb);
1615 __kfree_skb(opt_skb);
1616 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Entry point for incoming TCP segments carried over IPv6; installed as
 * the .handler of tcpv6_protocol.
 *
 * Visible steps, in order:
 *  - skb->pkt_type is compared with PACKET_HOST and the segment is counted
 *    in TCP_MIB_INSEGS (even when it later turns out to be bad);
 *  - the TCP header is pulled and sanity-checked: th->doff must cover at
 *    least struct tcphdr and the whole header must be pullable;
 *  - unless skb_csum_unnecessary(), the checksum is set up with
 *    tcp_v6_checksum_init();
 *  - TCP_SKB_CB(skb) is filled in: seq, end_seq (seq plus SYN and FIN flags
 *    plus payload length), ack_seq, when, flags (the IPv6 DS field) and
 *    sacked;
 *  - the owning socket is looked up with __inet6_lookup_skb();
 *  - for a regular socket, xfrm6_policy_check() and sk_filter() are applied;
 *    then, under bh_lock_sock_nested(), the segment is handled through
 *    tcp_prequeue() / tcp_v6_do_rcv() when the socket is not owned by the
 *    user, and sk_add_backlog() is the other visible disposition;
 *  - on the no-socket path a policy check and a length/checksum check are
 *    made; TCP_MIB_INERRS and tcp_v6_send_reset(NULL, skb) appear there;
 *  - TCP_TIME_WAIT sockets repeat those checks and then dispatch on the
 *    result of tcp_timewait_state_process(), which may look up a listener
 *    with inet6_lookup_listener(), deschedule the timewait socket, or
 *    answer through tcp_v6_timewait_ack().
 *
 * The visible return statement yields -1 when ret is non-zero and 0
 * otherwise.
 *
 * NOTE(review): labels, case labels, declarations and several branch
 * bodies are missing from this listing; verify the exact control flow in
 * the complete file.
 */
1624 static int tcp_v6_rcv(struct sk_buff *skb)
1629 struct net *net = dev_net(skb->dev);
1631 if (skb->pkt_type != PACKET_HOST)
1635 * Count it even if it's bad.
1637 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1639 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1644 if (th->doff < sizeof(struct tcphdr)/4)
1646 if (!pskb_may_pull(skb, th->doff*4))
1649 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1653 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1654 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1655 skb->len - th->doff*4);
1656 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1657 TCP_SKB_CB(skb)->when = 0;
1658 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1659 TCP_SKB_CB(skb)->sacked = 0;
1661 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1666 if (sk->sk_state == TCP_TIME_WAIT)
1669 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1670 goto discard_and_relse;
1672 if (sk_filter(sk, skb))
1673 goto discard_and_relse;
1677 bh_lock_sock_nested(sk);
1679 if (!sock_owned_by_user(sk)) {
1680 #ifdef CONFIG_NET_DMA
1681 struct tcp_sock *tp = tcp_sk(sk);
1682 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1683 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1684 if (tp->ucopy.dma_chan)
1685 ret = tcp_v6_do_rcv(sk, skb);
1689 if (!tcp_prequeue(sk, skb))
1690 ret = tcp_v6_do_rcv(sk, skb);
1693 sk_add_backlog(sk, skb);
1697 return ret ? -1 : 0;
1700 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1703 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1705 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1707 tcp_v6_send_reset(NULL, skb);
1724 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1725 inet_twsk_put(inet_twsk(sk));
1729 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1730 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1731 inet_twsk_put(inet_twsk(sk));
1735 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1740 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1741 &ipv6_hdr(skb)->daddr,
1742 ntohs(th->dest), inet6_iif(skb));
1744 struct inet_timewait_sock *tw = inet_twsk(sk);
1745 inet_twsk_deschedule(tw, &tcp_death_row);
1750 /* Fall through to ACK */
1753 tcp_v6_timewait_ack(sk, skb);
1757 case TCP_TW_SUCCESS:;
/*
 * remember_stamp hook for native IPv6 sockets (referenced by
 * ipv6_specific).  According to the comment in the body, this is not
 * implemented for IPv6 yet.
 *
 * NOTE(review): the return statement is not present in this listing.
 */
1762 static int tcp_v6_remember_stamp(struct sock *sk)
1764 /* Alas, not yet... */
/*
 * Address-family operations for TCP sockets speaking native IPv6.
 * tcp_v6_init_sock() installs this table as icsk->icsk_af_ops.
 * Transmit, checksum, header rebuild and timestamp hooks are the IPv6
 * variants; header and sockaddr sizes are those of struct ipv6hdr and
 * struct sockaddr_in6.
 */
1768 static const struct inet_connection_sock_af_ops ipv6_specific = {
1769 .queue_xmit = inet6_csk_xmit,
1770 .send_check = tcp_v6_send_check,
1771 .rebuild_header = inet6_sk_rebuild_header,
1772 .conn_request = tcp_v6_conn_request,
1773 .syn_recv_sock = tcp_v6_syn_recv_sock,
1774 .remember_stamp = tcp_v6_remember_stamp,
1775 .net_header_len = sizeof(struct ipv6hdr),
1776 .setsockopt = ipv6_setsockopt,
1777 .getsockopt = ipv6_getsockopt,
1778 .addr2sockaddr = inet6_csk_addr2sockaddr,
1779 .sockaddr_len = sizeof(struct sockaddr_in6),
1780 .bind_conflict = inet6_csk_bind_conflict,
/* Compat socket-option entry points are only present with CONFIG_COMPAT. */
1781 #ifdef CONFIG_COMPAT
1782 .compat_setsockopt = compat_ipv6_setsockopt,
1783 .compat_getsockopt = compat_ipv6_getsockopt,
1787 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP MD5 signature operations for native IPv6 sockets.  The table sits
 * under CONFIG_TCP_MD5SIG, and tcp_v6_init_sock() stores it in
 * tp->af_specific.  All four hooks are the tcp_v6_* implementations.
 */
1788 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1789 .md5_lookup = tcp_v6_md5_lookup,
1790 .calc_md5_hash = tcp_v6_md5_hash_skb,
1791 .md5_add = tcp_v6_md5_add_func,
1792 .md5_parse = tcp_v6_parse_md5_keys,
1797 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for TCP over IPv4 reached through the INET6
 * API.  Transmit (ip_queue_xmit), checksum (tcp_v4_send_check), header
 * rebuild, remember_stamp and net_header_len use the IPv4 routines and
 * struct iphdr, while connection request handling, socket options,
 * address reporting (struct sockaddr_in6) and bind conflict detection
 * stay with the IPv6 implementations.
 */
1800 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1801 .queue_xmit = ip_queue_xmit,
1802 .send_check = tcp_v4_send_check,
1803 .rebuild_header = inet_sk_rebuild_header,
1804 .conn_request = tcp_v6_conn_request,
1805 .syn_recv_sock = tcp_v6_syn_recv_sock,
1806 .remember_stamp = tcp_v4_remember_stamp,
1807 .net_header_len = sizeof(struct iphdr),
1808 .setsockopt = ipv6_setsockopt,
1809 .getsockopt = ipv6_getsockopt,
1810 .addr2sockaddr = inet6_csk_addr2sockaddr,
1811 .sockaddr_len = sizeof(struct sockaddr_in6),
1812 .bind_conflict = inet6_csk_bind_conflict,
/* Compat socket-option entry points are only present with CONFIG_COMPAT. */
1813 #ifdef CONFIG_COMPAT
1814 .compat_setsockopt = compat_ipv6_setsockopt,
1815 .compat_getsockopt = compat_ipv6_getsockopt,
1819 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP MD5 signature operations for the IPv4-through-INET6 case (under
 * CONFIG_TCP_MD5SIG).  Key lookup and hash calculation use the tcp_v4_*
 * routines; adding and parsing keys still go through the tcp_v6_* entry
 * points.
 */
1820 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1821 .md5_lookup = tcp_v4_md5_lookup,
1822 .calc_md5_hash = tcp_v4_md5_hash_skb,
1823 .md5_add = tcp_v6_md5_add_func,
1824 .md5_parse = tcp_v6_parse_md5_keys,
1828 /* NOTE: A lot of things are set to zero explicitly by the call to
1829 * sk_alloc(), so they need not be done here.
/*
 * Per-socket initialisation for TCP over IPv6; installed as the .init
 * hook of tcpv6_prot.
 *
 * Visible work:
 *  - initialises the out-of-order queue, the transmit timers and the
 *    prequeue;
 *  - sets icsk_rto and tp->mdev to TCP_TIMEOUT_INIT;
 *  - sets snd_ssthresh to TCP_INFINITE_SSTHRESH, snd_cwnd_clamp to ~0 and
 *    mss_cache to 536, and takes reordering from sysctl_tcp_reordering;
 *  - puts the socket in TCP_CLOSE;
 *  - installs ipv6_specific as icsk_af_ops, tcp_init_congestion_ops as
 *    icsk_ca_ops and tcp_sync_mss as icsk_sync_mss;
 *  - sets sk_write_space to sk_stream_write_space and raises
 *    SOCK_USE_WRITE_QUEUE;
 *  - with CONFIG_TCP_MD5SIG, points tp->af_specific at
 *    tcp_sock_ipv6_specific;
 *  - takes sk_sndbuf / sk_rcvbuf from element [1] of sysctl_tcp_wmem /
 *    sysctl_tcp_rmem;
 *  - increments the tcp_sockets_allocated per-cpu counter.
 *
 * NOTE(review): a few statements (for example the one following the
 * comment about counting the initial SYN) and the return statement are
 * missing from this listing.
 */
1831 static int tcp_v6_init_sock(struct sock *sk)
1833 struct inet_connection_sock *icsk = inet_csk(sk);
1834 struct tcp_sock *tp = tcp_sk(sk);
1836 skb_queue_head_init(&tp->out_of_order_queue);
1837 tcp_init_xmit_timers(sk);
1838 tcp_prequeue_init(tp);
1840 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1841 tp->mdev = TCP_TIMEOUT_INIT;
1843 /* So many TCP implementations out there (incorrectly) count the
1844 * initial SYN frame in their delayed-ACK and congestion control
1845 * algorithms that we must have the following bandaid to talk
1846 * efficiently to them. -DaveM
1850 /* See draft-stevens-tcpca-spec-01 for discussion of the
1851 * initialization of these values.
1853 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1854 tp->snd_cwnd_clamp = ~0;
1855 tp->mss_cache = 536;
1857 tp->reordering = sysctl_tcp_reordering;
1859 sk->sk_state = TCP_CLOSE;
1861 icsk->icsk_af_ops = &ipv6_specific;
1862 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1863 icsk->icsk_sync_mss = tcp_sync_mss;
1864 sk->sk_write_space = sk_stream_write_space;
1865 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1867 #ifdef CONFIG_TCP_MD5SIG
1868 tp->af_specific = &tcp_sock_ipv6_specific;
1871 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1872 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1875 percpu_counter_inc(&tcp_sockets_allocated);
/*
 * Socket teardown for TCP over IPv6; installed as the .destroy hook of
 * tcpv6_prot.  With CONFIG_TCP_MD5SIG the MD5 key list is cleared first
 * when md5sig_info is set; the generic TCP teardown
 * (tcp_v4_destroy_sock) and then the IPv6 teardown (inet6_destroy_sock)
 * follow.
 */
1881 static void tcp_v6_destroy_sock(struct sock *sk)
1883 #ifdef CONFIG_TCP_MD5SIG
1884 /* Clean up the MD5 key list */
1885 if (tcp_sk(sk)->md5sig_info)
1886 tcp_v6_clear_md5_list(sk);
1888 tcp_v4_destroy_sock(sk);
1889 inet6_destroy_sock(sk);
1892 #ifdef CONFIG_PROC_FS
1893 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one pending connection request (request_sock) as a line of the
 * TCPv6 proc listing; called from tcp6_seq_show() for
 * TCP_SEQ_STATE_OPENREQ entries.
 *
 * @seq: seq_file being filled
 * @sk:  socket passed by the caller (st->syn_wait_sk)
 * @req: the request to print
 * @i:   entry number shown in the first column
 * @uid: user id supplied by the caller
 *
 * The line carries the local and remote IPv6 addresses as four 32-bit
 * words each, followed by the port in host byte order, and the time left
 * until req->expires converted with jiffies_to_clock_t().  Constant
 * columns are emitted where a request has no meaningful value (queue
 * sizes, non-standard timer, inode).
 *
 * NOTE(review): the output call itself and some arguments are missing
 * from this listing, so the mapping of arguments to format fields cannot
 * be fully confirmed here.
 */
1894 static void get_openreq6(struct seq_file *seq,
1895 struct sock *sk, struct request_sock *req, int i, int uid)
1897 int ttd = req->expires - jiffies;
1898 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1899 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1905 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1906 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1908 src->s6_addr32[0], src->s6_addr32[1],
1909 src->s6_addr32[2], src->s6_addr32[3],
1910 ntohs(inet_rsk(req)->loc_port),
1911 dest->s6_addr32[0], dest->s6_addr32[1],
1912 dest->s6_addr32[2], dest->s6_addr32[3],
1913 ntohs(inet_rsk(req)->rmt_port),
1915 0,0, /* could print option size, but that is af dependent. */
1916 1, /* timers active (only the expire timer) */
1917 jiffies_to_clock_t(ttd),
1920 0, /* non standard timer */
1921 0, /* open_requests have no inode */
/*
 * Format one full TCP/IPv6 socket as a line of the TCPv6 proc listing;
 * called from tcp6_seq_show() for listening and established entries.
 *
 * @seq: seq_file being filled
 * @sp:  socket to print
 * @i:   entry number shown in the first column
 *
 * The reported timer expiry is icsk->icsk_timeout when a retransmit or
 * zero-window-probe timer is pending, sp->sk_timer.expires when sk_timer
 * is pending, and the current jiffies otherwise.  The queue columns are
 * write_seq - snd_una for transmit and, for receive, sk_ack_backlog on a
 * listening socket or rcv_nxt - copied_seq on any other.  The final
 * column is -1 while tcp_in_initial_slowstart() holds, else snd_ssthresh.
 *
 * NOTE(review): the output call, the assignment of dest and the timer
 * type values are missing from this listing; confirm them in the full
 * file.
 */
1925 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1927 struct in6_addr *dest, *src;
1930 unsigned long timer_expires;
1931 struct inet_sock *inet = inet_sk(sp);
1932 struct tcp_sock *tp = tcp_sk(sp);
1933 const struct inet_connection_sock *icsk = inet_csk(sp);
1934 struct ipv6_pinfo *np = inet6_sk(sp);
1937 src = &np->rcv_saddr;
1938 destp = ntohs(inet->dport);
1939 srcp = ntohs(inet->sport);
1941 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1943 timer_expires = icsk->icsk_timeout;
1944 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1946 timer_expires = icsk->icsk_timeout;
1947 } else if (timer_pending(&sp->sk_timer)) {
1949 timer_expires = sp->sk_timer.expires;
1952 timer_expires = jiffies;
1956 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1957 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
1959 src->s6_addr32[0], src->s6_addr32[1],
1960 src->s6_addr32[2], src->s6_addr32[3], srcp,
1961 dest->s6_addr32[0], dest->s6_addr32[1],
1962 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1964 tp->write_seq-tp->snd_una,
1965 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1967 jiffies_to_clock_t(timer_expires - jiffies),
1968 icsk->icsk_retransmits,
1970 icsk->icsk_probes_out,
1972 atomic_read(&sp->sk_refcnt), sp,
1973 jiffies_to_clock_t(icsk->icsk_rto),
1974 jiffies_to_clock_t(icsk->icsk_ack.ato),
1975 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1977 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/*
 * Format one time-wait socket as a line of the TCPv6 proc listing;
 * called from tcp6_seq_show() for TCP_SEQ_STATE_TIME_WAIT entries.
 *
 * @seq: seq_file being filled
 * @tw:  time-wait socket to print
 * @i:   entry number shown in the first column
 *
 * Addresses come from the IPv6 time-wait part (tw_v6_rcv_saddr as source,
 * tw_v6_daddr as destination) and ports are converted to host byte order.
 * The state column is tw_substate, the timer column is the constant 3
 * with the remaining time tw_ttd - jiffies converted by
 * jiffies_to_clock_t(), and the line ends with the reference count and
 * the pointer value.  All other columns are printed as zero.
 *
 * NOTE(review): the output call and some declarations are missing from
 * this listing.
 */
1981 static void get_timewait6_sock(struct seq_file *seq,
1982 struct inet_timewait_sock *tw, int i)
1984 struct in6_addr *dest, *src;
1986 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1987 int ttd = tw->tw_ttd - jiffies;
1992 dest = &tw6->tw_v6_daddr;
1993 src = &tw6->tw_v6_rcv_saddr;
1994 destp = ntohs(tw->tw_dport);
1995 srcp = ntohs(tw->tw_sport);
1998 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1999 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2001 src->s6_addr32[0], src->s6_addr32[1],
2002 src->s6_addr32[2], src->s6_addr32[3], srcp,
2003 dest->s6_addr32[0], dest->s6_addr32[1],
2004 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2005 tw->tw_substate, 0, 0,
2006 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2007 atomic_read(&tw->tw_refcnt), tw);
/*
 * seq_file show callback for the TCPv6 proc listing (set as .show in
 * tcp6_seq_afinfo).
 *
 * For SEQ_START_TOKEN the column header is emitted.  For any other entry
 * the iterator state st->state selects the formatter: get_tcp6_sock() for
 * listening and established sockets, get_openreq6() for open requests
 * and get_timewait6_sock() for time-wait sockets.
 *
 * NOTE(review): the header output call, the assignment of st, the break
 * statements and the return value are missing from this listing.
 */
2010 static int tcp6_seq_show(struct seq_file *seq, void *v)
2012 struct tcp_iter_state *st;
2014 if (v == SEQ_START_TOKEN) {
2019 "st tx_queue rx_queue tr tm->when retrnsmt"
2020 " uid timeout inode\n");
2025 switch (st->state) {
2026 case TCP_SEQ_STATE_LISTENING:
2027 case TCP_SEQ_STATE_ESTABLISHED:
2028 get_tcp6_sock(seq, v, st->num);
2030 case TCP_SEQ_STATE_OPENREQ:
2031 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2033 case TCP_SEQ_STATE_TIME_WAIT:
2034 get_timewait6_sock(seq, v, st->num);
/*
 * Descriptor handed to tcp_proc_register() / tcp_proc_unregister() for
 * the TCPv6 proc listing; it names this module as owner and
 * tcp6_seq_show() as the show routine.
 *
 * NOTE(review): further initialisers of this structure are missing from
 * this listing.
 */
2041 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2045 .owner = THIS_MODULE,
2048 .show = tcp6_seq_show,
/*
 * Register the TCPv6 proc listing for network namespace @net.
 * Returns whatever tcp_proc_register() returns.
 */
2052 int tcp6_proc_init(struct net *net)
2054 return tcp_proc_register(net, &tcp6_seq_afinfo);
/*
 * Remove the TCPv6 proc listing for network namespace @net; counterpart
 * of tcp6_proc_init().
 */
2057 void tcp6_proc_exit(struct net *net)
2059 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * Protocol descriptor for TCP over IPv6; tcpv6_protosw points at it.
 *
 * The IPv6-specific entries are connect, init, destroy, backlog_rcv and
 * hash, plus the tcp6 time-wait and request-sock operations; the object
 * size is that of struct tcp6_sock.  Everything else (disconnect, accept,
 * shutdown, socket options, recvmsg, unhash, port allocation, memory
 * accounting and sysctl limits) refers to the common TCP / inet
 * symbols, and the hash tables are the shared tcp_hashinfo.
 *
 * NOTE(review): some initialisers are missing from this listing.
 */
2063 struct proto tcpv6_prot = {
2065 .owner = THIS_MODULE,
2067 .connect = tcp_v6_connect,
2068 .disconnect = tcp_disconnect,
2069 .accept = inet_csk_accept,
2071 .init = tcp_v6_init_sock,
2072 .destroy = tcp_v6_destroy_sock,
2073 .shutdown = tcp_shutdown,
2074 .setsockopt = tcp_setsockopt,
2075 .getsockopt = tcp_getsockopt,
2076 .recvmsg = tcp_recvmsg,
2077 .backlog_rcv = tcp_v6_do_rcv,
2078 .hash = tcp_v6_hash,
2079 .unhash = inet_unhash,
2080 .get_port = inet_csk_get_port,
2081 .enter_memory_pressure = tcp_enter_memory_pressure,
2082 .sockets_allocated = &tcp_sockets_allocated,
2083 .memory_allocated = &tcp_memory_allocated,
2084 .memory_pressure = &tcp_memory_pressure,
2085 .orphan_count = &tcp_orphan_count,
2086 .sysctl_mem = sysctl_tcp_mem,
2087 .sysctl_wmem = sysctl_tcp_wmem,
2088 .sysctl_rmem = sysctl_tcp_rmem,
2089 .max_header = MAX_TCP_HEADER,
2090 .obj_size = sizeof(struct tcp6_sock),
2091 .slab_flags = SLAB_DESTROY_BY_RCU,
2092 .twsk_prot = &tcp6_timewait_sock_ops,
2093 .rsk_prot = &tcp6_request_sock_ops,
2094 .h.hashinfo = &tcp_hashinfo,
/* Compat socket-option entry points are only present with CONFIG_COMPAT. */
2095 #ifdef CONFIG_COMPAT
2096 .compat_setsockopt = compat_tcp_setsockopt,
2097 .compat_getsockopt = compat_tcp_getsockopt,
/*
 * IPv6 protocol handler table for TCP, added and removed with
 * inet6_add_protocol() / inet6_del_protocol() for IPPROTO_TCP.  It wires
 * up the receive handler (tcp_v6_rcv), the error handler (tcp_v6_err)
 * and the GSO/GRO callbacks, and carries the INET6_PROTO_NOPOLICY and
 * INET6_PROTO_FINAL flags.
 */
2101 static const struct inet6_protocol tcpv6_protocol = {
2102 .handler = tcp_v6_rcv,
2103 .err_handler = tcp_v6_err,
2104 .gso_send_check = tcp_v6_gso_send_check,
2105 .gso_segment = tcp_tso_segment,
2106 .gro_receive = tcp6_gro_receive,
2107 .gro_complete = tcp6_gro_complete,
2108 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry that binds SOCK_STREAM / IPPROTO_TCP to tcpv6_prot
 * with inet6_stream_ops as the socket-level operations.  Registered and
 * unregistered through inet6_register_protosw() /
 * inet6_unregister_protosw().
 *
 * NOTE(review): the flags expression continues on lines missing from this
 * listing; only INET_PROTOSW_PERMANENT is visible.
 */
2111 static struct inet_protosw tcpv6_protosw = {
2112 .type = SOCK_STREAM,
2113 .protocol = IPPROTO_TCP,
2114 .prot = &tcpv6_prot,
2115 .ops = &inet6_stream_ops,
2118 .flags = INET_PROTOSW_PERMANENT |
/*
 * Per-namespace init hook (tcpv6_net_ops.init): creates the control
 * socket stored in net->ipv6.tcp_sk as a PF_INET6 / SOCK_RAW /
 * IPPROTO_TCP socket.  Returns the result of inet_ctl_sock_create().
 */
2122 static int tcpv6_net_init(struct net *net)
2124 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2125 SOCK_RAW, IPPROTO_TCP, net);
/*
 * Per-namespace exit hook (tcpv6_net_ops.exit): destroys the control
 * socket created by tcpv6_net_init() and purges the namespace's AF_INET6
 * time-wait sockets from tcp_hashinfo / tcp_death_row.
 */
2128 static void tcpv6_net_exit(struct net *net)
2130 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2131 inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
/*
 * Per-network-namespace operations for TCPv6, registered in tcpv6_init()
 * with register_pernet_subsys() and removed in tcpv6_exit().
 */
2134 static struct pernet_operations tcpv6_net_ops = {
2135 .init = tcpv6_net_init,
2136 .exit = tcpv6_net_exit,
/*
 * Boot-time initialisation of TCP over IPv6.  Registration order:
 *   1. the IPv6 protocol handler (inet6_add_protocol),
 *   2. the socket-switch entry (inet6_register_protosw),
 *   3. the per-namespace operations (register_pernet_subsys).
 * A failure in step 2 jumps to out_tcpv6_protocol and a failure in step 3
 * jumps to out_tcpv6_protosw.
 *
 * NOTE(review): the cleanup statements below appear in the order
 * inet6_del_protocol() then inet6_unregister_protosw().  The labels,
 * the tests on ret and the final return are missing from this listing.
 * If the two labels fall through in this textual order, a step-2 failure
 * would also unregister a protosw that was never registered, and a step-3
 * failure would skip inet6_del_protocol().  Confirm against the complete
 * file; the unwind order should mirror tcpv6_exit().
 */
2139 int __init tcpv6_init(void)
2143 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2147 /* register inet6 protocol */
2148 ret = inet6_register_protosw(&tcpv6_protosw);
2150 goto out_tcpv6_protocol;
2152 ret = register_pernet_subsys(&tcpv6_net_ops);
2154 goto out_tcpv6_protosw;
2159 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2161 inet6_unregister_protosw(&tcpv6_protosw);
/*
 * Tear down TCP over IPv6, undoing the registrations made by tcpv6_init()
 * in reverse order: per-namespace operations, socket-switch entry, then
 * the IPv6 protocol handler.
 */
2165 void tcpv6_exit(void)
2167 unregister_pernet_subsys(&tcpv6_net_ops);
2168 inet6_unregister_protosw(&tcpv6_protosw);
2169 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);