3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
42 #include <linux/ipv6.h>
43 #include <linux/icmpv6.h>
44 #include <linux/random.h>
47 #include <net/ndisc.h>
48 #include <net/inet6_hashtables.h>
49 #include <net/inet6_connection_sock.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
59 #include <net/dsfield.h>
60 #include <net/timewait_sock.h>
61 #include <net/netdma.h>
62 #include <net/inet_common.h>
64 #include <asm/uaccess.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <linux/crypto.h>
70 #include <linux/scatterlist.h>
72 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
85 struct in6_addr *addr)
91 static void tcp_v6_hash(struct sock *sk)
93 if (sk->sk_state != TCP_CLOSE) {
94 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
104 static __inline__ __sum16 tcp_v6_check(int len,
105 struct in6_addr *saddr,
106 struct in6_addr *daddr,
109 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
112 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
114 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
115 ipv6_hdr(skb)->saddr.s6_addr32,
117 tcp_hdr(skb)->source);
120 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
123 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
124 struct inet_sock *inet = inet_sk(sk);
125 struct inet_connection_sock *icsk = inet_csk(sk);
126 struct ipv6_pinfo *np = inet6_sk(sk);
127 struct tcp_sock *tp = tcp_sk(sk);
128 struct in6_addr *saddr = NULL, *final_p = NULL, final;
130 struct dst_entry *dst;
134 if (addr_len < SIN6_LEN_RFC2133)
137 if (usin->sin6_family != AF_INET6)
138 return(-EAFNOSUPPORT);
140 memset(&fl, 0, sizeof(fl));
143 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
144 IP6_ECN_flow_init(fl.fl6_flowlabel);
145 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
146 struct ip6_flowlabel *flowlabel;
147 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
148 if (flowlabel == NULL)
150 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
151 fl6_sock_release(flowlabel);
156 * connect() to INADDR_ANY means loopback (BSD'ism).
159 if(ipv6_addr_any(&usin->sin6_addr))
160 usin->sin6_addr.s6_addr[15] = 0x1;
162 addr_type = ipv6_addr_type(&usin->sin6_addr);
164 if(addr_type & IPV6_ADDR_MULTICAST)
167 if (addr_type&IPV6_ADDR_LINKLOCAL) {
168 if (addr_len >= sizeof(struct sockaddr_in6) &&
169 usin->sin6_scope_id) {
170 /* If interface is set while binding, indices
173 if (sk->sk_bound_dev_if &&
174 sk->sk_bound_dev_if != usin->sin6_scope_id)
177 sk->sk_bound_dev_if = usin->sin6_scope_id;
180 /* Connect to link-local address requires an interface */
181 if (!sk->sk_bound_dev_if)
185 if (tp->rx_opt.ts_recent_stamp &&
186 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
187 tp->rx_opt.ts_recent = 0;
188 tp->rx_opt.ts_recent_stamp = 0;
192 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
193 np->flow_label = fl.fl6_flowlabel;
199 if (addr_type == IPV6_ADDR_MAPPED) {
200 u32 exthdrlen = icsk->icsk_ext_hdr_len;
201 struct sockaddr_in sin;
203 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
205 if (__ipv6_only_sock(sk))
208 sin.sin_family = AF_INET;
209 sin.sin_port = usin->sin6_port;
210 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
212 icsk->icsk_af_ops = &ipv6_mapped;
213 sk->sk_backlog_rcv = tcp_v4_do_rcv;
214 #ifdef CONFIG_TCP_MD5SIG
215 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
218 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
221 icsk->icsk_ext_hdr_len = exthdrlen;
222 icsk->icsk_af_ops = &ipv6_specific;
223 sk->sk_backlog_rcv = tcp_v6_do_rcv;
224 #ifdef CONFIG_TCP_MD5SIG
225 tp->af_specific = &tcp_sock_ipv6_specific;
229 ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
230 ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
236 if (!ipv6_addr_any(&np->rcv_saddr))
237 saddr = &np->rcv_saddr;
239 fl.proto = IPPROTO_TCP;
240 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
241 ipv6_addr_copy(&fl.fl6_src,
242 (saddr ? saddr : &np->saddr));
243 fl.oif = sk->sk_bound_dev_if;
244 fl.mark = sk->sk_mark;
245 fl.fl_ip_dport = usin->sin6_port;
246 fl.fl_ip_sport = inet->sport;
248 if (np->opt && np->opt->srcrt) {
249 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
250 ipv6_addr_copy(&final, &fl.fl6_dst);
251 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
255 security_sk_classify_flow(sk, &fl);
257 err = ip6_dst_lookup(sk, &dst, &fl);
261 ipv6_addr_copy(&fl.fl6_dst, final_p);
263 err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
266 err = ip6_dst_blackhole(sk, &dst, &fl);
273 ipv6_addr_copy(&np->rcv_saddr, saddr);
276 /* set the source address */
277 ipv6_addr_copy(&np->saddr, saddr);
278 inet->rcv_saddr = LOOPBACK4_IPV6;
280 sk->sk_gso_type = SKB_GSO_TCPV6;
281 __ip6_dst_store(sk, dst, NULL, NULL);
283 icsk->icsk_ext_hdr_len = 0;
285 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
288 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
290 inet->dport = usin->sin6_port;
292 tcp_set_state(sk, TCP_SYN_SENT);
293 err = inet6_hash_connect(&tcp_death_row, sk);
298 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
303 err = tcp_connect(sk);
310 tcp_set_state(sk, TCP_CLOSE);
314 sk->sk_route_caps = 0;
318 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
319 u8 type, u8 code, int offset, __be32 info)
321 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
322 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
323 struct ipv6_pinfo *np;
328 struct net *net = dev_net(skb->dev);
330 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
331 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
334 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
339 if (sk->sk_state == TCP_TIME_WAIT) {
340 inet_twsk_put(inet_twsk(sk));
345 if (sock_owned_by_user(sk))
346 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
348 if (sk->sk_state == TCP_CLOSE)
352 seq = ntohl(th->seq);
353 if (sk->sk_state != TCP_LISTEN &&
354 !between(seq, tp->snd_una, tp->snd_nxt)) {
355 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
361 if (type == ICMPV6_PKT_TOOBIG) {
362 struct dst_entry *dst = NULL;
364 if (sock_owned_by_user(sk))
366 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
369 /* icmp should have updated the destination cache entry */
370 dst = __sk_dst_check(sk, np->dst_cookie);
373 struct inet_sock *inet = inet_sk(sk);
376 /* BUGGG_FUTURE: Again, it is not clear how
377 to handle rthdr case. Ignore this complexity
380 memset(&fl, 0, sizeof(fl));
381 fl.proto = IPPROTO_TCP;
382 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
383 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
384 fl.oif = sk->sk_bound_dev_if;
385 fl.mark = sk->sk_mark;
386 fl.fl_ip_dport = inet->dport;
387 fl.fl_ip_sport = inet->sport;
388 security_skb_classify_flow(skb, &fl);
390 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
391 sk->sk_err_soft = -err;
395 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
396 sk->sk_err_soft = -err;
403 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
404 tcp_sync_mss(sk, dst_mtu(dst));
405 tcp_simple_retransmit(sk);
406 } /* else let the usual retransmit timer handle it */
411 icmpv6_err_convert(type, code, &err);
413 /* Might be for a request_sock */
414 switch (sk->sk_state) {
415 struct request_sock *req, **prev;
417 if (sock_owned_by_user(sk))
420 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
421 &hdr->saddr, inet6_iif(skb));
425 /* ICMPs are not backlogged, hence we cannot get
426 * an established socket here.
428 WARN_ON(req->sk != NULL);
430 if (seq != tcp_rsk(req)->snt_isn) {
431 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
435 inet_csk_reqsk_queue_drop(sk, req, prev);
439 case TCP_SYN_RECV: /* Cannot happen.
440 It can, if SYNs are crossed. --ANK */
441 if (!sock_owned_by_user(sk)) {
443 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
447 sk->sk_err_soft = err;
451 if (!sock_owned_by_user(sk) && np->recverr) {
453 sk->sk_error_report(sk);
455 sk->sk_err_soft = err;
463 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
465 struct inet6_request_sock *treq = inet6_rsk(req);
466 struct ipv6_pinfo *np = inet6_sk(sk);
467 struct sk_buff * skb;
468 struct ipv6_txoptions *opt = NULL;
469 struct in6_addr * final_p = NULL, final;
471 struct dst_entry *dst;
474 memset(&fl, 0, sizeof(fl));
475 fl.proto = IPPROTO_TCP;
476 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
477 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
478 fl.fl6_flowlabel = 0;
480 fl.mark = sk->sk_mark;
481 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
482 fl.fl_ip_sport = inet_rsk(req)->loc_port;
483 security_req_classify_flow(req, &fl);
486 if (opt && opt->srcrt) {
487 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
488 ipv6_addr_copy(&final, &fl.fl6_dst);
489 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
493 err = ip6_dst_lookup(sk, &dst, &fl);
497 ipv6_addr_copy(&fl.fl6_dst, final_p);
498 if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
501 skb = tcp_make_synack(sk, dst, req);
503 struct tcphdr *th = tcp_hdr(skb);
505 th->check = tcp_v6_check(skb->len,
506 &treq->loc_addr, &treq->rmt_addr,
507 csum_partial(th, skb->len, skb->csum));
509 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
510 err = ip6_xmit(sk, skb, &fl, opt, 0);
511 err = net_xmit_eval(err);
515 if (opt && opt != np->opt)
516 sock_kfree_s(sk, opt, opt->tot_len);
521 static inline void syn_flood_warning(struct sk_buff *skb)
523 #ifdef CONFIG_SYN_COOKIES
524 if (sysctl_tcp_syncookies)
526 "TCPv6: Possible SYN flooding on port %d. "
527 "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
531 "TCPv6: Possible SYN flooding on port %d. "
532 "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
535 static void tcp_v6_reqsk_destructor(struct request_sock *req)
537 kfree_skb(inet6_rsk(req)->pktopts);
540 #ifdef CONFIG_TCP_MD5SIG
541 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
542 struct in6_addr *addr)
544 struct tcp_sock *tp = tcp_sk(sk);
549 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
552 for (i = 0; i < tp->md5sig_info->entries6; i++) {
553 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
554 return &tp->md5sig_info->keys6[i].base;
559 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
560 struct sock *addr_sk)
562 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
565 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
566 struct request_sock *req)
568 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
571 static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
572 char *newkey, u8 newkeylen)
574 /* Add key to the list */
575 struct tcp_md5sig_key *key;
576 struct tcp_sock *tp = tcp_sk(sk);
577 struct tcp6_md5sig_key *keys;
579 key = tcp_v6_md5_do_lookup(sk, peer);
581 /* modify existing entry - just update that one */
584 key->keylen = newkeylen;
586 /* reallocate new list if current one is full. */
587 if (!tp->md5sig_info) {
588 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
589 if (!tp->md5sig_info) {
593 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
595 if (tcp_alloc_md5sig_pool(sk) == NULL) {
599 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
600 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
601 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
604 tcp_free_md5sig_pool();
609 if (tp->md5sig_info->entries6)
610 memmove(keys, tp->md5sig_info->keys6,
611 (sizeof (tp->md5sig_info->keys6[0]) *
612 tp->md5sig_info->entries6));
614 kfree(tp->md5sig_info->keys6);
615 tp->md5sig_info->keys6 = keys;
616 tp->md5sig_info->alloced6++;
619 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
621 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
622 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
624 tp->md5sig_info->entries6++;
629 static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
630 u8 *newkey, __u8 newkeylen)
632 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
636 static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
638 struct tcp_sock *tp = tcp_sk(sk);
641 for (i = 0; i < tp->md5sig_info->entries6; i++) {
642 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
644 kfree(tp->md5sig_info->keys6[i].base.key);
645 tp->md5sig_info->entries6--;
647 if (tp->md5sig_info->entries6 == 0) {
648 kfree(tp->md5sig_info->keys6);
649 tp->md5sig_info->keys6 = NULL;
650 tp->md5sig_info->alloced6 = 0;
652 /* shrink the database */
653 if (tp->md5sig_info->entries6 != i)
654 memmove(&tp->md5sig_info->keys6[i],
655 &tp->md5sig_info->keys6[i+1],
656 (tp->md5sig_info->entries6 - i)
657 * sizeof (tp->md5sig_info->keys6[0]));
659 tcp_free_md5sig_pool();
666 static void tcp_v6_clear_md5_list (struct sock *sk)
668 struct tcp_sock *tp = tcp_sk(sk);
671 if (tp->md5sig_info->entries6) {
672 for (i = 0; i < tp->md5sig_info->entries6; i++)
673 kfree(tp->md5sig_info->keys6[i].base.key);
674 tp->md5sig_info->entries6 = 0;
675 tcp_free_md5sig_pool();
678 kfree(tp->md5sig_info->keys6);
679 tp->md5sig_info->keys6 = NULL;
680 tp->md5sig_info->alloced6 = 0;
682 if (tp->md5sig_info->entries4) {
683 for (i = 0; i < tp->md5sig_info->entries4; i++)
684 kfree(tp->md5sig_info->keys4[i].base.key);
685 tp->md5sig_info->entries4 = 0;
686 tcp_free_md5sig_pool();
689 kfree(tp->md5sig_info->keys4);
690 tp->md5sig_info->keys4 = NULL;
691 tp->md5sig_info->alloced4 = 0;
694 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
697 struct tcp_md5sig cmd;
698 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
701 if (optlen < sizeof(cmd))
704 if (copy_from_user(&cmd, optval, sizeof(cmd)))
707 if (sin6->sin6_family != AF_INET6)
710 if (!cmd.tcpm_keylen) {
711 if (!tcp_sk(sk)->md5sig_info)
713 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
714 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
715 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
718 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
721 if (!tcp_sk(sk)->md5sig_info) {
722 struct tcp_sock *tp = tcp_sk(sk);
723 struct tcp_md5sig_info *p;
725 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
730 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
733 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
736 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
737 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
738 newkey, cmd.tcpm_keylen);
740 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
743 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
744 struct in6_addr *daddr,
745 struct in6_addr *saddr, int nbytes)
747 struct tcp6_pseudohdr *bp;
748 struct scatterlist sg;
750 bp = &hp->md5_blk.ip6;
751 /* 1. TCP pseudo-header (RFC2460) */
752 ipv6_addr_copy(&bp->saddr, saddr);
753 ipv6_addr_copy(&bp->daddr, daddr);
754 bp->protocol = cpu_to_be32(IPPROTO_TCP);
755 bp->len = cpu_to_be32(nbytes);
757 sg_init_one(&sg, bp, sizeof(*bp));
758 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
761 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
762 struct in6_addr *daddr, struct in6_addr *saddr,
765 struct tcp_md5sig_pool *hp;
766 struct hash_desc *desc;
768 hp = tcp_get_md5sig_pool();
770 goto clear_hash_noput;
771 desc = &hp->md5_desc;
773 if (crypto_hash_init(desc))
775 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
777 if (tcp_md5_hash_header(hp, th))
779 if (tcp_md5_hash_key(hp, key))
781 if (crypto_hash_final(desc, md5_hash))
784 tcp_put_md5sig_pool();
788 tcp_put_md5sig_pool();
790 memset(md5_hash, 0, 16);
794 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
795 struct sock *sk, struct request_sock *req,
798 struct in6_addr *saddr, *daddr;
799 struct tcp_md5sig_pool *hp;
800 struct hash_desc *desc;
801 struct tcphdr *th = tcp_hdr(skb);
804 saddr = &inet6_sk(sk)->saddr;
805 daddr = &inet6_sk(sk)->daddr;
807 saddr = &inet6_rsk(req)->loc_addr;
808 daddr = &inet6_rsk(req)->rmt_addr;
810 struct ipv6hdr *ip6h = ipv6_hdr(skb);
811 saddr = &ip6h->saddr;
812 daddr = &ip6h->daddr;
815 hp = tcp_get_md5sig_pool();
817 goto clear_hash_noput;
818 desc = &hp->md5_desc;
820 if (crypto_hash_init(desc))
823 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
825 if (tcp_md5_hash_header(hp, th))
827 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
829 if (tcp_md5_hash_key(hp, key))
831 if (crypto_hash_final(desc, md5_hash))
834 tcp_put_md5sig_pool();
838 tcp_put_md5sig_pool();
840 memset(md5_hash, 0, 16);
844 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
846 __u8 *hash_location = NULL;
847 struct tcp_md5sig_key *hash_expected;
848 struct ipv6hdr *ip6h = ipv6_hdr(skb);
849 struct tcphdr *th = tcp_hdr(skb);
853 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
854 hash_location = tcp_parse_md5sig_option(th);
856 /* We've parsed the options - do we have a hash? */
857 if (!hash_expected && !hash_location)
860 if (hash_expected && !hash_location) {
861 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
865 if (!hash_expected && hash_location) {
866 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
870 /* check the signature */
871 genhash = tcp_v6_md5_hash_skb(newhash,
875 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
876 if (net_ratelimit()) {
877 printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n",
878 genhash ? "failed" : "mismatch",
879 &ip6h->saddr, ntohs(th->source),
880 &ip6h->daddr, ntohs(th->dest));
888 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
890 .obj_size = sizeof(struct tcp6_request_sock),
891 .rtx_syn_ack = tcp_v6_send_synack,
892 .send_ack = tcp_v6_reqsk_send_ack,
893 .destructor = tcp_v6_reqsk_destructor,
894 .send_reset = tcp_v6_send_reset
897 #ifdef CONFIG_TCP_MD5SIG
898 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
899 .md5_lookup = tcp_v6_reqsk_md5_lookup,
900 .calc_md5_hash = tcp_v6_md5_hash_skb,
904 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
905 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
906 .twsk_unique = tcp_twsk_unique,
907 .twsk_destructor= tcp_twsk_destructor,
910 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
912 struct ipv6_pinfo *np = inet6_sk(sk);
913 struct tcphdr *th = tcp_hdr(skb);
915 if (skb->ip_summed == CHECKSUM_PARTIAL) {
916 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
917 skb->csum_start = skb_transport_header(skb) - skb->head;
918 skb->csum_offset = offsetof(struct tcphdr, check);
920 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
921 csum_partial(th, th->doff<<2,
926 static int tcp_v6_gso_send_check(struct sk_buff *skb)
928 struct ipv6hdr *ipv6h;
931 if (!pskb_may_pull(skb, sizeof(*th)))
934 ipv6h = ipv6_hdr(skb);
938 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
940 skb->csum_start = skb_transport_header(skb) - skb->head;
941 skb->csum_offset = offsetof(struct tcphdr, check);
942 skb->ip_summed = CHECKSUM_PARTIAL;
946 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
949 struct ipv6hdr *iph = skb_gro_network_header(skb);
951 switch (skb->ip_summed) {
952 case CHECKSUM_COMPLETE:
953 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
955 skb->ip_summed = CHECKSUM_UNNECESSARY;
961 NAPI_GRO_CB(skb)->flush = 1;
965 return tcp_gro_receive(head, skb);
968 static int tcp6_gro_complete(struct sk_buff *skb)
970 struct ipv6hdr *iph = ipv6_hdr(skb);
971 struct tcphdr *th = tcp_hdr(skb);
973 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
974 &iph->saddr, &iph->daddr, 0);
975 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
977 return tcp_gro_complete(skb);
980 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
981 u32 ts, struct tcp_md5sig_key *key, int rst)
983 struct tcphdr *th = tcp_hdr(skb), *t1;
984 struct sk_buff *buff;
986 struct net *net = dev_net(skb_dst(skb)->dev);
987 struct sock *ctl_sk = net->ipv6.tcp_sk;
988 unsigned int tot_len = sizeof(struct tcphdr);
989 struct dst_entry *dst;
993 tot_len += TCPOLEN_TSTAMP_ALIGNED;
994 #ifdef CONFIG_TCP_MD5SIG
996 tot_len += TCPOLEN_MD5SIG_ALIGNED;
999 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1004 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1006 t1 = (struct tcphdr *) skb_push(buff, tot_len);
1007 skb_reset_transport_header(skb);
1009 /* Swap the send and the receive. */
1010 memset(t1, 0, sizeof(*t1));
1011 t1->dest = th->source;
1012 t1->source = th->dest;
1013 t1->doff = tot_len / 4;
1014 t1->seq = htonl(seq);
1015 t1->ack_seq = htonl(ack);
1016 t1->ack = !rst || !th->ack;
1018 t1->window = htons(win);
1020 topt = (__be32 *)(t1 + 1);
1023 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1024 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1025 *topt++ = htonl(tcp_time_stamp);
1026 *topt++ = htonl(ts);
1029 #ifdef CONFIG_TCP_MD5SIG
1031 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1032 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1033 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
1034 &ipv6_hdr(skb)->saddr,
1035 &ipv6_hdr(skb)->daddr, t1);
1039 buff->csum = csum_partial(t1, tot_len, 0);
1041 memset(&fl, 0, sizeof(fl));
1042 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1043 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1045 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1046 tot_len, IPPROTO_TCP,
1049 fl.proto = IPPROTO_TCP;
1050 fl.oif = inet6_iif(skb);
1051 fl.fl_ip_dport = t1->dest;
1052 fl.fl_ip_sport = t1->source;
1053 security_skb_classify_flow(skb, &fl);
1055 /* Pass a socket to ip6_dst_lookup even when this is for a RST.
1056 * The underlying function will use it to retrieve the network
1059 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
1060 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
1061 skb_dst_set(buff, dst);
1062 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1063 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1065 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
1073 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
1075 struct tcphdr *th = tcp_hdr(skb);
1076 u32 seq = 0, ack_seq = 0;
1077 struct tcp_md5sig_key *key = NULL;
1082 if (!ipv6_unicast_destination(skb))
1085 #ifdef CONFIG_TCP_MD5SIG
1087 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
1091 seq = ntohl(th->ack_seq);
1093 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1096 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
1099 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
1100 struct tcp_md5sig_key *key)
1102 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
1105 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1107 struct inet_timewait_sock *tw = inet_twsk(sk);
1108 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1110 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1111 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1112 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
1117 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
1118 struct request_sock *req)
1120 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
1121 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
1125 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1127 struct request_sock *req, **prev;
1128 const struct tcphdr *th = tcp_hdr(skb);
1131 /* Find possible connection requests. */
1132 req = inet6_csk_search_req(sk, &prev, th->source,
1133 &ipv6_hdr(skb)->saddr,
1134 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1136 return tcp_check_req(sk, skb, req, prev);
1138 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1139 &ipv6_hdr(skb)->saddr, th->source,
1140 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1143 if (nsk->sk_state != TCP_TIME_WAIT) {
1147 inet_twsk_put(inet_twsk(nsk));
1151 #ifdef CONFIG_SYN_COOKIES
1152 if (!th->rst && !th->syn && th->ack)
1153 sk = cookie_v6_check(sk, skb);
1158 /* FIXME: this is substantially similar to the ipv4 code.
1159 * Can some kind of merge be done? -- erics
1161 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1163 struct inet6_request_sock *treq;
1164 struct ipv6_pinfo *np = inet6_sk(sk);
1165 struct tcp_options_received tmp_opt;
1166 struct tcp_sock *tp = tcp_sk(sk);
1167 struct request_sock *req = NULL;
1168 __u32 isn = TCP_SKB_CB(skb)->when;
1169 #ifdef CONFIG_SYN_COOKIES
1170 int want_cookie = 0;
1172 #define want_cookie 0
1175 if (skb->protocol == htons(ETH_P_IP))
1176 return tcp_v4_conn_request(sk, skb);
1178 if (!ipv6_unicast_destination(skb))
1181 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1182 if (net_ratelimit())
1183 syn_flood_warning(skb);
1184 #ifdef CONFIG_SYN_COOKIES
1185 if (sysctl_tcp_syncookies)
1192 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1195 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1199 #ifdef CONFIG_TCP_MD5SIG
1200 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1203 tcp_clear_options(&tmp_opt);
1204 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1205 tmp_opt.user_mss = tp->rx_opt.user_mss;
1207 tcp_parse_options(skb, &tmp_opt, 0);
1209 if (want_cookie && !tmp_opt.saw_tstamp)
1210 tcp_clear_options(&tmp_opt);
1212 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1213 tcp_openreq_init(req, &tmp_opt, skb);
1215 treq = inet6_rsk(req);
1216 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1217 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1219 TCP_ECN_create_request(req, tcp_hdr(skb));
1222 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1223 req->cookie_ts = tmp_opt.tstamp_ok;
1225 if (ipv6_opt_accepted(sk, skb) ||
1226 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1227 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1228 atomic_inc(&skb->users);
1229 treq->pktopts = skb;
1231 treq->iif = sk->sk_bound_dev_if;
1233 /* So that link locals have meaning */
1234 if (!sk->sk_bound_dev_if &&
1235 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1236 treq->iif = inet6_iif(skb);
1238 isn = tcp_v6_init_sequence(skb);
1241 tcp_rsk(req)->snt_isn = isn;
1243 security_inet_conn_request(sk, skb, req);
1245 if (tcp_v6_send_synack(sk, req))
1249 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1257 return 0; /* don't send reset */
1260 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1261 struct request_sock *req,
1262 struct dst_entry *dst)
1264 struct inet6_request_sock *treq;
1265 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1266 struct tcp6_sock *newtcp6sk;
1267 struct inet_sock *newinet;
1268 struct tcp_sock *newtp;
1270 struct ipv6_txoptions *opt;
1271 #ifdef CONFIG_TCP_MD5SIG
1272 struct tcp_md5sig_key *key;
1275 if (skb->protocol == htons(ETH_P_IP)) {
1280 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1285 newtcp6sk = (struct tcp6_sock *)newsk;
1286 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1288 newinet = inet_sk(newsk);
1289 newnp = inet6_sk(newsk);
1290 newtp = tcp_sk(newsk);
1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1294 ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
1296 ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
1298 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1300 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1301 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1302 #ifdef CONFIG_TCP_MD5SIG
1303 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1306 newnp->pktoptions = NULL;
1308 newnp->mcast_oif = inet6_iif(skb);
1309 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1312 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1313 * here, tcp_create_openreq_child now does this for us, see the comment in
1314 * that function for the gory details. -acme
1317 /* This is a tricky place. Until this moment IPv4 tcp
1318 worked with IPv6 icsk.icsk_af_ops.
1321 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1326 treq = inet6_rsk(req);
1329 if (sk_acceptq_is_full(sk))
1333 struct in6_addr *final_p = NULL, final;
1336 memset(&fl, 0, sizeof(fl));
1337 fl.proto = IPPROTO_TCP;
1338 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1339 if (opt && opt->srcrt) {
1340 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1341 ipv6_addr_copy(&final, &fl.fl6_dst);
1342 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1345 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1346 fl.oif = sk->sk_bound_dev_if;
1347 fl.mark = sk->sk_mark;
1348 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1349 fl.fl_ip_sport = inet_rsk(req)->loc_port;
1350 security_req_classify_flow(req, &fl);
1352 if (ip6_dst_lookup(sk, &dst, &fl))
1356 ipv6_addr_copy(&fl.fl6_dst, final_p);
1358 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
1362 newsk = tcp_create_openreq_child(sk, req, skb);
1367 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1368 * count here, tcp_create_openreq_child now does this for us, see the
1369 * comment in that function for the gory details. -acme
1372 newsk->sk_gso_type = SKB_GSO_TCPV6;
1373 __ip6_dst_store(newsk, dst, NULL, NULL);
1375 newtcp6sk = (struct tcp6_sock *)newsk;
1376 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1378 newtp = tcp_sk(newsk);
1379 newinet = inet_sk(newsk);
1380 newnp = inet6_sk(newsk);
1382 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1384 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1385 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1386 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1387 newsk->sk_bound_dev_if = treq->iif;
1389 /* Now IPv6 options...
1391 First: no IPv4 options.
1393 newinet->opt = NULL;
1394 newnp->ipv6_fl_list = NULL;
1397 newnp->rxopt.all = np->rxopt.all;
1399 /* Clone pktoptions received with SYN */
1400 newnp->pktoptions = NULL;
1401 if (treq->pktopts != NULL) {
1402 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1403 kfree_skb(treq->pktopts);
1404 treq->pktopts = NULL;
1405 if (newnp->pktoptions)
1406 skb_set_owner_r(newnp->pktoptions, newsk);
1409 newnp->mcast_oif = inet6_iif(skb);
1410 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1412 /* Clone native IPv6 options from listening socket (if any)
1414 Yes, keeping reference count would be much more clever,
1415 but we do one more thing there: reattach optmem
1419 newnp->opt = ipv6_dup_options(newsk, opt);
1421 sock_kfree_s(sk, opt, opt->tot_len);
1424 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1426 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1427 newnp->opt->opt_flen);
1429 tcp_mtup_init(newsk);
1430 tcp_sync_mss(newsk, dst_mtu(dst));
1431 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1432 tcp_initialize_rcv_mss(newsk);
1434 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1436 #ifdef CONFIG_TCP_MD5SIG
1437 /* Copy over the MD5 key from the original socket */
1438 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1439 /* We're using one, so create a matching key
1440 * on the newsk structure. If we fail to get
1441 * memory, then we end up not copying the key
1444 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1446 tcp_v6_md5_do_add(newsk, &newnp->daddr,
1447 newkey, key->keylen);
1451 __inet6_hash(newsk);
1452 __inet_inherit_port(sk, newsk);
1457 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1459 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1460 if (opt && opt != np->opt)
1461 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Prepare the receive-side checksum state of an incoming TCP/IPv6 skb.
 *
 * When the skb is marked CHECKSUM_COMPLETE and tcp_v6_check() over the
 * packet length, the IPv6 source/destination addresses and skb->csum
 * evaluates to zero, the skb is re-marked CHECKSUM_UNNECESSARY.
 *
 * Otherwise skb->csum is seeded with the complemented, unfolded result of
 * tcp_v6_check(len, saddr, daddr, 0), and packets of at most 76 bytes are
 * verified on the spot through __skb_checksum_complete().
 *
 * Returns the result of __skb_checksum_complete() for short packets; the
 * other return statements are not visible in this excerpt.
 */
1466 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1468 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1469 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1470 &ipv6_hdr(skb)->daddr, skb->csum)) {
1471 skb->ip_summed = CHECKSUM_UNNECESSARY;
1476 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1477 &ipv6_hdr(skb)->saddr,
1478 &ipv6_hdr(skb)->daddr, 0));
/* Short packets are checksummed immediately. */
1480 if (skb->len <= 76) {
1481 return __skb_checksum_complete(skb);
1486 /* The socket must have its spinlock held when we get
1489 * We have a potential double-lock case here, so even when
1490 * doing backlog processing we use the BH locking scheme.
1491 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive routine for a TCP segment on an IPv6 socket. It is
 * called directly from tcp_v6_rcv() and is also installed as
 * tcpv6_prot.backlog_rcv.
 *
 * Visible flow:
 *  - skbs whose protocol is ETH_P_IP are handed to tcp_v4_do_rcv();
 *  - with CONFIG_TCP_MD5SIG, tcp_v6_inbound_md5_hash() is consulted, then
 *    sk_filter() (the branches taken on failure are not visible here);
 *  - opt_skb is set to a clone of the skb (GFP_ATOMIC); the condition
 *    guarding the clone is not visible in this excerpt;
 *  - TCP_ESTABLISHED uses tcp_rcv_established();
 *  - otherwise the header length and checksum are validated, TCP_LISTEN
 *    goes through tcp_v6_hnd_req() and tcp_child_process(), and all other
 *    states through tcp_rcv_state_process();
 *  - tcp_v6_send_reset() and the TCP_MIB_INERRS counter are used on error
 *    paths whose labels are not visible in this excerpt.
 *
 * At the packet-options step, if opt_skb ends exactly at tp->rcv_nxt and the
 * socket is neither CLOSE nor LISTEN, mcast_oif / mcast_hops are refreshed
 * according to the rxopt bits and, when ipv6_opt_accepted() is true, opt_skb
 * is given to the socket (skb_set_owner_r) and swapped into np->pktoptions
 * with xchg(); the displaced skb is what remains in opt_skb afterwards.
 */
1494 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1496 struct ipv6_pinfo *np = inet6_sk(sk);
1497 struct tcp_sock *tp;
1498 struct sk_buff *opt_skb = NULL;
1500 /* Imagine: socket is IPv6. IPv4 packet arrives,
1501 goes to IPv4 receive handler and backlogged.
1502 From backlog it always goes here. Kerboom...
1503 Fortunately, tcp_rcv_established and rcv_established
1504 handle them correctly, but it is not case with
1505 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1508 if (skb->protocol == htons(ETH_P_IP))
1509 return tcp_v4_do_rcv(sk, skb);
1511 #ifdef CONFIG_TCP_MD5SIG
1512 if (tcp_v6_inbound_md5_hash (sk, skb))
1516 if (sk_filter(sk, skb))
1520 * socket locking is here for SMP purposes as backlog rcv
1521 * is currently called with bh processing disabled.
1524 /* Do Stevens' IPV6_PKTOPTIONS.
1526 Yes, guys, it is the only place in our code, where we
1527 may make it not affecting IPv4.
1528 The rest of code is protocol independent,
1529 and I do not like idea to uglify IPv4.
1531 Actually, all the idea behind IPV6_PKTOPTIONS
1532 looks not very well thought. For now we latch
1533 options, received in the last packet, enqueued
1534 by tcp. Feel free to propose better solution.
1538 opt_skb = skb_clone(skb, GFP_ATOMIC);
1540 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1541 TCP_CHECK_TIMER(sk);
1542 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1544 TCP_CHECK_TIMER(sk);
1546 goto ipv6_pktoptions;
1550 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1553 if (sk->sk_state == TCP_LISTEN) {
1554 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1559 * Queue it on the new socket if the new socket is active,
1560 * otherwise we just shortcircuit this and continue with
1564 if (tcp_child_process(sk, nsk, skb))
1567 __kfree_skb(opt_skb);
1572 TCP_CHECK_TIMER(sk);
1573 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1575 TCP_CHECK_TIMER(sk);
1577 goto ipv6_pktoptions;
1581 tcp_v6_send_reset(sk, skb);
1584 __kfree_skb(opt_skb);
1588 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1593 /* Do you ask, what is it?
1595 1. skb was enqueued by tcp.
1596 2. skb is added to tail of read queue, rather than out of order.
1597 3. socket is not in passive state.
1598 4. Finally, it really contains options, which user wants to receive.
1601 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1602 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1603 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1604 np->mcast_oif = inet6_iif(opt_skb);
1605 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1606 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1607 if (ipv6_opt_accepted(sk, opt_skb)) {
1608 skb_set_owner_r(opt_skb, sk);
1609 opt_skb = xchg(&np->pktoptions, opt_skb);
1611 __kfree_skb(opt_skb);
1612 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Entry point for TCP segments arriving over IPv6; registered as
 * tcpv6_protocol.handler.
 *
 * Visible steps:
 *  - tests skb->pkt_type against PACKET_HOST and counts the segment in
 *    TCP_MIB_INSEGS;
 *  - makes sure the basic TCP header and then th->doff*4 bytes can be
 *    pulled, rejects a data offset smaller than the basic header, and
 *    initialises or verifies the checksum through tcp_v6_checksum_init()
 *    unless skb_csum_unnecessary() already holds;
 *  - fills TCP_SKB_CB(skb): seq, end_seq (seq + syn + fin + payload length),
 *    ack_seq, when, flags (taken from the IPv6 DS field) and sacked;
 *  - looks the socket up with __inet6_lookup_skb();
 *  - for a normal socket: xfrm6 policy check, sk_filter(), then under
 *    bh_lock_sock_nested() either processes the skb (tcp_prequeue() or
 *    tcp_v6_do_rcv()) when the socket is not owned by user context, or
 *    queues it with sk_add_backlog();
 *  - with no socket: policy check, length/checksum validation
 *    (TCP_MIB_INERRS on failure) and tcp_v6_send_reset(NULL, skb);
 *  - for a TIME_WAIT socket: the same checks, then a switch on
 *    tcp_timewait_state_process(), which may look up a listener with
 *    inet6_lookup_listener(), deschedule the timewait socket, or answer
 *    with tcp_v6_timewait_ack().
 *
 * Returns -1 when the per-socket processing result is non-zero, else 0
 * (the return values of the other exit paths are not visible here).
 */
1620 static int tcp_v6_rcv(struct sk_buff *skb)
1625 struct net *net = dev_net(skb->dev);
1627 if (skb->pkt_type != PACKET_HOST)
1631 * Count it even if it's bad.
1633 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1635 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1640 if (th->doff < sizeof(struct tcphdr)/4)
1642 if (!pskb_may_pull(skb, th->doff*4))
1645 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1649 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1650 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1651 skb->len - th->doff*4);
1652 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1653 TCP_SKB_CB(skb)->when = 0;
1654 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1655 TCP_SKB_CB(skb)->sacked = 0;
1657 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1662 if (sk->sk_state == TCP_TIME_WAIT)
1665 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1666 goto discard_and_relse;
1668 if (sk_filter(sk, skb))
1669 goto discard_and_relse;
1673 bh_lock_sock_nested(sk);
1675 if (!sock_owned_by_user(sk)) {
1676 #ifdef CONFIG_NET_DMA
1677 struct tcp_sock *tp = tcp_sk(sk);
1678 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1679 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1680 if (tp->ucopy.dma_chan)
1681 ret = tcp_v6_do_rcv(sk, skb);
1685 if (!tcp_prequeue(sk, skb))
1686 ret = tcp_v6_do_rcv(sk, skb);
1689 sk_add_backlog(sk, skb);
1693 return ret ? -1 : 0;
1696 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1699 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1701 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1703 tcp_v6_send_reset(NULL, skb);
1720 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1721 inet_twsk_put(inet_twsk(sk));
1725 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1726 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1727 inet_twsk_put(inet_twsk(sk));
1731 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1736 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1737 &ipv6_hdr(skb)->daddr,
1738 ntohs(th->dest), inet6_iif(skb));
1740 struct inet_timewait_sock *tw = inet_twsk(sk);
1741 inet_twsk_deschedule(tw, &tcp_death_row);
1746 /* Fall through to ACK */
1749 tcp_v6_timewait_ack(sk, skb);
1753 case TCP_TW_SUCCESS:;
/*
 * remember_stamp hook for native IPv6 sockets (wired up in ipv6_specific).
 * Per the comment in the body it is not implemented yet; the returned
 * value is not visible in this excerpt.
 */
1758 static int tcp_v6_remember_stamp(struct sock *sk)
1760 /* Alas, not yet... */
/*
 * Address-family operations for native TCP/IPv6 sockets;
 * tcp_v6_init_sock() stores this table in icsk->icsk_af_ops.
 * Transmit, checksum and header-rebuild hooks are the IPv6 variants and
 * net_header_len is the size of an IPv6 header. The compat sockopt hooks
 * are only present with CONFIG_COMPAT.
 */
1764 static const struct inet_connection_sock_af_ops ipv6_specific = {
1765 .queue_xmit = inet6_csk_xmit,
1766 .send_check = tcp_v6_send_check,
1767 .rebuild_header = inet6_sk_rebuild_header,
1768 .conn_request = tcp_v6_conn_request,
1769 .syn_recv_sock = tcp_v6_syn_recv_sock,
1770 .remember_stamp = tcp_v6_remember_stamp,
1771 .net_header_len = sizeof(struct ipv6hdr),
1772 .setsockopt = ipv6_setsockopt,
1773 .getsockopt = ipv6_getsockopt,
1774 .addr2sockaddr = inet6_csk_addr2sockaddr,
1775 .sockaddr_len = sizeof(struct sockaddr_in6),
1776 .bind_conflict = inet6_csk_bind_conflict,
1777 #ifdef CONFIG_COMPAT
1778 .compat_setsockopt = compat_ipv6_setsockopt,
1779 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature hooks for native IPv6 sockets, built only with
 * CONFIG_TCP_MD5SIG; tcp_v6_init_sock() stores this table in
 * tp->af_specific.
 */
1783 #ifdef CONFIG_TCP_MD5SIG
1784 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1785 .md5_lookup = tcp_v6_md5_lookup,
1786 .calc_md5_hash = tcp_v6_md5_hash_skb,
1787 .md5_add = tcp_v6_md5_add_func,
1788 .md5_parse = tcp_v6_parse_md5_keys,
1793 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for TCP carried over IPv4 on an INET6 socket.
 * Compared with ipv6_specific, the transmit (ip_queue_xmit), checksum
 * (tcp_v4_send_check), header-rebuild (inet_sk_rebuild_header) and
 * remember_stamp (tcp_v4_remember_stamp) hooks are the IPv4 ones and
 * net_header_len is the size of an IPv4 header; the connection-request,
 * sockopt, sockaddr and bind-conflict hooks stay the IPv6 ones.
 */
1796 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1797 .queue_xmit = ip_queue_xmit,
1798 .send_check = tcp_v4_send_check,
1799 .rebuild_header = inet_sk_rebuild_header,
1800 .conn_request = tcp_v6_conn_request,
1801 .syn_recv_sock = tcp_v6_syn_recv_sock,
1802 .remember_stamp = tcp_v4_remember_stamp,
1803 .net_header_len = sizeof(struct iphdr),
1804 .setsockopt = ipv6_setsockopt,
1805 .getsockopt = ipv6_getsockopt,
1806 .addr2sockaddr = inet6_csk_addr2sockaddr,
1807 .sockaddr_len = sizeof(struct sockaddr_in6),
1808 .bind_conflict = inet6_csk_bind_conflict,
1809 #ifdef CONFIG_COMPAT
1810 .compat_setsockopt = compat_ipv6_setsockopt,
1811 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * TCP MD5 signature hooks for the IPv4-over-INET6 case (CONFIG_TCP_MD5SIG
 * only): key lookup and hashing use the IPv4 helpers, while adding and
 * parsing keys stay with the IPv6 helpers.
 * NOTE(review): where this table is installed is not visible in this
 * excerpt.
 */
1815 #ifdef CONFIG_TCP_MD5SIG
1816 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1817 .md5_lookup = tcp_v4_md5_lookup,
1818 .calc_md5_hash = tcp_v4_md5_hash_skb,
1819 .md5_add = tcp_v6_md5_add_func,
1820 .md5_parse = tcp_v6_parse_md5_keys,
1824 /* NOTE: A lot of things set to zero explicitly by call to
1825 * sk_alloc() so need not be done here.
/*
 * Initialise a freshly allocated TCP/IPv6 socket (tcpv6_prot.init).
 *
 * Sets up the out-of-order queue, the transmit timers and the prequeue;
 * seeds icsk_rto and mdev with TCP_TIMEOUT_INIT; sets snd_ssthresh to
 * TCP_INFINITE_SSTHRESH, snd_cwnd_clamp to all ones, mss_cache to 536 and
 * reordering to sysctl_tcp_reordering; puts the socket in TCP_CLOSE.
 *
 * Installs ipv6_specific as the address-family ops,
 * tcp_init_congestion_ops as the congestion-control ops, tcp_sync_mss and
 * sk_stream_write_space as callbacks, sets SOCK_USE_WRITE_QUEUE and, with
 * CONFIG_TCP_MD5SIG, tcp_sock_ipv6_specific as the MD5 ops.
 *
 * Default send/receive buffer sizes come from sysctl_tcp_wmem[1] and
 * sysctl_tcp_rmem[1]; finally tcp_sockets_allocated is incremented.
 * The return statement is not visible in this excerpt.
 */
1827 static int tcp_v6_init_sock(struct sock *sk)
1829 struct inet_connection_sock *icsk = inet_csk(sk);
1830 struct tcp_sock *tp = tcp_sk(sk);
1832 skb_queue_head_init(&tp->out_of_order_queue);
1833 tcp_init_xmit_timers(sk);
1834 tcp_prequeue_init(tp);
1836 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1837 tp->mdev = TCP_TIMEOUT_INIT;
1839 /* So many TCP implementations out there (incorrectly) count the
1840 * initial SYN frame in their delayed-ACK and congestion control
1841 * algorithms that we must have the following bandaid to talk
1842 * efficiently to them. -DaveM
1846 /* See draft-stevens-tcpca-spec-01 for discussion of the
1847 * initialization of these values.
1849 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1850 tp->snd_cwnd_clamp = ~0;
1851 tp->mss_cache = 536;
1853 tp->reordering = sysctl_tcp_reordering;
1855 sk->sk_state = TCP_CLOSE;
1857 icsk->icsk_af_ops = &ipv6_specific;
1858 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1859 icsk->icsk_sync_mss = tcp_sync_mss;
1860 sk->sk_write_space = sk_stream_write_space;
1861 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1863 #ifdef CONFIG_TCP_MD5SIG
1864 tp->af_specific = &tcp_sock_ipv6_specific;
1867 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1868 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1871 percpu_counter_inc(&tcp_sockets_allocated);
/*
 * Destroy hook for TCP/IPv6 sockets (tcpv6_prot.destroy).
 * With CONFIG_TCP_MD5SIG the MD5 key list is cleared when md5sig_info is
 * set; afterwards tcp_v4_destroy_sock() and then inet6_destroy_sock() are
 * called, in that order.
 */
1877 static void tcp_v6_destroy_sock(struct sock *sk)
1879 #ifdef CONFIG_TCP_MD5SIG
1880 /* Clean up the MD5 key list */
1881 if (tcp_sk(sk)->md5sig_info)
1882 tcp_v6_clear_md5_list(sk);
1884 tcp_v4_destroy_sock(sk);
1885 inet6_destroy_sock(sk);
1888 #ifdef CONFIG_PROC_FS
1889 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Print one line describing a pending connection request; called from
 * tcp6_seq_show() for TCP_SEQ_STATE_OPENREQ entries.
 *
 * @seq: seq_file being written
 * @sk:  socket passed by the caller as st->syn_wait_sk
 * @req: the request sock to describe
 * @i:   entry number printed in the first column (st->num in the caller)
 * @uid: uid supplied by the caller (st->uid)
 *
 * Local and remote addresses are taken from inet6_rsk(req) and the ports
 * from inet_rsk(req), converted with ntohs(). ttd is req->expires minus
 * the current jiffies and is printed via jiffies_to_clock_t(). The queue
 * sizes, the non-standard timer field and the inode are printed as 0, and
 * the timer-active field as 1.
 */
1890 static void get_openreq6(struct seq_file *seq,
1891 struct sock *sk, struct request_sock *req, int i, int uid)
1893 int ttd = req->expires - jiffies;
1894 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1895 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1901 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1902 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1904 src->s6_addr32[0], src->s6_addr32[1],
1905 src->s6_addr32[2], src->s6_addr32[3],
1906 ntohs(inet_rsk(req)->loc_port),
1907 dest->s6_addr32[0], dest->s6_addr32[1],
1908 dest->s6_addr32[2], dest->s6_addr32[3],
1909 ntohs(inet_rsk(req)->rmt_port),
1911 0,0, /* could print option size, but that is af dependent. */
1912 1, /* timers active (only the expire timer) */
1913 jiffies_to_clock_t(ttd),
1916 0, /* non standard timer */
1917 0, /* open_requests have no inode */
/*
 * Print one line describing a full TCP/IPv6 socket; called from
 * tcp6_seq_show() for listening and established entries.
 *
 * @seq: seq_file being written
 * @sp:  socket to describe
 * @i:   entry number supplied by the caller (st->num)
 *
 * The source address is np->rcv_saddr; ports are inet->sport / inet->dport
 * converted with ntohs(). timer_expires is icsk_timeout while a retransmit
 * (ICSK_TIME_RETRANS) or probe0 (ICSK_TIME_PROBE0) timer is pending,
 * sk_timer.expires while sk_timer is pending, and the current jiffies
 * otherwise.
 *
 * The two queue columns are write_seq - snd_una and, for the second one,
 * sk_ack_backlog on a listener or rcv_nxt - copied_seq on any other
 * socket. The last column is -1 while tcp_in_initial_slowstart() holds,
 * else snd_ssthresh.
 */
1921 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1923 struct in6_addr *dest, *src;
1926 unsigned long timer_expires;
1927 struct inet_sock *inet = inet_sk(sp);
1928 struct tcp_sock *tp = tcp_sk(sp);
1929 const struct inet_connection_sock *icsk = inet_csk(sp);
1930 struct ipv6_pinfo *np = inet6_sk(sp);
1933 src = &np->rcv_saddr;
1934 destp = ntohs(inet->dport);
1935 srcp = ntohs(inet->sport);
1937 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1939 timer_expires = icsk->icsk_timeout;
1940 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1942 timer_expires = icsk->icsk_timeout;
1943 } else if (timer_pending(&sp->sk_timer)) {
1945 timer_expires = sp->sk_timer.expires;
1948 timer_expires = jiffies;
1952 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1953 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
1955 src->s6_addr32[0], src->s6_addr32[1],
1956 src->s6_addr32[2], src->s6_addr32[3], srcp,
1957 dest->s6_addr32[0], dest->s6_addr32[1],
1958 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1960 tp->write_seq-tp->snd_una,
1961 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1963 jiffies_to_clock_t(timer_expires - jiffies),
1964 icsk->icsk_retransmits,
1966 icsk->icsk_probes_out,
1968 atomic_read(&sp->sk_refcnt), sp,
1969 jiffies_to_clock_t(icsk->icsk_rto),
1970 jiffies_to_clock_t(icsk->icsk_ack.ato),
1971 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1973 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/*
 * Print one line describing a TIME_WAIT socket; called from
 * tcp6_seq_show() for TCP_SEQ_STATE_TIME_WAIT entries.
 *
 * @seq: seq_file being written
 * @tw:  timewait socket to describe
 * @i:   entry number supplied by the caller (st->num)
 *
 * Addresses come from the inet6 timewait part (tw_v6_rcv_saddr and
 * tw_v6_daddr), ports from tw_sport / tw_dport converted with ntohs().
 * ttd is tw_ttd minus the current jiffies. The state column shows
 * tw_substate, the timer field is the constant 3, the remaining numeric
 * fields are printed as 0, followed by the reference count and the
 * pointer.
 */
1977 static void get_timewait6_sock(struct seq_file *seq,
1978 struct inet_timewait_sock *tw, int i)
1980 struct in6_addr *dest, *src;
1982 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1983 int ttd = tw->tw_ttd - jiffies;
1988 dest = &tw6->tw_v6_daddr;
1989 src = &tw6->tw_v6_rcv_saddr;
1990 destp = ntohs(tw->tw_dport);
1991 srcp = ntohs(tw->tw_sport);
1994 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1995 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1997 src->s6_addr32[0], src->s6_addr32[1],
1998 src->s6_addr32[2], src->s6_addr32[3], srcp,
1999 dest->s6_addr32[0], dest->s6_addr32[1],
2000 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2001 tw->tw_substate, 0, 0,
2002 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2003 atomic_read(&tw->tw_refcnt), tw);
/*
 * seq_file show callback (tcp6_seq_afinfo.show).
 *
 * For SEQ_START_TOKEN a column-header line is emitted. Otherwise the entry
 * is dispatched on st->state: listening and established entries go to
 * get_tcp6_sock(), open requests to get_openreq6() (with st->syn_wait_sk
 * and st->uid), and TIME_WAIT entries to get_timewait6_sock(). st->num is
 * passed as the entry number in every case.
 * The return statement is not visible in this excerpt.
 */
2006 static int tcp6_seq_show(struct seq_file *seq, void *v)
2008 struct tcp_iter_state *st;
2010 if (v == SEQ_START_TOKEN) {
2015 "st tx_queue rx_queue tr tm->when retrnsmt"
2016 " uid timeout inode\n");
2021 switch (st->state) {
2022 case TCP_SEQ_STATE_LISTENING:
2023 case TCP_SEQ_STATE_ESTABLISHED:
2024 get_tcp6_sock(seq, v, st->num);
2026 case TCP_SEQ_STATE_OPENREQ:
2027 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2029 case TCP_SEQ_STATE_TIME_WAIT:
2030 get_timewait6_sock(seq, v, st->num);
/*
 * Descriptor passed to tcp_proc_register() / tcp_proc_unregister() by
 * tcp6_proc_init() and tcp6_proc_exit(). The fields visible in this
 * excerpt are the owning module and the show callback.
 */
2037 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2041 .owner = THIS_MODULE,
2044 .show = tcp6_seq_show,
/*
 * Register the TCPv6 proc entry described by tcp6_seq_afinfo in the given
 * network namespace. Returns whatever tcp_proc_register() returns.
 */
2048 int tcp6_proc_init(struct net *net)
2050 return tcp_proc_register(net, &tcp6_seq_afinfo);
/*
 * Remove the TCPv6 proc entry described by tcp6_seq_afinfo from the given
 * network namespace; counterpart of tcp6_proc_init().
 */
2053 void tcp6_proc_exit(struct net *net)
2055 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * Protocol descriptor for TCP over IPv6; referenced by tcpv6_protosw.
 *
 * The IPv6-specific entries are connect, init, destroy, backlog_rcv and
 * hash, together with obj_size (a struct tcp6_sock) and the IPv6 timewait
 * and request-sock ops. The other hooks, the memory accounting pointers
 * and the sysctl arrays are the generic tcp_, inet_ and inet_csk_ ones.
 * The compat sockopt hooks are only present with CONFIG_COMPAT.
 */
2059 struct proto tcpv6_prot = {
2061 .owner = THIS_MODULE,
2063 .connect = tcp_v6_connect,
2064 .disconnect = tcp_disconnect,
2065 .accept = inet_csk_accept,
2067 .init = tcp_v6_init_sock,
2068 .destroy = tcp_v6_destroy_sock,
2069 .shutdown = tcp_shutdown,
2070 .setsockopt = tcp_setsockopt,
2071 .getsockopt = tcp_getsockopt,
2072 .recvmsg = tcp_recvmsg,
2073 .backlog_rcv = tcp_v6_do_rcv,
2074 .hash = tcp_v6_hash,
2075 .unhash = inet_unhash,
2076 .get_port = inet_csk_get_port,
2077 .enter_memory_pressure = tcp_enter_memory_pressure,
2078 .sockets_allocated = &tcp_sockets_allocated,
2079 .memory_allocated = &tcp_memory_allocated,
2080 .memory_pressure = &tcp_memory_pressure,
2081 .orphan_count = &tcp_orphan_count,
2082 .sysctl_mem = sysctl_tcp_mem,
2083 .sysctl_wmem = sysctl_tcp_wmem,
2084 .sysctl_rmem = sysctl_tcp_rmem,
2085 .max_header = MAX_TCP_HEADER,
2086 .obj_size = sizeof(struct tcp6_sock),
2087 .slab_flags = SLAB_DESTROY_BY_RCU,
2088 .twsk_prot = &tcp6_timewait_sock_ops,
2089 .rsk_prot = &tcp6_request_sock_ops,
2090 .h.hashinfo = &tcp_hashinfo,
2091 #ifdef CONFIG_COMPAT
2092 .compat_setsockopt = compat_tcp_setsockopt,
2093 .compat_getsockopt = compat_tcp_getsockopt,
/*
 * inet6 protocol handler table that tcpv6_init() registers for
 * IPPROTO_TCP: receive handler, error handler, and the GSO / GRO hooks.
 * NOTE(review): tcp_v6_rcv() calls xfrm6_policy_check() itself, which is
 * presumably why INET6_PROTO_NOPOLICY is set here - confirm.
 */
2097 static const struct inet6_protocol tcpv6_protocol = {
2098 .handler = tcp_v6_rcv,
2099 .err_handler = tcp_v6_err,
2100 .gso_send_check = tcp_v6_gso_send_check,
2101 .gso_segment = tcp_tso_segment,
2102 .gro_receive = tcp6_gro_receive,
2103 .gro_complete = tcp6_gro_complete,
2104 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry registered by tcpv6_init(): SOCK_STREAM sockets with
 * protocol IPPROTO_TCP use tcpv6_prot and inet6_stream_ops. The entry is
 * flagged INET_PROTOSW_PERMANENT; any further flags are not visible in
 * this excerpt.
 */
2107 static struct inet_protosw tcpv6_protosw = {
2108 .type = SOCK_STREAM,
2109 .protocol = IPPROTO_TCP,
2110 .prot = &tcpv6_prot,
2111 .ops = &inet6_stream_ops,
2114 .flags = INET_PROTOSW_PERMANENT |
/*
 * Per-namespace init hook (tcpv6_net_ops.init): creates the control socket
 * stored in net->ipv6.tcp_sk as a PF_INET6 / SOCK_RAW / IPPROTO_TCP socket.
 * Returns the result of inet_ctl_sock_create().
 */
2118 static int tcpv6_net_init(struct net *net)
2120 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2121 SOCK_RAW, IPPROTO_TCP, net);
/*
 * Per-namespace exit hook (tcpv6_net_ops.exit): destroys the control
 * socket net->ipv6.tcp_sk, then purges the AF_INET6 timewait sockets of
 * this namespace from tcp_hashinfo / tcp_death_row.
 */
2124 static void tcpv6_net_exit(struct net *net)
2126 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2127 inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
/*
 * Per-network-namespace operations registered by tcpv6_init() and
 * unregistered by tcpv6_exit().
 */
2130 static struct pernet_operations tcpv6_net_ops = {
2131 .init = tcpv6_net_init,
2132 .exit = tcpv6_net_exit,
2135 int __init tcpv6_init(void)
2139 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2143 /* register inet6 protocol */
2144 ret = inet6_register_protosw(&tcpv6_protosw);
2146 goto out_tcpv6_protocol;
2148 ret = register_pernet_subsys(&tcpv6_net_ops);
2150 goto out_tcpv6_protosw;
2155 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2157 inet6_unregister_protosw(&tcpv6_protosw);
/*
 * Undo the registrations made by tcpv6_init(), in reverse order of setup:
 * per-namespace operations first, then the socket-switch entry, then the
 * inet6 protocol handler.
 */
2161 void tcpv6_exit(void)
2163 unregister_pernet_subsys(&tcpv6_net_ops);
2164 inet6_unregister_protosw(&tcpv6_protosw);
2165 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);