3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
/* Forward declarations for functions defined later in this file. */
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
/* Per-address-family operation tables: ipv6_mapped is installed on sockets
 * that connect to v4-mapped addresses (see tcp_v6_connect), ipv6_specific
 * on native IPv6 sockets.  Definitions are outside this excerpt. */
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
/*
 * Hash a (local addr/port, foreign addr/port) 4-tuple into the established
 * hash table.  Only the low 32 bits of each IPv6 address feed the hash;
 * the result is folded down and masked by tcp_ehash_size - 1 (which assumes
 * tcp_ehash_size is a power of two).
 */
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
/*
 * Convenience wrapper: compute the established-table hash for a socket by
 * pulling its bound local address/port and peer address/port out of the
 * inet/ipv6 control blocks.
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;	/* local port, host byte order */
97 __u16 fport = inet->dport;	/* peer port, network byte order */
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/*
 * Decide whether binding sk to the port owned by bucket tb would conflict
 * with an existing owner.  A conflict requires: neither socket is bound to
 * a distinct device (or they are bound to the same one), SO_REUSEADDR does
 * not permit sharing (one side lacks sk_reuse, or the other is listening),
 * and the receive addresses overlap per ipv6_rcv_saddr_equal().
 * NOTE(review): some lines of the loop body are elided in this excerpt.
 */
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
122 /* Grrr, addr_type already calculated by caller, but I don't want
123  * to add some silly "cookie" argument to this method just for that.
124  * But it doesn't matter, the recalculation is in the rarest path
125  * this function ever takes.
/*
 * Acquire a local port for sk.  If snum is 0, search the ephemeral range
 * (sysctl_local_port_range) for a free port using the global rover under
 * tcp_portalloc_lock; otherwise look up/create the bind bucket for the
 * requested port and run the SO_REUSEADDR conflict checks.
 * NOTE(review): several lines (success/failure labels, rover advance) are
 * elided in this excerpt; error paths are not fully visible.
 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
/* Ephemeral search: walk the configured local port range once. */
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 if (tcp_port_rover < low)
145 rover = tcp_port_rover;
/* Per-candidate: take the bucket chain lock and scan for the port. */
149 head = &tcp_bhash[tcp_bhashfn(rover)];
150 spin_lock(&head->lock);
151 tb_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_port_rover = rover;
159 spin_unlock(&tcp_portalloc_lock);
161 /* Exhausted local port range during search? */
166 /* OK, here is the one we will use. */
/* Explicit-port path: find (or create below) the bucket for snum. */
169 head = &tcp_bhash[tcp_bhashfn(snum)];
170 spin_lock(&head->lock);
171 tb_for_each(tb, node, &head->chain)
172 if (tb->port == snum)
/* Port already owned: fastreuse short-circuits the conflict walk when
 * all current owners set SO_REUSEADDR and none is listening. */
178 if (tb && !hlist_empty(&tb->owners)) {
179 if (tb->fastreuse > 0 && sk->sk_reuse &&
180 sk->sk_state != TCP_LISTEN) {
184 if (tcp_v6_bind_conflict(sk, tb))
190 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
/* First owner decides the bucket's fastreuse flag; later non-reuse
 * owners clear it. */
192 if (hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
197 } else if (tb->fastreuse &&
198 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
202 if (!tcp_sk(sk)->bind_hash)
203 tcp_bind_hash(sk, tb, snum);
204 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
208 spin_unlock(&head->lock);
/*
 * Insert an unhashed socket into the proper lookup table: the listening
 * hash for TCP_LISTEN sockets, otherwise the established hash (caching the
 * computed slot in sk_hashent).  The lock acquisition around __sk_add_node
 * is elided in this excerpt.
 */
214 static __inline__ void __tcp_v6_hash(struct sock *sk)
216 struct hlist_head *list;
219 BUG_TRAP(sk_unhashed(sk));
221 if (sk->sk_state == TCP_LISTEN) {
222 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock;
226 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
227 list = &tcp_ehash[sk->sk_hashent].chain;
228 lock = &tcp_ehash[sk->sk_hashent].lock;
232 __sk_add_node(sk, list);
233 sock_prot_inc_use(sk->sk_prot);	/* bump per-protocol socket count */
/*
 * Public hash entry point.  Skips closed sockets; for sockets that were
 * redirected to the v4-mapped method table it presumably delegates to the
 * IPv4 hasher -- the branch bodies are elided in this excerpt, so the exact
 * behavior cannot be confirmed here.
 */
238 static void tcp_v6_hash(struct sock *sk)
240 if (sk->sk_state != TCP_CLOSE) {
241 struct tcp_sock *tp = tcp_sk(sk);
243 if (tp->af_specific == &ipv6_mapped) {
/*
 * Find the best listening socket for (daddr, hnum, dif).  Candidates must
 * match the local port and be PF_INET6; a bound receive address or bound
 * device must match exactly when present.  A scoring scheme (score/hiscore,
 * partially elided here) prefers more specific matches.  Runs under the
 * listening-hash read lock.
 */
253 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
256 struct hlist_node *node;
257 struct sock *result = NULL;
261 read_lock(&tcp_lhash_lock);
262 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
263 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
264 struct ipv6_pinfo *np = inet6_sk(sk);
267 if (!ipv6_addr_any(&np->rcv_saddr)) {
268 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
272 if (sk->sk_bound_dev_if) {
273 if (sk->sk_bound_dev_if != dif)
281 if (score > hiscore) {
289 read_unlock(&tcp_lhash_lock);
293 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
296  * The sockhash lock must be held as a reader here.
/*
 * Exact-match lookup in the established hash: first the live-connection
 * chain, then the TIME_WAIT chain that lives tcp_ehash_size buckets later
 * in the same table.  Returns with the bucket read lock dropped; the "hit"
 * label (elided here) presumably takes a reference before unlocking.
 */
299 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
303 struct tcp_ehash_bucket *head;
305 struct hlist_node *node;
306 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);	/* both ports packed in one u32 */
309 /* Optimize here for direct hit, only listening connections can
310  * have wildcards anyways.
312 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
313 head = &tcp_ehash[hash];
314 read_lock(&head->lock);
315 sk_for_each(sk, node, &head->chain) {
316 /* For IPV6 do the cheaper port and family tests first. */
317 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
318 goto hit; /* You sunk my battleship! */
320 /* Must check for a TIME_WAIT'er before going to listener hash. */
321 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
322 /* FIXME: acme: check this... */
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
/* Compare both ports at once via the packed u32 overlay on tw_dport. */
325 if(*((__u32 *)&(tw->tw_dport)) == ports &&
326 sk->sk_family == PF_INET6) {
327 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
328 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
329 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
333 read_unlock(&head->lock);
338 read_unlock(&head->lock);
/*
 * Full demux: try the established/TIME_WAIT tables first, then fall back
 * to the listener table.  hnum is the local port in host byte order.
 */
343 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
344 struct in6_addr *daddr, u16 hnum,
349 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
354 return tcp_v6_lookup_listener(daddr, hnum, dif);
/*
 * Exported lookup wrapper: converts dport from network to host byte order
 * and delegates to __tcp_v6_lookup.  Locking (elided here) is expected to
 * bracket the call.
 */
357 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
358 struct in6_addr *daddr, u16 dport,
364 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
370 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
374  * Open request hash tables.
/*
 * Hash a remote (address, port) into the listen socket's SYN queue using
 * the jhash mixing primitive, keyed by the per-listener random value rnd.
 * The mixing of rport/rnd into the accumulators falls in elided lines.
 */
377 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
381 a = raddr->s6_addr32[0];
382 b = raddr->s6_addr32[1];
383 c = raddr->s6_addr32[2];
385 a += JHASH_GOLDEN_RATIO;
386 b += JHASH_GOLDEN_RATIO;
388 __jhash_mix(a, b, c);
390 a += raddr->s6_addr32[3];
392 __jhash_mix(a, b, c);
394 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Walk one SYN-queue chain of listener tp looking for a pending request
 * matching (raddr, laddr, rport, iif).  On success *prevp is set so the
 * caller can unlink the request (the store into *prevp and the return are
 * in elided lines).  Callers hold the appropriate syn_wait lock.
 */
397 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
398 struct request_sock ***prevp,
400 struct in6_addr *raddr,
401 struct in6_addr *laddr,
404 struct tcp_listen_opt *lopt = tp->listen_opt;
405 struct request_sock *req, **prev;
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL;
409 prev = &req->dl_next) {
410 const struct tcp6_request_sock *treq = tcp6_rsk(req);
/* iif of 0 in the request acts as a wildcard for the input interface. */
412 if (inet_rsk(req)->rmt_port == rport &&
413 req->rsk_ops->family == AF_INET6 &&
414 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
415 ipv6_addr_equal(&treq->loc_addr, laddr) &&
416 (!treq->iif || treq->iif == iif)) {
417 BUG_TRAP(req->sk == NULL);
/*
 * Finish the TCP-over-IPv6 checksum: fold the pseudo-header (saddr, daddr,
 * length, IPPROTO_TCP) into the partial sum `base` covering the segment.
 */
426 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
427 struct in6_addr *saddr,
428 struct in6_addr *daddr,
431 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick the initial sequence number for a connection derived from skb,
 * dispatching on the link-layer protocol: native IPv6 headers use the
 * IPv6-flavoured secure ISN generator, otherwise the IPv4 one (the port
 * arguments to both generators fall in elided lines).
 */
434 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
436 if (skb->protocol == htons(ETH_P_IPV6)) {
437 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
438 skb->nh.ipv6h->saddr.s6_addr32,
442 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * Check whether sk may take over the 4-tuple implied by its addresses plus
 * local port lport, with the established-bucket write lock held.  A
 * TIME_WAIT occupant may be recycled (per sysctl_tcp_tw_reuse and its
 * timestamp state); a live established match means failure.  On success sk
 * is inserted into the established chain before unlocking.
 * NOTE(review): the success-path returns and the unique/not_unique labels
 * are elided in this excerpt.
 */
449 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
450 struct tcp_tw_bucket **twp)
452 struct inet_sock *inet = inet_sk(sk);
453 struct ipv6_pinfo *np = inet6_sk(sk);
454 struct in6_addr *daddr = &np->rcv_saddr;
455 struct in6_addr *saddr = &np->daddr;
456 int dif = sk->sk_bound_dev_if;
457 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
458 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
459 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
461 struct hlist_node *node;
462 struct tcp_tw_bucket *tw;
464 write_lock(&head->lock);
466 /* Check TIME-WAIT sockets first. */
467 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
468 tw = (struct tcp_tw_bucket*)sk2;
470 if(*((__u32 *)&(tw->tw_dport)) == ports &&
471 sk2->sk_family == PF_INET6 &&
472 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
473 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
474 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
475 struct tcp_sock *tp = tcp_sk(sk);
/* Recycle the TIME_WAIT slot when timestamps prove the old
 * incarnation is safely in the past (see tcp_ipv4.c). */
477 if (tw->tw_ts_recent_stamp &&
478 (!twp || (sysctl_tcp_tw_reuse &&
480 tw->tw_ts_recent_stamp > 1))) {
481 /* See comment in tcp_ipv4.c */
482 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
485 tp->rx_opt.ts_recent = tw->tw_ts_recent;
486 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
495 /* And established part... */
496 sk_for_each(sk2, node, &head->chain) {
497 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
/* Unique: install sk in the established hash while still locked. */
502 BUG_TRAP(sk_unhashed(sk));
503 __sk_add_node(sk, &head->chain);
504 sk->sk_hashent = hash;
505 sock_prot_inc_use(sk->sk_prot);
506 write_unlock(&head->lock);
510 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
512 /* Silly. Should hash-dance instead... */
513 tcp_tw_deschedule(tw);
514 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
521 write_unlock(&head->lock);
522 return -EADDRNOTAVAIL;
/*
 * Per-connection offset into the ephemeral port space, derived from the
 * socket's addresses via the secure (keyed) hash so port selection is not
 * predictable across 4-tuples.  Remaining arguments fall in elided lines.
 */
525 static inline u32 tcpv6_port_offset(const struct sock *sk)
527 const struct inet_sock *inet = inet_sk(sk);
528 const struct ipv6_pinfo *np = inet6_sk(sk);
530 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/*
 * Bind-and-hash for an outgoing connection.  If no local port is bound
 * yet (snum == 0), search the ephemeral range starting at a per-socket
 * secure offset, checking each candidate port's bind bucket and then the
 * established table for 4-tuple uniqueness.  If a port is already bound,
 * either prove it unique cheaply (sole owner) or fall through to the full
 * established-table check.
 * NOTE(review): loop-exit labels, the ok/ out paths and several branch
 * bodies are elided in this excerpt.
 */
535 static int tcp_v6_hash_connect(struct sock *sk)
537 unsigned short snum = inet_sk(sk)->num;
538 struct tcp_bind_hashbucket *head;
539 struct tcp_bind_bucket *tb;
543 int low = sysctl_local_port_range[0];
544 int high = sysctl_local_port_range[1];
545 int range = high - low;
549 u32 offset = hint + tcpv6_port_offset(sk);
550 struct hlist_node *node;
551 struct tcp_tw_bucket *tw = NULL;
554 for (i = 1; i <= range; i++) {
555 port = low + (i + offset) % range;
556 head = &tcp_bhash[tcp_bhashfn(port)];
557 spin_lock(&head->lock);
559 /* Does not bother with rcv_saddr checks,
560  * because the established check is already
563 tb_for_each(tb, node, &head->chain) {
564 if (tb->port == port) {
565 BUG_TRAP(!hlist_empty(&tb->owners));
/* fastreuse buckets can't be used for a connect()'d port. */
566 if (tb->fastreuse >= 0)
568 if (!__tcp_v6_check_established(sk,
576 tb = tcp_bucket_create(head, port);
578 spin_unlock(&head->lock);
585 spin_unlock(&head->lock);
589 return -EADDRNOTAVAIL;
594 /* Head lock still held and bh's disabled */
595 tcp_bind_hash(sk, tb, port);
596 if (sk_unhashed(sk)) {
597 inet_sk(sk)->sport = htons(port);
600 spin_unlock(&head->lock);
/* A displaced TIME_WAIT socket is torn down outside the lock. */
603 tcp_tw_deschedule(tw);
/* Pre-bound port path: sole ownership of the bucket proves uniqueness. */
611 head = &tcp_bhash[tcp_bhashfn(snum)];
612 tb = tcp_sk(sk)->bind_hash;
613 spin_lock_bh(&head->lock);
615 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
617 spin_unlock_bh(&head->lock);
620 spin_unlock(&head->lock);
621 /* No definite answer... Walk to established hash table */
622 ret = __tcp_v6_check_established(sk, snum, NULL);
/* Input interface index of an incoming packet, stashed in the skb's IPv6
 * control block by the receive path. */
629 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
631 return IP6CB(skb)->iif;
/*
 * connect() for TCP/IPv6.  Validates the sockaddr, resolves any flow
 * label, rejects multicast, enforces scope-id consistency for link-local
 * destinations, redirects v4-mapped destinations to tcp_v4_connect()
 * (installing the ipv6_mapped method table), then routes, binds a port via
 * tcp_v6_hash_connect(), picks a secure ISN and sends the SYN.
 * NOTE(review): many error-path lines and labels (failure/late_failure)
 * are elided in this excerpt.
 */
634 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
637 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
638 struct inet_sock *inet = inet_sk(sk);
639 struct ipv6_pinfo *np = inet6_sk(sk);
640 struct tcp_sock *tp = tcp_sk(sk);
641 struct in6_addr *saddr = NULL, *final_p = NULL, final;
643 struct dst_entry *dst;
647 if (addr_len < SIN6_LEN_RFC2133)
650 if (usin->sin6_family != AF_INET6)
651 return(-EAFNOSUPPORT);
653 memset(&fl, 0, sizeof(fl));
/* If the caller supplied a flow label, look it up and let it override
 * the destination address. */
656 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
657 IP6_ECN_flow_init(fl.fl6_flowlabel);
658 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
659 struct ip6_flowlabel *flowlabel;
660 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
661 if (flowlabel == NULL)
663 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
664 fl6_sock_release(flowlabel);
669 	 * connect() to INADDR_ANY means loopback (BSD'ism).
672 if(ipv6_addr_any(&usin->sin6_addr))
673 usin->sin6_addr.s6_addr[15] = 0x1;
675 addr_type = ipv6_addr_type(&usin->sin6_addr);
677 if(addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destination: reconcile sin6_scope_id with any bound device. */
680 if (addr_type&IPV6_ADDR_LINKLOCAL) {
681 if (addr_len >= sizeof(struct sockaddr_in6) &&
682 usin->sin6_scope_id) {
683 /* If interface is set while binding, indices
686 if (sk->sk_bound_dev_if &&
687 sk->sk_bound_dev_if != usin->sin6_scope_id)
690 sk->sk_bound_dev_if = usin->sin6_scope_id;
693 /* Connect to link-local address requires an interface */
694 if (!sk->sk_bound_dev_if)
/* Reset stale timestamp state when reconnecting to a different peer. */
698 if (tp->rx_opt.ts_recent_stamp &&
699 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
700 tp->rx_opt.ts_recent = 0;
701 tp->rx_opt.ts_recent_stamp = 0;
705 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
706 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: hand off to the IPv4 connect path and install
 * the mapped method table; on failure the IPv6 table is restored. */
712 if (addr_type == IPV6_ADDR_MAPPED) {
713 u32 exthdrlen = tp->ext_header_len;
714 struct sockaddr_in sin;
716 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
718 if (__ipv6_only_sock(sk))
721 sin.sin_family = AF_INET;
722 sin.sin_port = usin->sin6_port;
723 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
725 tp->af_specific = &ipv6_mapped;
726 sk->sk_backlog_rcv = tcp_v4_do_rcv;
728 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
731 tp->ext_header_len = exthdrlen;
732 tp->af_specific = &ipv6_specific;
733 sk->sk_backlog_rcv = tcp_v6_do_rcv;
736 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
738 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
745 if (!ipv6_addr_any(&np->rcv_saddr))
746 saddr = &np->rcv_saddr;
/* Build the flow descriptor and route the connection. */
748 fl.proto = IPPROTO_TCP;
749 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
750 ipv6_addr_copy(&fl.fl6_src,
751 (saddr ? saddr : &np->saddr));
752 fl.oif = sk->sk_bound_dev_if;
753 fl.fl_ip_dport = usin->sin6_port;
754 fl.fl_ip_sport = inet->sport;
/* With a routing header, route via the first hop and remember the real
 * destination in `final` for after the lookup. */
756 if (np->opt && np->opt->srcrt) {
757 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
758 ipv6_addr_copy(&final, &fl.fl6_dst);
759 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
763 err = ip6_dst_lookup(sk, &dst, &fl);
767 ipv6_addr_copy(&fl.fl6_dst, final_p);
769 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
776 ipv6_addr_copy(&np->rcv_saddr, saddr);
779 /* set the source address */
780 ipv6_addr_copy(&np->saddr, saddr);
781 inet->rcv_saddr = LOOPBACK4_IPV6;
783 ip6_dst_store(sk, dst, NULL);
784 sk->sk_route_caps = dst->dev->features &
785 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
787 tp->ext_header_len = 0;
789 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
791 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
793 inet->dport = usin->sin6_port;
795 tcp_set_state(sk, TCP_SYN_SENT);
796 err = tcp_v6_hash_connect(sk);
801 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
806 err = tcp_connect(sk);
/* Failure path: back out to CLOSE and clear routing state. */
813 tcp_set_state(sk, TCP_CLOSE);
817 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.  Locates the socket for the embedded
 * segment, discards errors for TIME_WAIT/CLOSE sockets and out-of-window
 * sequence numbers, handles PKT_TOOBIG by re-routing and shrinking the
 * path MTU, and otherwise converts the ICMP error per socket state
 * (pending request_sock, SYN_SENT/SYN_RECV, or soft error).
 * NOTE(review): lock/unlock calls and several branch bodies are elided
 * in this excerpt.
 */
821 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
822 int type, int code, int offset, __u32 info)
824 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
825 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
826 struct ipv6_pinfo *np;
832 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
835 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
839 if (sk->sk_state == TCP_TIME_WAIT) {
840 tcp_tw_put((struct tcp_tw_bucket*)sk);
845 if (sock_owned_by_user(sk))
846 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
848 if (sk->sk_state == TCP_CLOSE)
852 seq = ntohl(th->seq);
853 if (sk->sk_state != TCP_LISTEN &&
854 !between(seq, tp->snd_una, tp->snd_nxt)) {
855 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
/* Path MTU discovery: refresh the route and retransmit at the new MSS. */
861 if (type == ICMPV6_PKT_TOOBIG) {
862 struct dst_entry *dst = NULL;
864 if (sock_owned_by_user(sk))
866 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
869 /* icmp should have updated the destination cache entry */
870 dst = __sk_dst_check(sk, np->dst_cookie);
873 struct inet_sock *inet = inet_sk(sk);
876 /* BUGGG_FUTURE: Again, it is not clear how
877    to handle rthdr case. Ignore this complexity
880 memset(&fl, 0, sizeof(fl));
881 fl.proto = IPPROTO_TCP;
882 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
883 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
884 fl.oif = sk->sk_bound_dev_if;
885 fl.fl_ip_dport = inet->dport;
886 fl.fl_ip_sport = inet->sport;
888 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
889 sk->sk_err_soft = -err;
893 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
894 sk->sk_err_soft = -err;
901 if (tp->pmtu_cookie > dst_mtu(dst)) {
902 tcp_sync_mss(sk, dst_mtu(dst));
903 tcp_simple_retransmit(sk);
904 } /* else let the usual retransmit timer handle it */
909 icmpv6_err_convert(type, code, &err);
911 /* Might be for an request_sock */
912 switch (sk->sk_state) {
913 struct request_sock *req, **prev;
915 if (sock_owned_by_user(sk))
918 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
919 &hdr->saddr, tcp_v6_iif(skb));
923 /* ICMPs are not backlogged, hence we cannot get
924  * an established socket here.
926 BUG_TRAP(req->sk == NULL);
928 if (seq != tcp_rsk(req)->snt_isn) {
929 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
/* Drop the half-open request entirely on a hard ICMP error. */
933 tcp_synq_drop(sk, req, prev);
937 case TCP_SYN_RECV:  /* Cannot happen.
938 It can, it SYNs are crossed. --ANK */
939 if (!sock_owned_by_user(sk)) {
940 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
942 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
946 sk->sk_err_soft = err;
950 if (!sock_owned_by_user(sk) && np->recverr) {
952 sk->sk_error_report(sk);
954 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN+ACK for a pending connection request.  Routes
 * back to the requester (honouring an inverted source routing header from
 * the SYN when the listener opted in), checksums the segment against the
 * request's addresses and sends it via ip6_xmit().  A temporary inverted
 * option block is freed before returning.
 * NOTE(review): error labels (done/out) are elided in this excerpt.
 */
962 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
963 struct dst_entry *dst)
965 struct tcp6_request_sock *treq = tcp6_rsk(req);
966 struct ipv6_pinfo *np = inet6_sk(sk);
967 struct sk_buff * skb;
968 struct ipv6_txoptions *opt = NULL;
969 struct in6_addr * final_p = NULL, final;
973 memset(&fl, 0, sizeof(fl));
974 fl.proto = IPPROTO_TCP;
975 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
976 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
977 fl.fl6_flowlabel = 0;
979 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
980 fl.fl_ip_sport = inet_sk(sk)->sport;
/* Reverse the source route received with the SYN, if requested. */
985 np->rxopt.bits.srcrt == 2 &&
987 struct sk_buff *pktopts = treq->pktopts;
988 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
990 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
993 if (opt && opt->srcrt) {
994 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
995 ipv6_addr_copy(&final, &fl.fl6_dst);
996 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1000 err = ip6_dst_lookup(sk, &dst, &fl);
1004 ipv6_addr_copy(&fl.fl6_dst, final_p);
1005 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1009 skb = tcp_make_synack(sk, dst, req);
1011 struct tcphdr *th = skb->h.th;
1013 th->check = tcp_v6_check(th, skb->len,
1014 &treq->loc_addr, &treq->rmt_addr,
1015 csum_partial((char *)th, skb->len, skb->csum));
1017 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1018 err = ip6_xmit(sk, skb, &fl, opt, 0);
1019 if (err == NET_XMIT_CN)
/* Only free `opt` if it was the temporary inverted copy. */
1025 if (opt && opt != np->opt)
1026 sock_kfree_s(sk, opt, opt->tot_len);
/* Release the SYN skb retained on the request for packet options. */
1030 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1032 if (tcp6_rsk(req)->pktopts)
1033 kfree_skb(tcp6_rsk(req)->pktopts);
/* request_sock operations for IPv6 TCP: wires the SYN-ACK, ACK, reset and
 * destructor callbacks defined in this file into the generic SYN queue. */
1036 static struct request_sock_ops tcp6_request_sock_ops = {
1038 .obj_size	=	sizeof(struct tcp6_request_sock),
1039 .rtx_syn_ack	=	tcp_v6_send_synack,
1040 .send_ack	=	tcp_v6_reqsk_send_ack,
1041 .destructor	=	tcp_v6_reqsk_destructor,
1042 .send_reset	=	tcp_v6_send_reset
/*
 * Return non-zero when the socket has asked (via IPV6_RECV* options) for
 * any ancillary data that this skb actually carries: hop-by-hop options,
 * flow info, a routing header, or destination options.
 */
1045 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1047 struct ipv6_pinfo *np = inet6_sk(sk);
1048 struct inet6_skb_parm *opt = IP6CB(skb);
1050 if (np->rxopt.all) {
1051 if ((opt->hop && np->rxopt.bits.hopopts) ||
1052 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1053 np->rxopt.bits.rxflow) ||
1054 (opt->srcrt && np->rxopt.bits.srcrt) ||
1055 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in the TCP checksum for an outgoing segment.  With hardware
 * checksum offload (CHECKSUM_HW) only the pseudo-header sum is stored,
 * plus the offset the NIC must patch; otherwise compute the full sum
 * over the header and (in elided lines) the payload.
 */
1062 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1063 struct sk_buff *skb)
1065 struct ipv6_pinfo *np = inet6_sk(sk);
1067 if (skb->ip_summed == CHECKSUM_HW) {
1068 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
1069 skb->csum = offsetof(struct tcphdr, check);
1071 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1072 csum_partial((char *)th, th->doff<<2,
/*
 * Send a RST in response to skb, without a socket.  Builds a minimal TCP
 * header with swapped ports/addresses, mirrors either the peer's ack_seq
 * (when the incoming segment had ACK set) or derives an ack from its
 * sequence span, routes the reply and transmits it.  Non-unicast
 * destinations never get resets.
 * NOTE(review): the th->rst/ack assignments and some cleanup lines are
 * elided in this excerpt.
 */
1078 static void tcp_v6_send_reset(struct sk_buff *skb)
1080 struct tcphdr *th = skb->h.th, *t1;
1081 struct sk_buff *buff;
1087 if (!ipv6_unicast_destination(skb))
1091  * We need to grab some memory, and put together an RST,
1092  * and then put it into the queue to be sent.
1095 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1100 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1102 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1104 /* Swap the send and the receive. */
1105 memset(t1, 0, sizeof(*t1));
1106 t1->dest = th->source;
1107 t1->source = th->dest;
1108 t1->doff = sizeof(*t1)/4;
1112 t1->seq = th->ack_seq;
/* No ACK on the incoming segment: ACK everything it occupied. */
1115 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1116 + skb->len - (th->doff<<2));
1119 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1121 memset(&fl, 0, sizeof(fl));
1122 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1123 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1125 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1126 sizeof(*t1), IPPROTO_TCP,
1129 fl.proto = IPPROTO_TCP;
1130 fl.oif = tcp_v6_iif(skb);
1131 fl.fl_ip_dport = t1->dest;
1132 fl.fl_ip_sport = t1->source;
1134 /* sk = NULL, but it is safe for now. RST socket required. */
1135 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1137 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1138 dst_release(buff->dst);
1142 ip6_xmit(NULL, buff, &fl, NULL, 0);
1143 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1144 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * Send a bare ACK (no socket) in reply to skb with the given seq/ack/
 * window, optionally carrying a TCP timestamp option when ts != 0 (the
 * tot_len adjustment for the option falls in elided lines).  Used by the
 * TIME_WAIT and request-sock ACK paths below.
 */
1151 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1153 struct tcphdr *th = skb->h.th, *t1;
1154 struct sk_buff *buff;
1156 int tot_len = sizeof(struct tcphdr);
1161 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1166 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1168 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1170 /* Swap the send and the receive. */
1171 memset(t1, 0, sizeof(*t1));
1172 t1->dest = th->source;
1173 t1->source = th->dest;
1174 t1->doff = tot_len/4;
1175 t1->seq = htonl(seq);
1176 t1->ack_seq = htonl(ack);
1178 t1->window = htons(win);
/* Timestamp option: NOP, NOP, TIMESTAMP, then our clock and the echo. */
1181 u32 *ptr = (u32*)(t1 + 1);
1182 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1183 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1184 *ptr++ = htonl(tcp_time_stamp);
1188 buff->csum = csum_partial((char *)t1, tot_len, 0);
1190 memset(&fl, 0, sizeof(fl));
1191 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1192 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1194 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1195 tot_len, IPPROTO_TCP,
1198 fl.proto = IPPROTO_TCP;
1199 fl.oif = tcp_v6_iif(skb);
1200 fl.fl_ip_dport = t1->dest;
1201 fl.fl_ip_sport = t1->source;
1203 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1204 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1205 dst_release(buff->dst);
1208 ip6_xmit(NULL, buff, &fl, NULL, 0);
1209 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME_WAIT socket, echoing its saved send/receive
 * state and most recent peer timestamp. */
1216 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1218 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1220 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1221 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* ACK on behalf of a pending connection request (SYN-ACK retransmit ack):
 * sequence numbers come from the recorded ISNs. */
1226 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1228 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
/*
 * For a segment arriving on a listening socket: check the SYN queue for a
 * matching half-open request, then the established table for a socket that
 * completed the handshake on another CPU.  A TIME_WAIT hit is released.
 * The SYN-cookie branch is compiled out (#if 0).
 */
1232 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1234 struct request_sock *req, **prev;
1235 struct tcphdr *th = skb->h.th;
1236 struct tcp_sock *tp = tcp_sk(sk);
1239 /* Find possible connection requests. */
1240 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1241 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1243 return tcp_check_req(sk, skb, req, prev);
1245 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1247 &skb->nh.ipv6h->daddr,
1252 if (nsk->sk_state != TCP_TIME_WAIT) {
1256 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1260 #if 0 /*def CONFIG_SYN_COOKIES*/
1261 if (!th->rst && !th->syn && th->ack)
1262 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Insert a new connection request at the head of its SYN-queue chain,
 * arming the initial retransmit deadline.  The head update is published
 * under the listener's syn_wait_lock.
 */
1267 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1269 struct tcp_sock *tp = tcp_sk(sk);
1270 struct tcp_listen_opt *lopt = tp->listen_opt;
1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1274 req->expires = jiffies + TCP_TIMEOUT_INIT;
1276 req->dl_next = lopt->syn_table[h];
1278 write_lock(&tp->syn_wait_lock);
1279 lopt->syn_table[h] = req;
1280 write_unlock(&tp->syn_wait_lock);
1286 /* FIXME: this is substantially similar to the ipv4 code.
1287  * Can some kind of merge be done? -- erics
/*
 * Handle an incoming SYN on a listening socket: allocate and fill a
 * request_sock, parse TCP options, retain the SYN skb when the listener
 * wants its IPv6 ancillary data, pick an ISN, send the SYN-ACK and queue
 * the request.  IPv4 (v4-mapped) SYNs are handed to tcp_v4_conn_request.
 * NOTE(review): the drop paths and some labels are elided in this excerpt.
 */
1289 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1291 struct tcp6_request_sock *treq;
1292 struct ipv6_pinfo *np = inet6_sk(sk);
1293 struct tcp_options_received tmp_opt;
1294 struct tcp_sock *tp = tcp_sk(sk);
1295 struct request_sock *req = NULL;
1296 __u32 isn = TCP_SKB_CB(skb)->when;	/* non-zero for timewait recycling */
1298 if (skb->protocol == htons(ETH_P_IP))
1299 return tcp_v4_conn_request(sk, skb);
1301 if (!ipv6_unicast_destination(skb))
1305  *	There are no SYN attacks on IPv6, yet...
1307 if (tcp_synq_is_full(sk) && !isn) {
1308 if (net_ratelimit())
1309 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
/* Accept queue full and too few young requests: shed load. */
1313 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1316 req = reqsk_alloc(&tcp6_request_sock_ops);
1320 tcp_clear_options(&tmp_opt);
1321 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1322 tmp_opt.user_mss = tp->rx_opt.user_mss;
1324 tcp_parse_options(skb, &tmp_opt, 0);
1326 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1327 tcp_openreq_init(req, &tmp_opt, skb);
1329 treq = tcp6_rsk(req);
1330 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1331 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1332 TCP_ECN_create_request(req, skb->h.th);
1333 treq->pktopts = NULL;
/* Keep the SYN skb alive if its ancillary data will be needed later. */
1334 if (ipv6_opt_accepted(sk, skb) ||
1335 np->rxopt.bits.rxinfo ||
1336 np->rxopt.bits.rxhlim) {
1337 atomic_inc(&skb->users);
1338 treq->pktopts = skb;
1340 treq->iif = sk->sk_bound_dev_if;
1342 /* So that link locals have meaning */
1343 if (!sk->sk_bound_dev_if &&
1344 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1345 treq->iif = tcp_v6_iif(skb);
1348 isn = tcp_v6_init_sequence(sk,skb);
1350 tcp_rsk(req)->snt_isn = isn;
1352 if (tcp_v6_send_synack(sk, req, NULL))
1355 tcp_v6_synq_add(sk, req);
/* Drop path: count the failed attempt; never answer a SYN with RST here. */
1363 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1364 return 0; /* don't send reset */
/*
 * Complete the three-way handshake: create the child socket for an
 * accepted request.  IPv4 (v4-mapped) children are created by
 * tcp_v4_syn_recv_sock and then retrofitted with mapped addresses and the
 * ipv6_mapped method table.  Native IPv6 children get a route (honouring
 * any inverted source route from the SYN), a cloned ipv6_pinfo, cloned
 * packet options and a fresh MSS, then are hashed and inherit the port.
 * NOTE(review): several error labels (out, out_overflow) and branch
 * bodies are elided in this excerpt.
 */
1367 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1368 struct request_sock *req,
1369 struct dst_entry *dst)
1371 struct tcp6_request_sock *treq = tcp6_rsk(req);
1372 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1373 struct tcp6_sock *newtcp6sk;
1374 struct inet_sock *newinet;
1375 struct tcp_sock *newtp;
1377 struct ipv6_txoptions *opt;
/* --- v4-mapped path: let IPv4 build the child, then dress it up. --- */
1379 if (skb->protocol == htons(ETH_P_IP)) {
1384 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1389 newtcp6sk = (struct tcp6_sock *)newsk;
1390 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1392 newinet = inet_sk(newsk);
1393 newnp = inet6_sk(newsk);
1394 newtp = tcp_sk(newsk);
1396 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Synthesize ::ffff:a.b.c.d addresses from the IPv4 child's state. */
1398 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1401 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1404 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1406 newtp->af_specific = &ipv6_mapped;
1407 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1408 newnp->pktoptions  = NULL;
1410 newnp->mcast_oif   = tcp_v6_iif(skb);
1411 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1413 /* Charge newly allocated IPv6 socket. Though it is mapped,
1416 #ifdef INET_REFCNT_DEBUG
1417 atomic_inc(&inet6_sock_nr);
1420 /* It is tricky place. Until this moment IPv4 tcp
1421    worked with IPv6 af_tcp.af_specific.
1424 tcp_sync_mss(newsk, newtp->pmtu_cookie);
/* --- native IPv6 path. --- */
1431 if (sk_acceptq_is_full(sk))
1434 if (np->rxopt.bits.srcrt == 2 &&
1435 opt == NULL && treq->pktopts) {
1436 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1438 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
/* Route back to the peer if the caller did not supply a dst. */
1442 struct in6_addr *final_p = NULL, final;
1445 memset(&fl, 0, sizeof(fl));
1446 fl.proto = IPPROTO_TCP;
1447 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1448 if (opt && opt->srcrt) {
1449 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1450 ipv6_addr_copy(&final, &fl.fl6_dst);
1451 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1454 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1455 fl.oif = sk->sk_bound_dev_if;
1456 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1457 fl.fl_ip_sport = inet_sk(sk)->sport;
1459 if (ip6_dst_lookup(sk, &dst, &fl))
1463 ipv6_addr_copy(&fl.fl6_dst, final_p);
1465 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1469 newsk = tcp_create_openreq_child(sk, req, skb);
1473 /* Charge newly allocated IPv6 socket */
1474 #ifdef INET_REFCNT_DEBUG
1475 atomic_inc(&inet6_sock_nr);
1478 ip6_dst_store(newsk, dst, NULL);
1479 newsk->sk_route_caps = dst->dev->features &
1480 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1482 newtcp6sk = (struct tcp6_sock *)newsk;
1483 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1485 newtp = tcp_sk(newsk);
1486 newinet = inet_sk(newsk);
1487 newnp = inet6_sk(newsk);
1489 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1491 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1492 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1493 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1494 newsk->sk_bound_dev_if = treq->iif;
1496 /* Now IPv6 options...
1498    First: no IPv4 options.
1500 newinet->opt = NULL;
1503 newnp->rxopt.all = np->rxopt.all;
1505 /* Clone pktoptions received with SYN */
1506 newnp->pktoptions = NULL;
1507 if (treq->pktopts != NULL) {
1508 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1509 kfree_skb(treq->pktopts);
1510 treq->pktopts = NULL;
1511 if (newnp->pktoptions)
1512 skb_set_owner_r(newnp->pktoptions, newsk);
1515 newnp->mcast_oif = tcp_v6_iif(skb);
1516 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1518 /* Clone native IPv6 options from listening socket (if any)
1520    Yes, keeping reference count would be much more clever,
1521    but we make one more one thing there: reattach optmem
1525 newnp->opt = ipv6_dup_options(newsk, opt);
1527 sock_kfree_s(sk, opt, opt->tot_len);
1530 newtp->ext_header_len = 0;
1532 newtp->ext_header_len = newnp->opt->opt_nflen +
1533 newnp->opt->opt_flen;
1535 tcp_sync_mss(newsk, dst_mtu(dst));
1536 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1537 tcp_initialize_rcv_mss(newsk);
1539 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1541 __tcp_v6_hash(newsk);
1542 tcp_inherit_port(sk, newsk);
/* Error paths: count overflow/drop and free a temporary option block. */
1547 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1549 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1550 if (opt && opt != np->opt)
1551 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * tcp_v6_checksum_init - validate or prepare the TCP checksum of an
 * incoming IPv6 segment before the TCP receive path proper runs.
 * NOTE(review): this listing is elided (embedded original line numbers
 * jump); braces/returns between the visible statements are not shown.
 */
1556 static int tcp_v6_checksum_init(struct sk_buff *skb)
/* Hardware computed a checksum: verify it against the v6 pseudo-header
   now and mark the skb so no further software summing is needed. */
1558 if (skb->ip_summed == CHECKSUM_HW) {
1559 skb->ip_summed = CHECKSUM_UNNECESSARY;
1560 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1561 &skb->nh.ipv6h->daddr,skb->csum))
1563 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
/* Small packets (<= 76 bytes): cheapest to checksum in full here. */
1565 if (skb->len <= 76) {
1566 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1567 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1569 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Larger packets: seed skb->csum with the complemented pseudo-header
   sum — presumably so the payload sum can be folded in later. */
1571 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1572 &skb->nh.ipv6h->daddr,0);
/*
 * tcp_v6_do_rcv - per-socket receive handler for IPv6 TCP; installed as
 * tcpv6_prot.backlog_rcv, so it also runs for backlogged packets.
 * NOTE(review): interior lines (braces, labels such as the reset/discard
 * paths, returns) are elided from this listing.
 */
1577 /* The socket must have it's spinlock held when we get
1580 * We have a potential double-lock case here, so even when
1581 * doing backlog processing we use the BH locking scheme.
1582 * This is because we cannot sleep with the original spinlock
1585 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1587 struct ipv6_pinfo *np = inet6_sk(sk);
1588 struct tcp_sock *tp;
1589 struct sk_buff *opt_skb = NULL;
1591 /* Imagine: socket is IPv6. IPv4 packet arrives,
1592 goes to IPv4 receive handler and backlogged.
1593 From backlog it always goes here. Kerboom...
1594 Fortunately, tcp_rcv_established and rcv_established
1595 handle them correctly, but it is not case with
1596 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* v4-mapped traffic on a v6 socket: hand off to the IPv4 handler. */
1599 if (skb->protocol == htons(ETH_P_IP))
1600 return tcp_v4_do_rcv(sk, skb);
1602 if (sk_filter(sk, skb, 0))
1606 * socket locking is here for SMP purposes as backlog rcv
1607 * is currently called with bh processing disabled.
1610 /* Do Stevens' IPV6_PKTOPTIONS.
1612 Yes, guys, it is the only place in our code, where we
1613 may make it not affecting IPv4.
1614 The rest of code is protocol independent,
1615 and I do not like idea to uglify IPv4.
1617 Actually, all the idea behind IPV6_PKTOPTIONS
1618 looks not very well thought. For now we latch
1619 options, received in the last packet, enqueued
1620 by tcp. Feel free to propose better solution.
/* Clone kept aside so ancillary data survives TCP consuming skb. */
1624 opt_skb = skb_clone(skb, GFP_ATOMIC);
1626 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627 TCP_CHECK_TIMER(sk);
1628 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1630 TCP_CHECK_TIMER(sk);
1632 goto ipv6_pktoptions;
/* Slow path: re-verify header length and checksum before state work. */
1636 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1639 if (sk->sk_state == TCP_LISTEN) {
1640 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1645 * Queue it on the new socket if the new socket is active,
1646 * otherwise we just shortcircuit this and continue with
1650 if (tcp_child_process(sk, nsk, skb))
1653 __kfree_skb(opt_skb);
1658 TCP_CHECK_TIMER(sk);
1659 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1661 TCP_CHECK_TIMER(sk);
1663 goto ipv6_pktoptions;
1667 tcp_v6_send_reset(skb);
1670 __kfree_skb(opt_skb);
1674 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1679 /* Do you ask, what is it?
1681 1. skb was enqueued by tcp.
1682 2. skb is added to tail of read queue, rather than out of order.
1683 3. socket is not in passive state.
1684 4. Finally, it really contains options, which user wants to receive.
1687 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1688 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1689 if (np->rxopt.bits.rxinfo)
1690 np->mcast_oif = tcp_v6_iif(opt_skb);
1691 if (np->rxopt.bits.rxhlim)
1692 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
/* Latch the clone as the socket's current pktoptions; xchg returns
   the previous one (freed below, per the elided lines). */
1693 if (ipv6_opt_accepted(sk, opt_skb)) {
1694 skb_set_owner_r(opt_skb, sk);
1695 opt_skb = xchg(&np->pktoptions, opt_skb);
1697 __kfree_skb(opt_skb);
1698 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - protocol-level entry point for incoming IPv6 TCP
 * segments (registered as tcpv6_protocol.handler). Validates the
 * header, fills TCP_SKB_CB, looks up the owning socket and dispatches.
 * NOTE(review): labels (discard_and_relse, do_time_wait, ...) and
 * several statements are elided from this listing.
 */
1707 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1709 struct sk_buff *skb = *pskb;
1714 if (skb->pkt_type != PACKET_HOST)
1718 * Count it even if it's bad.
1720 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
/* Make sure the linear area covers the basic, then the full, header. */
1722 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1727 if (th->doff < sizeof(struct tcphdr)/4)
1729 if (!pskb_may_pull(skb, th->doff*4))
1732 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1733 tcp_v6_checksum_init(skb) < 0))
/* Stash parsed header fields in the skb control block for TCP. */
1737 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1738 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1739 skb->len - th->doff*4);
1740 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1741 TCP_SKB_CB(skb)->when = 0;
1742 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1743 TCP_SKB_CB(skb)->sacked = 0;
1745 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1746 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1752 if (sk->sk_state == TCP_TIME_WAIT)
1755 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1756 goto discard_and_relse;
1758 if (sk_filter(sk, skb, 0))
1759 goto discard_and_relse;
/* Owned by user context: backlog; otherwise try prequeue, else
   process directly. */
1765 if (!sock_owned_by_user(sk)) {
1766 if (!tcp_prequeue(sk, skb))
1767 ret = tcp_v6_do_rcv(sk, skb);
1769 sk_add_backlog(sk, skb);
1773 return ret ? -1 : 0;
/* No socket found: policy-check, then send a RST for valid segments. */
1776 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1779 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1781 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1783 tcp_v6_send_reset(skb);
/* TIME_WAIT handling: drop the tw reference on every early exit. */
1800 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801 tcp_tw_put((struct tcp_tw_bucket *) sk);
1805 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1806 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1807 tcp_tw_put((struct tcp_tw_bucket *) sk);
1811 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1812 skb, th, skb->len)) {
/* A SYN can legitimately reopen: hand over to a matching listener,
   retiring the timewait bucket first. */
1817 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1819 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1820 tcp_tw_put((struct tcp_tw_bucket *)sk);
1824 /* Fall through to ACK */
1827 tcp_v6_timewait_ack(sk, skb);
1831 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_rebuild_header - revalidate the socket's cached route and, if
 * it went stale, perform a fresh flow lookup (honouring any RT0 source
 * route in np->opt) and store the new dst on the socket.
 * NOTE(review): interior lines are elided from this listing.
 */
1836 static int tcp_v6_rebuild_header(struct sock *sk)
1839 struct dst_entry *dst;
1840 struct ipv6_pinfo *np = inet6_sk(sk);
1842 dst = __sk_dst_check(sk, np->dst_cookie);
1845 struct inet_sock *inet = inet_sk(sk);
1846 struct in6_addr *final_p = NULL, final;
/* Build the flow key from the connected socket's addresses/ports. */
1849 memset(&fl, 0, sizeof(fl));
1850 fl.proto = IPPROTO_TCP;
1851 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1852 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1853 fl.fl6_flowlabel = np->flow_label;
1854 fl.oif = sk->sk_bound_dev_if;
1855 fl.fl_ip_dport = inet->dport;
1856 fl.fl_ip_sport = inet->sport;
/* Source routing: route toward the first hop, remembering the real
   final destination in 'final'. */
1858 if (np->opt && np->opt->srcrt) {
1859 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1860 ipv6_addr_copy(&final, &fl.fl6_dst);
1861 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1865 err = ip6_dst_lookup(sk, &dst, &fl);
1867 sk->sk_route_caps = 0;
/* Restore the final destination before the xfrm policy lookup. */
1871 ipv6_addr_copy(&fl.fl6_dst, final_p);
1873 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1874 sk->sk_err_soft = -err;
1879 ip6_dst_store(sk, dst, NULL);
/* Device lacks v6 checksum offload naming; mask v4-only features. */
1880 sk->sk_route_caps = dst->dev->features &
1881 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
/*
 * tcp_v6_xmit - transmit one TCP segment over IPv6 (tcp_func.queue_xmit
 * for native v6 sockets). Resolves/validates the route, attaches the
 * dst to the skb and hands it to ip6_xmit().
 * NOTE(review): interior lines are elided from this listing.
 */
1887 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1889 struct sock *sk = skb->sk;
1890 struct inet_sock *inet = inet_sk(sk);
1891 struct ipv6_pinfo *np = inet6_sk(sk);
1893 struct dst_entry *dst;
1894 struct in6_addr *final_p = NULL, final;
1896 memset(&fl, 0, sizeof(fl));
1897 fl.proto = IPPROTO_TCP;
1898 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1899 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1900 fl.fl6_flowlabel = np->flow_label;
1901 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1902 fl.oif = sk->sk_bound_dev_if;
1903 fl.fl_ip_sport = inet->sport;
1904 fl.fl_ip_dport = inet->dport;
/* With an RT0 source route, route to the first hop and keep the real
   destination aside in 'final'. */
1906 if (np->opt && np->opt->srcrt) {
1907 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1908 ipv6_addr_copy(&final, &fl.fl6_dst);
1909 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Reuse the cached route when still valid; otherwise look one up. */
1913 dst = __sk_dst_check(sk, np->dst_cookie);
1916 int err = ip6_dst_lookup(sk, &dst, &fl);
1919 sk->sk_err_soft = -err;
1924 ipv6_addr_copy(&fl.fl6_dst, final_p);
1926 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1927 sk->sk_route_caps = 0;
1932 ip6_dst_store(sk, dst, NULL);
1933 sk->sk_route_caps = dst->dev->features &
1934 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1937 skb->dst = dst_clone(dst);
1939 /* Restore final destination back after routing done */
1940 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1942 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * v6_addr2sockaddr - report the connected peer of an AF_INET6 TCP
 * socket as a sockaddr_in6 (tcp_func.addr2sockaddr hook; presumably
 * backing getpeername() — confirm against the core TCP caller).
 */
1945 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1947 struct ipv6_pinfo *np = inet6_sk(sk);
1948 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1950 sin6->sin6_family = AF_INET6;
1951 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
/* dport is kept in network byte order, matching sin6_port. */
1952 sin6->sin6_port = inet_sk(sk)->dport;
1953 /* We do not store received flowlabel for TCP */
1954 sin6->sin6_flowinfo = 0;
1955 sin6->sin6_scope_id = 0;
/* A link-local peer is only meaningful per-interface: expose the
   bound device index as the scope id. */
1956 if (sk->sk_bound_dev_if &&
1957 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1958 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * tcp_v6_remember_stamp - tcp_func.remember_stamp hook. Timestamp
 * recycling is not implemented for IPv6; the body is a stub (its
 * return statement is elided from this listing).
 */
1961 static int tcp_v6_remember_stamp(struct sock *sk)
1963 /* Alas, not yet... */
/*
 * Address-family operations for native IPv6 TCP sockets; installed as
 * tp->af_specific by tcp_v6_init_sock().
 */
1967 static struct tcp_func ipv6_specific = {
1968 .queue_xmit = tcp_v6_xmit,
1969 .send_check = tcp_v6_send_check,
1970 .rebuild_header = tcp_v6_rebuild_header,
1971 .conn_request = tcp_v6_conn_request,
1972 .syn_recv_sock = tcp_v6_syn_recv_sock,
1973 .remember_stamp = tcp_v6_remember_stamp,
1974 .net_header_len = sizeof(struct ipv6hdr),
1976 .setsockopt = ipv6_setsockopt,
1977 .getsockopt = ipv6_getsockopt,
1978 .addr2sockaddr = v6_addr2sockaddr,
1979 .sockaddr_len = sizeof(struct sockaddr_in6)
/*
 * Address-family operations for v4-mapped sockets: IPv4 transmit and
 * header routines combined with IPv6 sockopt/sockaddr handling, so an
 * AF_INET6 socket can carry TCP-over-IPv4 transparently.
 */
1983 * TCP over IPv4 via INET6 API
1986 static struct tcp_func ipv6_mapped = {
1987 .queue_xmit = ip_queue_xmit,
1988 .send_check = tcp_v4_send_check,
1989 .rebuild_header = tcp_v4_rebuild_header,
1990 .conn_request = tcp_v6_conn_request,
1991 .syn_recv_sock = tcp_v6_syn_recv_sock,
1992 .remember_stamp = tcp_v4_remember_stamp,
1993 .net_header_len = sizeof(struct iphdr),
1995 .setsockopt = ipv6_setsockopt,
1996 .getsockopt = ipv6_getsockopt,
1997 .addr2sockaddr = v6_addr2sockaddr,
1998 .sockaddr_len = sizeof(struct sockaddr_in6)
/*
 * tcp_v6_init_sock - proto.init hook: set up per-socket TCP state
 * (timers, queues, RTO/ssthresh defaults, buffer sizes) and select the
 * native IPv6 af_specific operations.
 * NOTE(review): some interior lines are elided from this listing.
 */
2003 /* NOTE: A lot of things set to zero explicitly by call to
2004 * sk_alloc() so need not be done here.
2006 static int tcp_v6_init_sock(struct sock *sk)
2008 struct tcp_sock *tp = tcp_sk(sk);
2010 skb_queue_head_init(&tp->out_of_order_queue);
2011 tcp_init_xmit_timers(sk);
2012 tcp_prequeue_init(tp);
2014 tp->rto = TCP_TIMEOUT_INIT;
2015 tp->mdev = TCP_TIMEOUT_INIT;
2017 /* So many TCP implementations out there (incorrectly) count the
2018 * initial SYN frame in their delayed-ACK and congestion control
2019 * algorithms that we must have the following bandaid to talk
2020 * efficiently to them. -DaveM
2024 /* See draft-stevens-tcpca-spec-01 for discussion of the
2025 * initialization of these values.
2027 tp->snd_ssthresh = 0x7fffffff;
2028 tp->snd_cwnd_clamp = ~0;
/* 536 = classic default MSS (576 minimum MTU minus 40 byte headers). */
2029 tp->mss_cache_std = tp->mss_cache = 536;
2031 tp->reordering = sysctl_tcp_reordering;
2033 sk->sk_state = TCP_CLOSE;
2035 tp->af_specific = &ipv6_specific;
2037 sk->sk_write_space = sk_stream_write_space;
2038 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2040 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2041 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2043 atomic_inc(&tcp_sockets_allocated);
/*
 * tcp_v6_destroy_sock - proto.destroy hook: tear down the shared TCP
 * state via the IPv4 destructor, then release IPv6-specific state.
 */
2048 static int tcp_v6_destroy_sock(struct sock *sk)
2050 extern int tcp_v4_destroy_sock(struct sock *sk);
2052 tcp_v4_destroy_sock(sk);
2053 return inet6_destroy_sock(sk);
/*
 * get_openreq6 - format one SYN_RECV open request as a /proc/net/tcp6
 * row (index, hex addresses/ports, expire timer, uid, etc.).
 * NOTE(review): the seq_printf call and some lines are elided here.
 */
2056 /* Proc filesystem TCPv6 sock list dumping. */
2057 static void get_openreq6(struct seq_file *seq,
2058 struct sock *sk, struct request_sock *req, int i, int uid)
2060 struct in6_addr *dest, *src;
/* Remaining lifetime of the request, in jiffies (may go negative). */
2061 int ttd = req->expires - jiffies;
2066 src = &tcp6_rsk(req)->loc_addr;
2067 dest = &tcp6_rsk(req)->rmt_addr;
2069 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2070 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2072 src->s6_addr32[0], src->s6_addr32[1],
2073 src->s6_addr32[2], src->s6_addr32[3],
2074 ntohs(inet_sk(sk)->sport),
2075 dest->s6_addr32[0], dest->s6_addr32[1],
2076 dest->s6_addr32[2], dest->s6_addr32[3],
2077 ntohs(inet_rsk(req)->rmt_port),
2079 0,0, /* could print option size, but that is af dependent. */
2080 1, /* timers active (only the expire timer) */
2081 jiffies_to_clock_t(ttd),
2084 0, /* non standard timer */
2085 0, /* open_requests have no inode */
/*
 * get_tcp6_sock - format one established/listening TCPv6 socket as a
 * /proc/net/tcp6 row, including which timer (if any) is pending.
 * NOTE(review): timer-kind assignments and some lines are elided here.
 */
2089 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2091 struct in6_addr *dest, *src;
2094 unsigned long timer_expires;
2095 struct inet_sock *inet = inet_sk(sp);
2096 struct tcp_sock *tp = tcp_sk(sp);
2097 struct ipv6_pinfo *np = inet6_sk(sp);
2100 src = &np->rcv_saddr;
2101 destp = ntohs(inet->dport);
2102 srcp = ntohs(inet->sport);
/* Pick the soonest-relevant timer: retransmit, zero-window probe,
   then the generic sk_timer; fall back to "now" when none pends. */
2103 if (tp->pending == TCP_TIME_RETRANS) {
2105 timer_expires = tp->timeout;
2106 } else if (tp->pending == TCP_TIME_PROBE0) {
2108 timer_expires = tp->timeout;
2109 } else if (timer_pending(&sp->sk_timer)) {
2111 timer_expires = sp->sk_timer.expires;
2114 timer_expires = jiffies;
2118 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2119 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2121 src->s6_addr32[0], src->s6_addr32[1],
2122 src->s6_addr32[2], src->s6_addr32[3], srcp,
2123 dest->s6_addr32[0], dest->s6_addr32[1],
2124 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2126 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2128 jiffies_to_clock_t(timer_expires - jiffies),
2133 atomic_read(&sp->sk_refcnt), sp,
/* ssthresh >= 0xFFFF is shown as -1 ("effectively unlimited"). */
2134 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2135 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * get_timewait6_sock - format one TIME_WAIT bucket as a /proc/net/tcp6
 * row; queue sizes, uid and inode print as 0 since a tw bucket keeps
 * no such state.
 */
2139 static void get_timewait6_sock(struct seq_file *seq,
2140 struct tcp_tw_bucket *tw, int i)
2142 struct in6_addr *dest, *src;
/* Remaining TIME_WAIT lifetime in jiffies (may go negative). */
2144 int ttd = tw->tw_ttd - jiffies;
2149 dest = &tw->tw_v6_daddr;
2150 src = &tw->tw_v6_rcv_saddr;
2151 destp = ntohs(tw->tw_dport);
2152 srcp = ntohs(tw->tw_sport);
2155 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2156 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2158 src->s6_addr32[0], src->s6_addr32[1],
2159 src->s6_addr32[2], src->s6_addr32[3], srcp,
2160 dest->s6_addr32[0], dest->s6_addr32[1],
2161 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* "3" in the timer column marks the TIME_WAIT death-row timer. */
2162 tw->tw_substate, 0, 0,
2163 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2164 atomic_read(&tw->tw_refcnt), tw);
2167 #ifdef CONFIG_PROC_FS
/*
 * tcp6_seq_show - seq_file show callback for /proc/net/tcp6: print the
 * header row, then dispatch each entry to the per-state formatter.
 * NOTE(review): 'break's and the return are elided from this listing.
 */
2168 static int tcp6_seq_show(struct seq_file *seq, void *v)
2170 struct tcp_iter_state *st;
2172 if (v == SEQ_START_TOKEN) {
2177 "st tx_queue rx_queue tr tm->when retrnsmt"
2178 " uid timeout inode\n");
2183 switch (st->state) {
2184 case TCP_SEQ_STATE_LISTENING:
2185 case TCP_SEQ_STATE_ESTABLISHED:
2186 get_tcp6_sock(seq, v, st->num);
2188 case TCP_SEQ_STATE_OPENREQ:
2189 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2191 case TCP_SEQ_STATE_TIME_WAIT:
2192 get_timewait6_sock(seq, v, st->num);
/* Descriptor for the /proc/net/tcp6 seq_file; tcp6_seq_fops is filled
   in by the generic registration code, hence the forward declaration. */
2199 static struct file_operations tcp6_seq_fops;
2200 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2201 .owner = THIS_MODULE,
2204 .seq_show = tcp6_seq_show,
2205 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 seq_file entry at boot; returns the
   status of tcp_proc_register(). */
2208 int __init tcp6_proc_init(void)
2210 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister the /proc/net/tcp6 entry (module unload / cleanup path). */
2213 void tcp6_proc_exit(void)
2215 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * tcpv6_prot - struct proto for AF_INET6 SOCK_STREAM sockets; mostly
 * shares the protocol-independent TCP entry points with IPv4 and plugs
 * in the v6-specific init/destroy/rcv/hash/get_port handlers.
 */
2219 struct proto tcpv6_prot = {
2221 .owner = THIS_MODULE,
2223 .connect = tcp_v6_connect,
2224 .disconnect = tcp_disconnect,
2225 .accept = tcp_accept,
2227 .init = tcp_v6_init_sock,
2228 .destroy = tcp_v6_destroy_sock,
2229 .shutdown = tcp_shutdown,
2230 .setsockopt = tcp_setsockopt,
2231 .getsockopt = tcp_getsockopt,
2232 .sendmsg = tcp_sendmsg,
2233 .recvmsg = tcp_recvmsg,
2234 .backlog_rcv = tcp_v6_do_rcv,
2235 .hash = tcp_v6_hash,
2236 .unhash = tcp_unhash,
2237 .get_port = tcp_v6_get_port,
2238 .enter_memory_pressure = tcp_enter_memory_pressure,
2239 .sockets_allocated = &tcp_sockets_allocated,
2240 .memory_allocated = &tcp_memory_allocated,
2241 .memory_pressure = &tcp_memory_pressure,
2242 .sysctl_mem = sysctl_tcp_mem,
2243 .sysctl_wmem = sysctl_tcp_wmem,
2244 .sysctl_rmem = sysctl_tcp_rmem,
2245 .max_header = MAX_TCP_HEADER,
2246 .obj_size = sizeof(struct tcp6_sock),
2247 .rsk_prot = &tcp6_request_sock_ops,
/* inet6 protocol hooks for IPPROTO_TCP; NOPOLICY/FINAL flags: xfrm
   policy is checked inside tcp_v6_rcv itself, and TCP is a final
   (non-extension) header. */
2250 static struct inet6_protocol tcpv6_protocol = {
2251 .handler = tcp_v6_rcv,
2252 .err_handler = tcp_v6_err,
2253 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry mapping (SOCK_STREAM, IPPROTO_TCP) on AF_INET6
   to tcpv6_prot; PERMANENT so it cannot be unregistered at runtime. */
2256 extern struct proto_ops inet6_stream_ops;
2258 static struct inet_protosw tcpv6_protosw = {
2259 .type = SOCK_STREAM,
2260 .protocol = IPPROTO_TCP,
2261 .prot = &tcpv6_prot,
2262 .ops = &inet6_stream_ops,
2265 .flags = INET_PROTOSW_PERMANENT,
/*
 * tcpv6_init - boot-time setup: register the TCP handler with the
 * inet6 protocol table, then add the socket-switch entry. Registration
 * failure is only logged; the protosw is registered regardless.
 */
2268 void __init tcpv6_init(void)
2270 /* register inet6 protocol */
2271 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2272 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2273 inet6_register_protosw(&tcpv6_protosw);