[IPV6]: Generalise the tcp_v6_lookup routines
[safe/jmp/linux-2.6] / net / ipv6 / tcp_ipv6.c
1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
19  *                                      a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/ipv6.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
57 #include <net/xfrm.h>
58 #include <net/addrconf.h>
59 #include <net/snmp.h>
60 #include <net/dsfield.h>
61
62 #include <asm/uaccess.h>
63
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
66
67 static void     tcp_v6_send_reset(struct sk_buff *skb);
68 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
70                                   struct sk_buff *skb);
71
72 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
77
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
80                                 const struct in6_addr *faddr, const u16 fport,
81                                 const int ehash_size)
82 {
83         int hashent = (lport ^ fport);
84
85         hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
86         hashent ^= hashent>>16;
87         hashent ^= hashent>>8;
88         return (hashent & (ehash_size - 1));
89 }
90
91 static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
92 {
93         const struct inet_sock *inet = inet_sk(sk);
94         const struct ipv6_pinfo *np = inet6_sk(sk);
95         const struct in6_addr *laddr = &np->rcv_saddr;
96         const struct in6_addr *faddr = &np->daddr;
97         const __u16 lport = inet->num;
98         const __u16 fport = inet->dport;
99         return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
100 }
101
/*
 * Return non-zero if @sk conflicts with another socket already bound to
 * the same port (the owners of bucket @tb).  A conflict exists when the
 * two sockets could receive the same traffic: bound to the same device
 * (or either unbound), not both marked SO_REUSEADDR (or the other one
 * is listening), and with overlapping receive addresses.
 */
static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		     ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	/* node is non-NULL iff the loop broke out on a conflicting socket. */
	return node != NULL;
}
122
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
/*
 * Bind @sk to local port @snum, or to an automatically chosen ephemeral
 * port when @snum is zero.  Returns 0 on success, 1 on failure (port
 * range exhausted, bind conflict, or bucket allocation failure).
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		/* Ephemeral port: scan the sysctl range starting just
		 * past the last port handed out (port_rover).
		 */
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_hashinfo.portalloc_lock);
		if (tcp_hashinfo.port_rover < low)
			rover = low;
		else
			rover = tcp_hashinfo.port_rover;
		do {	rover++;
			if (rover > high)
				rover = low;
			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			/* Completely unused port found; note that we
			 * leave with head->lock still held.
			 */
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_hashinfo.port_rover = rover;
		spin_unlock(&tcp_hashinfo.portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (unlikely(remaining <= 0))
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		/* Explicit port: look for its existing bind bucket. */
		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		/* Port already owned: the fast path applies when every
		 * owner set SO_REUSEADDR and none is listening ...
		 */
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			/* ... otherwise a full conflict scan is needed. */
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (tb == NULL) {
		tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
		if (tb == NULL)
			goto fail_unlock;
	}
	/* Maintain fastreuse so future binders may skip the conflict scan. */
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, snum);
	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
222
/*
 * Insert an unhashed socket into the TCP hash tables: listeners go into
 * the listening hash, everything else into the established hash keyed
 * by the connection 4-tuple.  Caller must have BHs disabled.
 */
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		/* Writer-side entry that also waits out lhash readers. */
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		/* Cache the bucket index so unhash can find it again. */
		sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
245
246
247 static void tcp_v6_hash(struct sock *sk)
248 {
249         if (sk->sk_state != TCP_CLOSE) {
250                 struct tcp_sock *tp = tcp_sk(sk);
251
252                 if (tp->af_specific == &ipv6_mapped) {
253                         tcp_prot.hash(sk);
254                         return;
255                 }
256                 local_bh_disable();
257                 __tcp_v6_hash(sk);
258                 local_bh_enable();
259         }
260 }
261
/*
 * Find the best listening socket for a connection to @daddr:@hnum
 * arriving on interface @dif.  Scoring prefers exact matches: a base
 * score of 1 for an IPv6 listener on the port, +1 for a matching bound
 * address, +1 for a matching bound device; a perfect score (3) wins
 * outright.  Returns the chosen socket with a reference held, or NULL.
 */
static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
					  const struct in6_addr *daddr,
					  const unsigned short hnum,
					  const int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore=0;
	read_lock(&hashinfo->lhash_lock);
	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			/* Bound to a specific address?  It must match. */
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			/* Bound to a specific device?  It must match too. */
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				/* Exact address and device match. */
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	/* Take the reference before dropping the listener lock. */
	if (result)
		sock_hold(result);
	read_unlock(&hashinfo->lhash_lock);
	return result;
}
304
305 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
306  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
307  *
308  * The sockhash lock must be held as a reader here.
309  */
310
static inline struct sock *
		__inet6_lookup_established(struct inet_hashinfo *hashinfo,
					   const struct in6_addr *saddr,
					   const u16 sport,
					   const struct in6_addr *daddr,
					   const u16 hnum,
					   const int dif)
{
	struct sock *sk;
	const struct hlist_node *node;
	/* Both ports packed into one word for a single cheap compare. */
	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
				       hashinfo->ehash_size);
	struct inet_ehash_bucket *head = &hashinfo->ehash[hash];

	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (INET6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash.
	 * TIME_WAIT buckets live in the second half of the ehash table,
	 * ehash_size entries past the established chain.
	 */
	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
		const struct inet_timewait_sock *tw = inet_twsk(sk);

		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
		   sk->sk_family		== PF_INET6) {
			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);

			if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
			    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	/* Grab a reference while the chain lock still pins the socket. */
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}
357
358
359 static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
360                                           const struct in6_addr *saddr,
361                                           const u16 sport,
362                                           const struct in6_addr *daddr,
363                                           const u16 hnum,
364                                           const int dif)
365 {
366         struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
367                                                      daddr, hnum, dif);
368         if (sk)
369                 return sk;
370
371         return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
372 }
373
374 inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
375                                  const struct in6_addr *saddr, const u16 sport,
376                                  const struct in6_addr *daddr, const u16 dport,
377                                  const int dif)
378 {
379         struct sock *sk;
380
381         local_bh_disable();
382         sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
383         local_bh_enable();
384
385         return sk;
386 }
387
388 EXPORT_SYMBOL_GPL(inet6_lookup);
389
390
391 /*
392  * Open request hash tables.
393  */
394
395 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
396 {
397         u32 a, b, c;
398
399         a = raddr->s6_addr32[0];
400         b = raddr->s6_addr32[1];
401         c = raddr->s6_addr32[2];
402
403         a += JHASH_GOLDEN_RATIO;
404         b += JHASH_GOLDEN_RATIO;
405         c += rnd;
406         __jhash_mix(a, b, c);
407
408         a += raddr->s6_addr32[3];
409         b += (u32) rport;
410         __jhash_mix(a, b, c);
411
412         return c & (TCP_SYNQ_HSIZE - 1);
413 }
414
/*
 * Look up a pending connection request (SYN_RECV) on listener @sk
 * matching the peer @raddr:@rport, local address @laddr and, when the
 * request recorded one, the arrival interface @iif.  On success the
 * predecessor slot is stored in *@prevp (so the caller can unlink) and
 * the request is returned; otherwise returns NULL.
 */
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;  

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			/* A queued request has no child socket yet. */
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
444
445 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
446                                    struct in6_addr *saddr, 
447                                    struct in6_addr *daddr, 
448                                    unsigned long base)
449 {
450         return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
451 }
452
453 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
454 {
455         if (skb->protocol == htons(ETH_P_IPV6)) {
456                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
457                                                     skb->nh.ipv6h->saddr.s6_addr32,
458                                                     skb->h.th->dest,
459                                                     skb->h.th->source);
460         } else {
461                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
462                                                   skb->nh.iph->saddr,
463                                                   skb->h.th->dest,
464                                                   skb->h.th->source);
465         }
466 }
467
/*
 * Check that the connection 4-tuple (sk's addresses, @lport, peer port)
 * is not already in use in the established hash.  A TIME_WAIT socket
 * holding the tuple may be recycled when PAWS timestamps make it safe:
 * if @twp is non-NULL the TIME_WAIT socket is handed back for the
 * caller to dispose of, otherwise it is descheduled here.  On success
 * @sk is inserted into the established hash and 0 is returned;
 * -EADDRNOTAVAIL means the tuple is taken.
 */
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
				       tcp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
		   sk2->sk_family		== PF_INET6	&&
		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			/* With timestamps the tuple may be reused: seed
			 * write_seq past the old connection's sequence
			 * space so stale segments cannot be accepted.
			 */
			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				/* Hold the TIME_WAIT sock across unlock. */
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	/* Tuple is free: hash the socket while still holding the lock. */
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		/* Caller disposes of the recycled TIME_WAIT socket. */
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
547
548 static inline u32 tcpv6_port_offset(const struct sock *sk)
549 {
550         const struct inet_sock *inet = inet_sk(sk);
551         const struct ipv6_pinfo *np = inet6_sk(sk);
552
553         return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
554                                            np->daddr.s6_addr32,
555                                            inet->dport);
556 }
557
/*
 * Hash a connecting socket, first allocating an ephemeral local port if
 * it is not yet bound.  Port selection walks the local range starting
 * at a per-tuple offset, accepting a port when either its bind bucket
 * is brand new (fastreuse == -1 marks connect-only buckets) or the
 * established-hash check says the full 4-tuple is unique (possibly
 * recycling a TIME_WAIT entry).  Returns 0 or a negative errno.
 */
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					/* fastreuse >= 0 means bind()ed
					 * sockets own this port; skip it.
					 */
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			/* Fresh bucket: mark it connect-only. */
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		/* Advance the rover so the next connect starts elsewhere. */
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			/* Evict the recycled TIME_WAIT entry. */
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb   = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		/* Sole owner of an explicitly bound port: no conflict
		 * is possible, hash directly.
		 */
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		/* Drop only the spinlock here; BHs stay disabled for
		 * the established check and are re-enabled at 'out'.
		 */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
651
/*
 * Connect an IPv6 TCP socket to @uaddr.  Handles flow labels and
 * link-local scope ids, and switches the socket to the v4-mapped
 * operations (delegating to tcp_v4_connect) for IPv4-mapped
 * destinations.  Otherwise performs the route/xfrm lookup, binds a
 * local port, picks the initial sequence number and sends the SYN.
 * Returns 0 or a negative errno.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133) 
		return -EINVAL;

	if (usin->sin6_family != AF_INET6) 
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		/* Caller-supplied flow info: a non-zero label must name
		 * a flow this socket has joined, and that flow fixes
		 * the destination address.
		 */
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	
	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1; 

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reconnecting to a different peer invalidates the cached
	 * timestamp state and the old sequence space.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Switch to the v4-mapped operations before handing off
		 * to the IPv4 connect path; undone below on failure.
		 */
		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			/* Mirror the chosen IPv4 source address as a
			 * v4-mapped IPv6 address.
			 */
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	/* With a routing header, route towards the first hop but keep
	 * the real destination for after the lookup.
	 */
	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		/* Adopt the source address chosen by the route lookup. */
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
838
/*
 * ICMPv6 error handler for TCP over IPv6.
 *
 * @skb:    the ICMPv6 error packet; skb->data points at the embedded
 *          offending IPv6 header.
 * @opt:    parsed IPv6 options of the ICMP packet (unused here).
 * @type:   ICMPv6 type of the error.
 * @code:   ICMPv6 code of the error.
 * @offset: offset of the embedded transport header within skb->data.
 * @info:   type-specific info (e.g. MTU for PKT_TOOBIG; unused here).
 *
 * Looks up the socket the error refers to, handles Path-MTU discovery
 * (ICMPV6_PKT_TOOBIG) by refreshing the route and shrinking the MSS, and
 * otherwise converts the ICMP error into a socket error, taking care with
 * LISTEN sockets (the error may be for a pending request_sock) and with
 * sockets currently owned by user context.
 */
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	/* Headers of the *offending* packet, embedded in the ICMP payload. */
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	/* Note: daddr/saddr are swapped relative to normal receive because
	 * the embedded packet was one *we* sent. */
	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	/* Time-wait sockets carry no TCP state worth updating; just drop
	 * the reference the lookup took. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* Ignore errors that do not refer to data currently in flight. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		/* Path MTU discovery: resync the MSS with the (possibly
		 * updated) route MTU and retransmit if it shrank. */
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/* Error refers to our SYN-ACK: drop the half-open request. */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, it SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* Established etc.: hard error only if the app asked for it. */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);	/* drop the reference taken by inet6_lookup() */
}
979
980
/*
 * Build and transmit a SYN-ACK for a pending connection request.
 *
 * @sk:  the listening socket.
 * @req: the request_sock describing the half-open connection.
 * @dst: optional pre-resolved route; when NULL the route (and any source
 *       routing header from np->opt or the saved SYN's pktopts) is looked
 *       up here.
 *
 * Returns 0 on success (NET_XMIT_CN is treated as success), a negative
 * errno otherwise.  Consumes one reference on @dst in all cases, and frees
 * any routing-header options allocated locally by ipv6_invert_rthdr().
 */
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		/* No socket options: try to derive a reply routing header
		 * by inverting the one received on the SYN. */
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		/* With source routing, route towards the first hop and keep
		 * the real destination in `final` for after the lookup. */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	/* Only free opt if it was allocated here (inverted rthdr), not if
	 * it is the socket's own np->opt. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
1048
1049 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1050 {
1051         if (tcp6_rsk(req)->pktopts)
1052                 kfree_skb(tcp6_rsk(req)->pktopts);
1053 }
1054
/* Operations for IPv6 TCP request socks (half-open connections). */
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
1063
1064 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1065 {
1066         struct ipv6_pinfo *np = inet6_sk(sk);
1067         struct inet6_skb_parm *opt = IP6CB(skb);
1068
1069         if (np->rxopt.all) {
1070                 if ((opt->hop && np->rxopt.bits.hopopts) ||
1071                     ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1072                      np->rxopt.bits.rxflow) ||
1073                     (opt->srcrt && np->rxopt.bits.srcrt) ||
1074                     ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1075                         return 1;
1076         }
1077         return 0;
1078 }
1079
1080
/*
 * Fill in the TCP checksum for an outgoing segment on @sk.
 *
 * With hardware checksum offload (CHECKSUM_HW) only the pseudo-header sum
 * is stored (complemented) and skb->csum records where the device must
 * write the final checksum; otherwise the full checksum is computed in
 * software over the header plus the already-accumulated payload sum.
 */
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
1095
1096
1097 static void tcp_v6_send_reset(struct sk_buff *skb)
1098 {
1099         struct tcphdr *th = skb->h.th, *t1; 
1100         struct sk_buff *buff;
1101         struct flowi fl;
1102
1103         if (th->rst)
1104                 return;
1105
1106         if (!ipv6_unicast_destination(skb))
1107                 return; 
1108
1109         /*
1110          * We need to grab some memory, and put together an RST,
1111          * and then put it into the queue to be sent.
1112          */
1113
1114         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1115                          GFP_ATOMIC);
1116         if (buff == NULL) 
1117                 return;
1118
1119         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1120
1121         t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1122
1123         /* Swap the send and the receive. */
1124         memset(t1, 0, sizeof(*t1));
1125         t1->dest = th->source;
1126         t1->source = th->dest;
1127         t1->doff = sizeof(*t1)/4;
1128         t1->rst = 1;
1129   
1130         if(th->ack) {
1131                 t1->seq = th->ack_seq;
1132         } else {
1133                 t1->ack = 1;
1134                 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1135                                     + skb->len - (th->doff<<2));
1136         }
1137
1138         buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1139
1140         memset(&fl, 0, sizeof(fl));
1141         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1142         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1143
1144         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1145                                     sizeof(*t1), IPPROTO_TCP,
1146                                     buff->csum);
1147
1148         fl.proto = IPPROTO_TCP;
1149         fl.oif = inet6_iif(skb);
1150         fl.fl_ip_dport = t1->dest;
1151         fl.fl_ip_sport = t1->source;
1152
1153         /* sk = NULL, but it is safe for now. RST socket required. */
1154         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1155
1156                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1157                         dst_release(buff->dst);
1158                         return;
1159                 }
1160
1161                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1162                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1163                 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1164                 return;
1165         }
1166
1167         kfree_skb(buff);
1168 }
1169
1170 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1171 {
1172         struct tcphdr *th = skb->h.th, *t1;
1173         struct sk_buff *buff;
1174         struct flowi fl;
1175         int tot_len = sizeof(struct tcphdr);
1176
1177         if (ts)
1178                 tot_len += 3*4;
1179
1180         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1181                          GFP_ATOMIC);
1182         if (buff == NULL)
1183                 return;
1184
1185         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1186
1187         t1 = (struct tcphdr *) skb_push(buff,tot_len);
1188
1189         /* Swap the send and the receive. */
1190         memset(t1, 0, sizeof(*t1));
1191         t1->dest = th->source;
1192         t1->source = th->dest;
1193         t1->doff = tot_len/4;
1194         t1->seq = htonl(seq);
1195         t1->ack_seq = htonl(ack);
1196         t1->ack = 1;
1197         t1->window = htons(win);
1198         
1199         if (ts) {
1200                 u32 *ptr = (u32*)(t1 + 1);
1201                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1202                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1203                 *ptr++ = htonl(tcp_time_stamp);
1204                 *ptr = htonl(ts);
1205         }
1206
1207         buff->csum = csum_partial((char *)t1, tot_len, 0);
1208
1209         memset(&fl, 0, sizeof(fl));
1210         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1211         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1212
1213         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1214                                     tot_len, IPPROTO_TCP,
1215                                     buff->csum);
1216
1217         fl.proto = IPPROTO_TCP;
1218         fl.oif = inet6_iif(skb);
1219         fl.fl_ip_dport = t1->dest;
1220         fl.fl_ip_sport = t1->source;
1221
1222         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1223                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1224                         dst_release(buff->dst);
1225                         return;
1226                 }
1227                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1228                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1229                 return;
1230         }
1231
1232         kfree_skb(buff);
1233 }
1234
1235 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1236 {
1237         struct inet_timewait_sock *tw = inet_twsk(sk);
1238         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1239
1240         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1241                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1242                         tcptw->tw_ts_recent);
1243
1244         inet_twsk_put(tw);
1245 }
1246
1247 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1248 {
1249         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1250 }
1251
1252
/*
 * For a segment arriving on a LISTEN socket, decide which socket should
 * process it.
 *
 * Returns: a child socket created from a matching request_sock, @sk itself
 * if the segment should continue normal listen processing, or NULL if it
 * must be discarded (e.g. it matched a time-wait socket).  A returned
 * socket other than @sk comes locked (bh_lock_sock).
 */
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	/* No pending request: maybe an already-established (or time-wait)
	 * connection matches instead. */
	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		/* Time-wait match: drop its ref and discard the segment. */
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1284
1285 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1286 {
1287         struct inet_connection_sock *icsk = inet_csk(sk);
1288         struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1289         const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1290
1291         reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1292         inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1293 }
1294
1295
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
/*
 * Handle an incoming SYN on a listening IPv6 socket: validate it, allocate
 * a request_sock, record addresses/options from the SYN, send a SYN-ACK
 * and queue the request.  IPv4-mapped traffic is delegated to
 * tcp_v4_conn_request().  Always returns 0 (never asks caller to reset).
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	/* Accept queue full and too many young requests: refuse. */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	/* Pin the SYN skb if the socket wants its IPv6 options echoed back
	 * later; released by tcp_v6_reqsk_destructor. */
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	/* reqsk_free runs the destructor, releasing any pinned pktopts. */
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
1376
/*
 * Create the child socket for a completed three-way handshake.
 *
 * @sk:  the listening socket.
 * @skb: the final ACK of the handshake.
 * @req: the request_sock being promoted.
 * @dst: optional pre-resolved route (looked up here when NULL).
 *
 * Handles two cases: an IPv4 (v6-mapped) connection, delegated to
 * tcp_v4_syn_recv_sock() and then patched up with mapped addresses and the
 * mapped af_specific ops; or a native IPv6 connection, for which the child
 * inherits addresses from the request and IPv6 options from the listener.
 * Returns the new socket, or NULL on failure (route lookup, memory, or
 * accept-queue overflow).
 */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		/* Express the IPv4 addresses as ::ffff:a.b.c.d mapped ones. */
		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	/* No socket options: try to invert the routing header received
	 * with the SYN for the reply direction. */
	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		/* Source routing: route to the first hop, remember the
		 * final destination for after the lookup. */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	/* IPv4 view of an IPv6 socket: loopback sentinel addresses. */
	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	/* Free options only if allocated here, never the listener's own. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
1565
/*
 * Prepare the checksum state of an incoming TCP/IPv6 segment.
 *
 * Returns 0 when the packet may continue (either verified now or deferred
 * via a partial pseudo-header sum in skb->csum), -1 when a short packet
 * fails full software verification and must be dropped.  Packets of at
 * most 76 bytes are checksummed in full immediately; larger ones defer
 * the payload sum to copy time.
 */
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		/* tcp_v6_check() returns 0 when the checksum is good. */
		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr,skb->csum))
			return 0;
		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		/* Seed skb->csum with the pseudo-header; the rest is folded
		 * in later when the data is copied/checksummed. */
		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr,0);
	}
	return 0;
}
1586
1587 /* The socket must have it's spinlock held when we get
1588  * here.
1589  *
1590  * We have a potential double-lock case here, so even when
1591  * doing backlog processing we use the BH locking scheme.
1592  * This is because we cannot sleep with the original spinlock
1593  * held.
1594  */
/* Deliver one TCP segment to an IPv6 socket; the caller holds the socket
 * spinlock (see comment above).  Native IPv6 segments are fed to the TCP
 * state machine and IPV6_PKTOPTIONS data is latched on np->pktoptions;
 * mapped IPv4 segments are delegated to tcp_v4_do_rcv().  Returns 0 for
 * all native paths (the skb is always consumed here).
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	                                       --ANK (980728)
	 */
	/* Clone now: the state machine below may consume skb, but the
	 * options must survive until we reach ipv6_pktoptions. */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) { 
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if(nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			/* Latch the clone as the most recent pktoptions;
			 * xchg() hands us back the previous one to free. */
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
1716
/* Protocol handler for every inbound IPv6 TCP segment: validate header
 * length and checksum, fill the TCP control block, look up the owning
 * socket and hand the segment over (directly, via the prequeue, or via
 * the backlog when the socket is owned by a user context).  TIME_WAIT
 * sockets are dispatched through tcp_timewait_state_process().
 */
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;	
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	/* Data offset smaller than the minimal TCP header is malformed. */
	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	/* Reload th: the pskb_may_pull() above may have moved the header. */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN and FIN as one sequence unit each. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		/* Socket locked by a process context: defer to the backlog,
		 * which is drained through tcp_v6_do_rcv() on release. */
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		/* Valid segment for a nonexistent connection: answer RST. */
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		/* A new SYN hit a TIME_WAIT socket: if a listener exists,
		 * retire the timewait entry and restart processing on it. */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1849
/* Revalidate the socket's cached route and rebuild it if the dst entry
 * has expired (cookie mismatch in __sk_dst_check).  Returns 0 when a
 * valid route is cached, or a negative errno from the route/xfrm lookup.
 */
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		/* With a routing header, route towards the first hop but
		 * remember the true final destination for the xfrm lookup. */
		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			/* NOTE(review): here xfrm failure sets sk_err_soft
			 * while tcp_v6_xmit() clears sk_route_caps instead —
			 * confirm whether this asymmetry is intentional. */
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		/* Offloads are IPv4-only here: mask out checksum and TSO. */
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
1900
/* Transmit one TCP segment for an IPv6 socket: build the flow, (re)use or
 * rebuild the cached route, then hand the skb to ip6_xmit().  Returns the
 * value of ip6_xmit() or a negative errno from route/xfrm lookup.
 */
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	/* With a routing header, route towards the first hop but remember
	 * the true final destination (restored after routing, below). */
	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			/* NOTE(review): error-field usage is mirrored
			 * relative to tcp_v6_rebuild_header() — confirm. */
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		/* Offloads are IPv4-only here: mask out checksum and TSO. */
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
1958
1959 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1960 {
1961         struct ipv6_pinfo *np = inet6_sk(sk);
1962         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1963
1964         sin6->sin6_family = AF_INET6;
1965         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1966         sin6->sin6_port = inet_sk(sk)->dport;
1967         /* We do not store received flowlabel for TCP */
1968         sin6->sin6_flowinfo = 0;
1969         sin6->sin6_scope_id = 0;
1970         if (sk->sk_bound_dev_if &&
1971             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1972                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1973 }
1974
/* Timestamp caching for TIME_WAIT recycling is not implemented for IPv6;
 * report "nothing remembered" so callers take the generic path.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	return 0;	/* Alas, not yet... */
}
1980
/* Address-family-specific operations for a native IPv6 TCP socket. */
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
1995
1996 /*
1997  *      TCP over IPv4 via INET6 API
1998  */
1999
2000 static struct tcp_func ipv6_mapped = {
2001         .queue_xmit     =       ip_queue_xmit,
2002         .send_check     =       tcp_v4_send_check,
2003         .rebuild_header =       inet_sk_rebuild_header,
2004         .conn_request   =       tcp_v6_conn_request,
2005         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
2006         .remember_stamp =       tcp_v4_remember_stamp,
2007         .net_header_len =       sizeof(struct iphdr),
2008
2009         .setsockopt     =       ipv6_setsockopt,
2010         .getsockopt     =       ipv6_getsockopt,
2011         .addr2sockaddr  =       v6_addr2sockaddr,
2012         .sockaddr_len   =       sizeof(struct sockaddr_in6)
2013 };
2014
2015
2016
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
/* Per-socket initializer for AF_INET6 TCP sockets: sets up timers,
 * queues, congestion-control defaults, and the IPv6 af_specific ops.
 * Always returns 0.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	/* Conservative initial MSS until path MSS is learned. */
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	/* Start fully IPv6; may be switched to ipv6_mapped elsewhere
	 * when the socket connects to a v4-mapped address. */
	tp->af_specific = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
2062
/* Per-socket destructor: run the shared IPv4/IPv6 TCP teardown first,
 * then release the IPv6-specific socket state.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
2070
/* Proc filesystem TCPv6 sock list dumping. */

/* Emit one /proc/net/tcp6 row for a pending (SYN_RECV) open request.
 * @i is the row index, @uid the owner of the listening socket.
 */
static void get_openreq6(struct seq_file *seq, 
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;	/* ticks until the request expires */

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */  
		   jiffies_to_clock_t(ttd), 
		   req->retrans,
		   uid,
		   0,  /* non standard timer */  
		   0, /* open_requests have no inode */
		   0, req);
}
2103
/* Emit one /proc/net/tcp6 row for an established or listening socket.
 * The timer_active code reported is: 1 retransmit, 4 zero-window probe,
 * 2 sk_timer pending (presumably keepalive — confirm), 0 none.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		/* No timer: makes the printed remaining time read as 0. */
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state, 
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
2157
/* Emit one /proc/net/tcp6 row for a TIME_WAIT socket (timer code 3,
 * remaining lifetime printed in clock ticks).
 */
static void get_timewait6_sock(struct seq_file *seq, 
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;		/* ticks until the entry dies */

	if (ttd < 0)
		ttd = 0;

	dest = &tcp6tw->tw_v6_daddr;
	src  = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
2186
#ifdef CONFIG_PROC_FS
/* seq_file show callback for /proc/net/tcp6: print the header for the
 * start token, otherwise dispatch on the iterator state to the matching
 * row formatter above.
 */
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

/* Filled in by tcp_proc_register() via tcp6_seq_afinfo. */
static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

/* Create /proc/net/tcp6; returns tcp_proc_register()'s status. */
int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

/* Remove /proc/net/tcp6. */
void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
2238
/* The AF_INET6 TCP proto: mostly the shared TCP implementation, with the
 * v6-specific init/destroy/hash/receive entry points from this file. */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
2271
/* inet6 protocol handler entry for IPPROTO_TCP (rcv + ICMPv6 errors). */
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
2277
2278 extern struct proto_ops inet6_stream_ops;
2279
/* socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) switch entry. */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,	/* no capability required to create */
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
2289
/* Boot-time registration of TCPv6 with the inet6 layer.
 * NOTE(review): a failed inet6_add_protocol() is only logged and the
 * protosw entry is registered anyway — confirm that is intended.
 */
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}