Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Debug verbosity for this file.  ND_PRINTK<n> produces output only when
 * ND_DEBUG >= n; set to 3 to get tracing.
 */
#define ND_DEBUG 1

/* printk() that is only reached when net_ratelimit() returns non-zero. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while (0)
/* Discards its arguments and expands to an empty statement. */
#define ND_NOPRINTK(x...) do { ; } while (0)

/* Level 0 is unconditional. */
#define ND_PRINTK0 ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1 ND_PRINTK
#else
#define ND_PRINTK1 ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2 ND_PRINTK
#else
#define ND_PRINTK2 ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3 ND_PRINTK
#else
#define ND_PRINTK3 ND_NOPRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <linux/proc_fs.h>
88
89 #include <linux/netfilter.h>
90 #include <linux/netfilter_ipv6.h>
91
92 static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
93 static int ndisc_constructor(struct neighbour *neigh);
94 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
95 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
96 static int pndisc_constructor(struct pneigh_entry *n);
97 static void pndisc_destructor(struct pneigh_entry *n);
98 static void pndisc_redo(struct sk_buff *skb);
99
100 static struct neigh_ops ndisc_generic_ops = {
101         .family =               AF_INET6,
102         .solicit =              ndisc_solicit,
103         .error_report =         ndisc_error_report,
104         .output =               neigh_resolve_output,
105         .connected_output =     neigh_connected_output,
106         .hh_output =            dev_queue_xmit,
107         .queue_xmit =           dev_queue_xmit,
108 };
109
110 static struct neigh_ops ndisc_hh_ops = {
111         .family =               AF_INET6,
112         .solicit =              ndisc_solicit,
113         .error_report =         ndisc_error_report,
114         .output =               neigh_resolve_output,
115         .connected_output =     neigh_resolve_output,
116         .hh_output =            dev_queue_xmit,
117         .queue_xmit =           dev_queue_xmit,
118 };
119
120
121 static struct neigh_ops ndisc_direct_ops = {
122         .family =               AF_INET6,
123         .output =               dev_queue_xmit,
124         .connected_output =     dev_queue_xmit,
125         .hh_output =            dev_queue_xmit,
126         .queue_xmit =           dev_queue_xmit,
127 };
128
129 struct neigh_table nd_tbl = {
130         .family =       AF_INET6,
131         .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
132         .key_len =      sizeof(struct in6_addr),
133         .hash =         ndisc_hash,
134         .constructor =  ndisc_constructor,
135         .pconstructor = pndisc_constructor,
136         .pdestructor =  pndisc_destructor,
137         .proxy_redo =   pndisc_redo,
138         .id =           "ndisc_cache",
139         .parms = {
140                 .tbl =                  &nd_tbl,
141                 .base_reachable_time =  30 * HZ,
142                 .retrans_time =  1 * HZ,
143                 .gc_staletime = 60 * HZ,
144                 .reachable_time =               30 * HZ,
145                 .delay_probe_time =      5 * HZ,
146                 .queue_len =             3,
147                 .ucast_probes =  3,
148                 .mcast_probes =  3,
149                 .anycast_delay =         1 * HZ,
150                 .proxy_delay =          (8 * HZ) / 10,
151                 .proxy_qlen =           64,
152         },
153         .gc_interval =    30 * HZ,
154         .gc_thresh1 =    128,
155         .gc_thresh2 =    512,
156         .gc_thresh3 =   1024,
157 };
158
159 /* ND options */
160 struct ndisc_options {
161         struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
162 #ifdef CONFIG_IPV6_ROUTE_INFO
163         struct nd_opt_hdr *nd_opts_ri;
164         struct nd_opt_hdr *nd_opts_ri_end;
165 #endif
166         struct nd_opt_hdr *nd_useropts;
167         struct nd_opt_hdr *nd_useropts_end;
168 };
169
/* Named views onto ndisc_options.nd_opt_array[]. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]

/*
 * Bytes occupied by an option carrying @len bytes of payload: two bytes
 * are added for the type/length header and the sum is rounded up to the
 * next multiple of 8.
 */
#define NDISC_OPT_SPACE(len)	(((len) + 2 + 7) & ~7)
178
179 /*
180  * Return the padding between the option length and the start of the
181  * link addr.  Currently only IP-over-InfiniBand needs this, although
182  * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
183  * also need a pad of 2.
184  */
185 static int ndisc_addr_option_pad(unsigned short type)
186 {
187         switch (type) {
188         case ARPHRD_INFINIBAND: return 2;
189         default:                return 0;
190         }
191 }
192
193 static inline int ndisc_opt_addr_space(struct net_device *dev)
194 {
195         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
196 }
197
198 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
199                                   unsigned short addr_type)
200 {
201         int space = NDISC_OPT_SPACE(data_len);
202         int pad   = ndisc_addr_option_pad(addr_type);
203
204         opt[0] = type;
205         opt[1] = space>>3;
206
207         memset(opt + 2, 0, pad);
208         opt   += pad;
209         space -= pad;
210
211         memcpy(opt+2, data, data_len);
212         data_len += 2;
213         opt += data_len;
214         if ((space -= data_len) > 0)
215                 memset(opt, 0, space);
216         return opt + space;
217 }
218
219 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
220                                             struct nd_opt_hdr *end)
221 {
222         int type;
223         if (!cur || !end || cur >= end)
224                 return NULL;
225         type = cur->nd_opt_type;
226         do {
227                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
228         } while(cur < end && cur->nd_opt_type != type);
229         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
230 }
231
232 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
233 {
234         return (opt->nd_opt_type == ND_OPT_RDNSS);
235 }
236
237 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
238                                              struct nd_opt_hdr *end)
239 {
240         if (!cur || !end || cur >= end)
241                 return NULL;
242         do {
243                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
244         } while(cur < end && !ndisc_is_useropt(cur));
245         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
246 }
247
248 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
249                                                  struct ndisc_options *ndopts)
250 {
251         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
252
253         if (!nd_opt || opt_len < 0 || !ndopts)
254                 return NULL;
255         memset(ndopts, 0, sizeof(*ndopts));
256         while (opt_len) {
257                 int l;
258                 if (opt_len < sizeof(struct nd_opt_hdr))
259                         return NULL;
260                 l = nd_opt->nd_opt_len << 3;
261                 if (opt_len < l || l == 0)
262                         return NULL;
263                 switch (nd_opt->nd_opt_type) {
264                 case ND_OPT_SOURCE_LL_ADDR:
265                 case ND_OPT_TARGET_LL_ADDR:
266                 case ND_OPT_MTU:
267                 case ND_OPT_REDIRECT_HDR:
268                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
269                                 ND_PRINTK2(KERN_WARNING
270                                            "%s(): duplicated ND6 option found: type=%d\n",
271                                            __func__,
272                                            nd_opt->nd_opt_type);
273                         } else {
274                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
275                         }
276                         break;
277                 case ND_OPT_PREFIX_INFO:
278                         ndopts->nd_opts_pi_end = nd_opt;
279                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
280                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
281                         break;
282 #ifdef CONFIG_IPV6_ROUTE_INFO
283                 case ND_OPT_ROUTE_INFO:
284                         ndopts->nd_opts_ri_end = nd_opt;
285                         if (!ndopts->nd_opts_ri)
286                                 ndopts->nd_opts_ri = nd_opt;
287                         break;
288 #endif
289                 default:
290                         if (ndisc_is_useropt(nd_opt)) {
291                                 ndopts->nd_useropts_end = nd_opt;
292                                 if (!ndopts->nd_useropts)
293                                         ndopts->nd_useropts = nd_opt;
294                         } else {
295                                 /*
296                                  * Unknown options must be silently ignored,
297                                  * to accommodate future extension to the
298                                  * protocol.
299                                  */
300                                 ND_PRINTK2(KERN_NOTICE
301                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
302                                            __func__,
303                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
304                         }
305                 }
306                 opt_len -= l;
307                 nd_opt = ((void *)nd_opt) + l;
308         }
309         return ndopts;
310 }
311
312 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
313                                       struct net_device *dev)
314 {
315         u8 *lladdr = (u8 *)(p + 1);
316         int lladdrlen = p->nd_opt_len << 3;
317         int prepad = ndisc_addr_option_pad(dev->type);
318         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
319                 return NULL;
320         return (lladdr + prepad);
321 }
322
323 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
324 {
325         switch (dev->type) {
326         case ARPHRD_ETHER:
327         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
328         case ARPHRD_FDDI:
329                 ipv6_eth_mc_map(addr, buf);
330                 return 0;
331         case ARPHRD_IEEE802_TR:
332                 ipv6_tr_mc_map(addr,buf);
333                 return 0;
334         case ARPHRD_ARCNET:
335                 ipv6_arcnet_mc_map(addr, buf);
336                 return 0;
337         case ARPHRD_INFINIBAND:
338                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
339                 return 0;
340         default:
341                 if (dir) {
342                         memcpy(buf, dev->broadcast, dev->addr_len);
343                         return 0;
344                 }
345         }
346         return -EINVAL;
347 }
348
349 EXPORT_SYMBOL(ndisc_mc_map);
350
351 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
352 {
353         const u32 *p32 = pkey;
354         u32 addr_hash, i;
355
356         addr_hash = 0;
357         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
358                 addr_hash ^= *p32++;
359
360         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
361 }
362
363 static int ndisc_constructor(struct neighbour *neigh)
364 {
365         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
366         struct net_device *dev = neigh->dev;
367         struct inet6_dev *in6_dev;
368         struct neigh_parms *parms;
369         int is_multicast = ipv6_addr_is_multicast(addr);
370
371         rcu_read_lock();
372         in6_dev = in6_dev_get(dev);
373         if (in6_dev == NULL) {
374                 rcu_read_unlock();
375                 return -EINVAL;
376         }
377
378         parms = in6_dev->nd_parms;
379         __neigh_parms_put(neigh->parms);
380         neigh->parms = neigh_parms_clone(parms);
381         rcu_read_unlock();
382
383         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
384         if (!dev->header_ops) {
385                 neigh->nud_state = NUD_NOARP;
386                 neigh->ops = &ndisc_direct_ops;
387                 neigh->output = neigh->ops->queue_xmit;
388         } else {
389                 if (is_multicast) {
390                         neigh->nud_state = NUD_NOARP;
391                         ndisc_mc_map(addr, neigh->ha, dev, 1);
392                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
393                         neigh->nud_state = NUD_NOARP;
394                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
395                         if (dev->flags&IFF_LOOPBACK)
396                                 neigh->type = RTN_LOCAL;
397                 } else if (dev->flags&IFF_POINTOPOINT) {
398                         neigh->nud_state = NUD_NOARP;
399                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
400                 }
401                 if (dev->header_ops->cache)
402                         neigh->ops = &ndisc_hh_ops;
403                 else
404                         neigh->ops = &ndisc_generic_ops;
405                 if (neigh->nud_state&NUD_VALID)
406                         neigh->output = neigh->ops->connected_output;
407                 else
408                         neigh->output = neigh->ops->output;
409         }
410         in6_dev_put(in6_dev);
411         return 0;
412 }
413
414 static int pndisc_constructor(struct pneigh_entry *n)
415 {
416         struct in6_addr *addr = (struct in6_addr*)&n->key;
417         struct in6_addr maddr;
418         struct net_device *dev = n->dev;
419
420         if (dev == NULL || __in6_dev_get(dev) == NULL)
421                 return -EINVAL;
422         addrconf_addr_solict_mult(addr, &maddr);
423         ipv6_dev_mc_inc(dev, &maddr);
424         return 0;
425 }
426
427 static void pndisc_destructor(struct pneigh_entry *n)
428 {
429         struct in6_addr *addr = (struct in6_addr*)&n->key;
430         struct in6_addr maddr;
431         struct net_device *dev = n->dev;
432
433         if (dev == NULL || __in6_dev_get(dev) == NULL)
434                 return;
435         addrconf_addr_solict_mult(addr, &maddr);
436         ipv6_dev_mc_dec(dev, &maddr);
437 }
438
439 /*
440  *      Send a Neighbour Advertisement
441  */
442 static void __ndisc_send(struct net_device *dev,
443                          struct neighbour *neigh,
444                          struct in6_addr *daddr, struct in6_addr *saddr,
445                          struct icmp6hdr *icmp6h, struct in6_addr *target,
446                          int llinfo)
447 {
448         struct flowi fl;
449         struct dst_entry *dst;
450         struct net *net = dev_net(dev);
451         struct sock *sk = net->ipv6.ndisc_sk;
452         struct sk_buff *skb;
453         struct icmp6hdr *hdr;
454         struct inet6_dev *idev;
455         int len;
456         int err;
457         u8 *opt, type;
458
459         type = icmp6h->icmp6_type;
460
461         icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
462
463         dst = icmp6_dst_alloc(dev, neigh, daddr);
464         if (!dst)
465                 return;
466
467         err = xfrm_lookup(&dst, &fl, NULL, 0);
468         if (err < 0)
469                 return;
470
471         if (!dev->addr_len)
472                 llinfo = 0;
473
474         len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
475         if (llinfo)
476                 len += ndisc_opt_addr_space(dev);
477
478         skb = sock_alloc_send_skb(sk,
479                                   (MAX_HEADER + sizeof(struct ipv6hdr) +
480                                    len + LL_RESERVED_SPACE(dev)),
481                                   1, &err);
482         if (!skb) {
483                 ND_PRINTK0(KERN_ERR
484                            "ICMPv6 ND: %s() failed to allocate an skb.\n",
485                            __func__);
486                 dst_release(dst);
487                 return;
488         }
489
490         skb_reserve(skb, LL_RESERVED_SPACE(dev));
491         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
492
493         skb->transport_header = skb->tail;
494         skb_put(skb, len);
495
496         hdr = (struct icmp6hdr *)skb_transport_header(skb);
497         memcpy(hdr, icmp6h, sizeof(*hdr));
498
499         opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
500         if (target) {
501                 ipv6_addr_copy((struct in6_addr *)opt, target);
502                 opt += sizeof(*target);
503         }
504
505         if (llinfo)
506                 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
507                                        dev->addr_len, dev->type);
508
509         hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
510                                            IPPROTO_ICMPV6,
511                                            csum_partial((__u8 *) hdr,
512                                                         len, 0));
513
514         skb->dst = dst;
515
516         idev = in6_dev_get(dst->dev);
517         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
518
519         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
520                       dst_output);
521         if (!err) {
522                 ICMP6MSGOUT_INC_STATS(idev, type);
523                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
524         }
525
526         if (likely(idev != NULL))
527                 in6_dev_put(idev);
528 }
529
530 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
531                    struct in6_addr *daddr, struct in6_addr *solicited_addr,
532                    int router, int solicited, int override, int inc_opt)
533 {
534         struct in6_addr tmpaddr;
535         struct inet6_ifaddr *ifp;
536         struct in6_addr *src_addr;
537         struct icmp6hdr icmp6h = {
538                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
539         };
540
541         /* for anycast or proxy, solicited_addr != src_addr */
542         ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
543         if (ifp) {
544                 src_addr = solicited_addr;
545                 if (ifp->flags & IFA_F_OPTIMISTIC)
546                         override = 0;
547                 in6_ifa_put(ifp);
548         } else {
549                 if (ipv6_dev_get_saddr(dev, daddr,
550                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
551                                        &tmpaddr))
552                         return;
553                 src_addr = &tmpaddr;
554         }
555
556         icmp6h.icmp6_router = router;
557         icmp6h.icmp6_solicited = solicited;
558         icmp6h.icmp6_override = override;
559
560         __ndisc_send(dev, neigh, daddr, src_addr,
561                      &icmp6h, solicited_addr,
562                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
563 }
564
565 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
566                    struct in6_addr *solicit,
567                    struct in6_addr *daddr, struct in6_addr *saddr)
568 {
569         struct in6_addr addr_buf;
570         struct icmp6hdr icmp6h = {
571                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
572         };
573
574         if (saddr == NULL) {
575                 if (ipv6_get_lladdr(dev, &addr_buf,
576                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
577                         return;
578                 saddr = &addr_buf;
579         }
580
581         __ndisc_send(dev, neigh, daddr, saddr,
582                      &icmp6h, solicit,
583                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
584 }
585
586 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
587                    struct in6_addr *daddr)
588 {
589         struct icmp6hdr icmp6h = {
590                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
591         };
592         int send_sllao = dev->addr_len;
593
594 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
595         /*
596          * According to section 2.2 of RFC 4429, we must not
597          * send router solicitations with a sllao from
598          * optimistic addresses, but we may send the solicitation
599          * if we don't include the sllao.  So here we check
600          * if our address is optimistic, and if so, we
601          * suppress the inclusion of the sllao.
602          */
603         if (send_sllao) {
604                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
605                                                            dev, 1);
606                 if (ifp) {
607                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
608                                 send_sllao = 0;
609                         }
610                         in6_ifa_put(ifp);
611                 } else {
612                         send_sllao = 0;
613                 }
614         }
615 #endif
616         __ndisc_send(dev, NULL, daddr, saddr,
617                      &icmp6h, NULL,
618                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
619 }
620
621
/*
 * neigh_ops error_report hook: signal a link failure for @skb's route
 * and then free the packet.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 * "The sender MUST return an ICMP destination unreachable"
	 */
	dst_link_failure(skb);

	kfree_skb(skb);
}
631
632 /* Called with locked neigh: either read or both */
633
634 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
635 {
636         struct in6_addr *saddr = NULL;
637         struct in6_addr mcaddr;
638         struct net_device *dev = neigh->dev;
639         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
640         int probes = atomic_read(&neigh->probes);
641
642         if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
643                 saddr = &ipv6_hdr(skb)->saddr;
644
645         if ((probes -= neigh->parms->ucast_probes) < 0) {
646                 if (!(neigh->nud_state & NUD_VALID)) {
647                         ND_PRINTK1(KERN_DEBUG
648                                    "%s(): trying to ucast probe in NUD_INVALID: "
649                                    NIP6_FMT "\n",
650                                    __func__,
651                                    NIP6(*target));
652                 }
653                 ndisc_send_ns(dev, neigh, target, target, saddr);
654         } else if ((probes -= neigh->parms->app_probes) < 0) {
655 #ifdef CONFIG_ARPD
656                 neigh_app_ns(neigh);
657 #endif
658         } else {
659                 addrconf_addr_solict_mult(target, &mcaddr);
660                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
661         }
662 }
663
664 static struct pneigh_entry *pndisc_check_router(struct net_device *dev,
665                 struct in6_addr *addr, int *is_router)
666 {
667         struct pneigh_entry *n;
668
669         read_lock_bh(&nd_tbl.lock);
670         n = __pneigh_lookup(&nd_tbl, dev_net(dev), addr, dev);
671         if (n != NULL)
672                 *is_router = (n->flags & NTF_ROUTER);
673         read_unlock_bh(&nd_tbl.lock);
674
675         return n;
676 }
677
678 static void ndisc_recv_ns(struct sk_buff *skb)
679 {
680         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
681         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
682         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
683         u8 *lladdr = NULL;
684         u32 ndoptlen = skb->tail - (skb->transport_header +
685                                     offsetof(struct nd_msg, opt));
686         struct ndisc_options ndopts;
687         struct net_device *dev = skb->dev;
688         struct inet6_ifaddr *ifp;
689         struct inet6_dev *idev = NULL;
690         struct neighbour *neigh;
691         struct pneigh_entry *pneigh = NULL;
692         int dad = ipv6_addr_any(saddr);
693         int inc;
694         int is_router = 0;
695
696         if (ipv6_addr_is_multicast(&msg->target)) {
697                 ND_PRINTK2(KERN_WARNING
698                            "ICMPv6 NS: multicast target address");
699                 return;
700         }
701
702         /*
703          * RFC2461 7.1.1:
704          * DAD has to be destined for solicited node multicast address.
705          */
706         if (dad &&
707             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
708               daddr->s6_addr32[1] == htonl(0x00000000) &&
709               daddr->s6_addr32[2] == htonl(0x00000001) &&
710               daddr->s6_addr [12] == 0xff )) {
711                 ND_PRINTK2(KERN_WARNING
712                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
713                 return;
714         }
715
716         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
717                 ND_PRINTK2(KERN_WARNING
718                            "ICMPv6 NS: invalid ND options\n");
719                 return;
720         }
721
722         if (ndopts.nd_opts_src_lladdr) {
723                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
724                 if (!lladdr) {
725                         ND_PRINTK2(KERN_WARNING
726                                    "ICMPv6 NS: invalid link-layer address length\n");
727                         return;
728                 }
729
730                 /* RFC2461 7.1.1:
731                  *      If the IP source address is the unspecified address,
732                  *      there MUST NOT be source link-layer address option
733                  *      in the message.
734                  */
735                 if (dad) {
736                         ND_PRINTK2(KERN_WARNING
737                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
738                         return;
739                 }
740         }
741
742         inc = ipv6_addr_is_multicast(daddr);
743
744         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
745         if (ifp) {
746
747                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
748                         if (dad) {
749                                 if (dev->type == ARPHRD_IEEE802_TR) {
750                                         const unsigned char *sadr;
751                                         sadr = skb_mac_header(skb);
752                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
753                                             sadr[9] == dev->dev_addr[1] &&
754                                             sadr[10] == dev->dev_addr[2] &&
755                                             sadr[11] == dev->dev_addr[3] &&
756                                             sadr[12] == dev->dev_addr[4] &&
757                                             sadr[13] == dev->dev_addr[5]) {
758                                                 /* looped-back to us */
759                                                 goto out;
760                                         }
761                                 }
762
763                                 /*
764                                  * We are colliding with another node
765                                  * who is doing DAD
766                                  * so fail our DAD process
767                                  */
768                                 addrconf_dad_failure(ifp);
769                                 return;
770                         } else {
771                                 /*
772                                  * This is not a dad solicitation.
773                                  * If we are an optimistic node,
774                                  * we should respond.
775                                  * Otherwise, we should ignore it.
776                                  */
777                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
778                                         goto out;
779                         }
780                 }
781
782                 idev = ifp->idev;
783         } else {
784                 idev = in6_dev_get(dev);
785                 if (!idev) {
786                         /* XXX: count this drop? */
787                         return;
788                 }
789
790                 if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
791                     (idev->cnf.forwarding &&
792                      (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
793                      (pneigh = pndisc_check_router(dev, &msg->target,
794                                                   &is_router)) != NULL)) {
795                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
796                             skb->pkt_type != PACKET_HOST &&
797                             inc != 0 &&
798                             idev->nd_parms->proxy_delay != 0) {
799                                 /*
800                                  * for anycast or proxy,
801                                  * sender should delay its response
802                                  * by a random time between 0 and
803                                  * MAX_ANYCAST_DELAY_TIME seconds.
804                                  * (RFC2461) -- yoshfuji
805                                  */
806                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
807                                 if (n)
808                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
809                                 goto out;
810                         }
811                 } else
812                         goto out;
813         }
814
815         is_router = !!(pneigh ? is_router : idev->cnf.forwarding);
816
817         if (dad) {
818                 struct in6_addr maddr;
819
820                 ipv6_addr_all_nodes(&maddr);
821                 ndisc_send_na(dev, NULL, &maddr, &msg->target,
822                               is_router, 0, (ifp != NULL), 1);
823                 goto out;
824         }
825
826         if (inc)
827                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
828         else
829                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
830
831         /*
832          *      update / create cache entry
833          *      for the source address
834          */
835         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
836                                !inc || lladdr || !dev->addr_len);
837         if (neigh)
838                 neigh_update(neigh, lladdr, NUD_STALE,
839                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
840                              NEIGH_UPDATE_F_OVERRIDE);
841         if (neigh || !dev->header_ops) {
842                 ndisc_send_na(dev, neigh, saddr, &msg->target,
843                               is_router,
844                               1, (ifp != NULL && inc), inc);
845                 if (neigh)
846                         neigh_release(neigh);
847         }
848
849 out:
850         if (ifp)
851                 in6_ifa_put(ifp);
852         else
853                 in6_dev_put(idev);
854
855         return;
856 }
857
858 static void ndisc_recv_na(struct sk_buff *skb)
859 {
860         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
861         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
862         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
863         u8 *lladdr = NULL;
864         u32 ndoptlen = skb->tail - (skb->transport_header +
865                                     offsetof(struct nd_msg, opt));
866         struct ndisc_options ndopts;
867         struct net_device *dev = skb->dev;
868         struct inet6_ifaddr *ifp;
869         struct neighbour *neigh;
870
871         if (skb->len < sizeof(struct nd_msg)) {
872                 ND_PRINTK2(KERN_WARNING
873                            "ICMPv6 NA: packet too short\n");
874                 return;
875         }
876
877         if (ipv6_addr_is_multicast(&msg->target)) {
878                 ND_PRINTK2(KERN_WARNING
879                            "ICMPv6 NA: target address is multicast.\n");
880                 return;
881         }
882
883         if (ipv6_addr_is_multicast(daddr) &&
884             msg->icmph.icmp6_solicited) {
885                 ND_PRINTK2(KERN_WARNING
886                            "ICMPv6 NA: solicited NA is multicasted.\n");
887                 return;
888         }
889
890         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
891                 ND_PRINTK2(KERN_WARNING
892                            "ICMPv6 NS: invalid ND option\n");
893                 return;
894         }
895         if (ndopts.nd_opts_tgt_lladdr) {
896                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
897                 if (!lladdr) {
898                         ND_PRINTK2(KERN_WARNING
899                                    "ICMPv6 NA: invalid link-layer address length\n");
900                         return;
901                 }
902         }
903         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
904         if (ifp) {
905                 if (ifp->flags & IFA_F_TENTATIVE) {
906                         addrconf_dad_failure(ifp);
907                         return;
908                 }
909                 /* What should we make now? The advertisement
910                    is invalid, but ndisc specs say nothing
911                    about it. It could be misconfiguration, or
912                    an smart proxy agent tries to help us :-)
913                  */
914                 ND_PRINTK1(KERN_WARNING
915                            "ICMPv6 NA: someone advertises our address on %s!\n",
916                            ifp->idev->dev->name);
917                 in6_ifa_put(ifp);
918                 return;
919         }
920         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
921
922         if (neigh) {
923                 u8 old_flags = neigh->flags;
924
925                 if (neigh->nud_state & NUD_FAILED)
926                         goto out;
927
928                 /*
929                  * Don't update the neighbor cache entry on a proxy NA from
930                  * ourselves because either the proxied node is off link or it
931                  * has already sent a NA to us.
932                  */
933                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
934                     ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
935                     pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
936                         /* XXX: idev->cnf.prixy_ndp */
937                         goto out;
938                 }
939
940                 neigh_update(neigh, lladdr,
941                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
942                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
943                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
944                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
945                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
946
947                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
948                         /*
949                          * Change: router to host
950                          */
951                         struct rt6_info *rt;
952                         rt = rt6_get_dflt_router(saddr, dev);
953                         if (rt)
954                                 ip6_del_rt(rt);
955                 }
956
957 out:
958                 neigh_release(neigh);
959         }
960 }
961
962 static void ndisc_recv_rs(struct sk_buff *skb)
963 {
964         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
965         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
966         struct neighbour *neigh;
967         struct inet6_dev *idev;
968         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
969         struct ndisc_options ndopts;
970         u8 *lladdr = NULL;
971
972         if (skb->len < sizeof(*rs_msg))
973                 return;
974
975         idev = in6_dev_get(skb->dev);
976         if (!idev) {
977                 if (net_ratelimit())
978                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
979                 return;
980         }
981
982         /* Don't accept RS if we're not in router mode */
983         if (!idev->cnf.forwarding)
984                 goto out;
985
986         /*
987          * Don't update NCE if src = ::;
988          * this implies that the source node has no ip address assigned yet.
989          */
990         if (ipv6_addr_any(saddr))
991                 goto out;
992
993         /* Parse ND options */
994         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
995                 if (net_ratelimit())
996                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
997                 goto out;
998         }
999
1000         if (ndopts.nd_opts_src_lladdr) {
1001                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1002                                              skb->dev);
1003                 if (!lladdr)
1004                         goto out;
1005         }
1006
1007         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1008         if (neigh) {
1009                 neigh_update(neigh, lladdr, NUD_STALE,
1010                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1011                              NEIGH_UPDATE_F_OVERRIDE|
1012                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1013                 neigh_release(neigh);
1014         }
1015 out:
1016         in6_dev_put(idev);
1017 }
1018
1019 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1020 {
1021         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1022         struct sk_buff *skb;
1023         struct nlmsghdr *nlh;
1024         struct nduseroptmsg *ndmsg;
1025         struct net *net = dev_net(ra->dev);
1026         int err;
1027         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1028                                     + (opt->nd_opt_len << 3));
1029         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1030
1031         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1032         if (skb == NULL) {
1033                 err = -ENOBUFS;
1034                 goto errout;
1035         }
1036
1037         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1038         if (nlh == NULL) {
1039                 goto nla_put_failure;
1040         }
1041
1042         ndmsg = nlmsg_data(nlh);
1043         ndmsg->nduseropt_family = AF_INET6;
1044         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1045         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1046         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1047         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1048
1049         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1050
1051         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1052                 &ipv6_hdr(ra)->saddr);
1053         nlmsg_end(skb, nlh);
1054
1055         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1056                           GFP_ATOMIC);
1057         if (err < 0)
1058                 goto errout;
1059
1060         return;
1061
1062 nla_put_failure:
1063         nlmsg_free(skb);
1064         err = -EMSGSIZE;
1065 errout:
1066         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1067 }
1068
1069 static void ndisc_router_discovery(struct sk_buff *skb)
1070 {
1071         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1072         struct neighbour *neigh = NULL;
1073         struct inet6_dev *in6_dev;
1074         struct rt6_info *rt = NULL;
1075         int lifetime;
1076         struct ndisc_options ndopts;
1077         int optlen;
1078         unsigned int pref = 0;
1079
1080         __u8 * opt = (__u8 *)(ra_msg + 1);
1081
1082         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1083
1084         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1085                 ND_PRINTK2(KERN_WARNING
1086                            "ICMPv6 RA: source address is not link-local.\n");
1087                 return;
1088         }
1089         if (optlen < 0) {
1090                 ND_PRINTK2(KERN_WARNING
1091                            "ICMPv6 RA: packet too short\n");
1092                 return;
1093         }
1094
1095         /*
1096          *      set the RA_RECV flag in the interface
1097          */
1098
1099         in6_dev = in6_dev_get(skb->dev);
1100         if (in6_dev == NULL) {
1101                 ND_PRINTK0(KERN_ERR
1102                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1103                            skb->dev->name);
1104                 return;
1105         }
1106         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1107                 in6_dev_put(in6_dev);
1108                 return;
1109         }
1110
1111         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1112                 in6_dev_put(in6_dev);
1113                 ND_PRINTK2(KERN_WARNING
1114                            "ICMP6 RA: invalid ND options\n");
1115                 return;
1116         }
1117
1118         if (in6_dev->if_flags & IF_RS_SENT) {
1119                 /*
1120                  *      flag that an RA was received after an RS was sent
1121                  *      out on this interface.
1122                  */
1123                 in6_dev->if_flags |= IF_RA_RCVD;
1124         }
1125
1126         /*
1127          * Remember the managed/otherconf flags from most recently
1128          * received RA message (RFC 2462) -- yoshfuji
1129          */
1130         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1131                                 IF_RA_OTHERCONF)) |
1132                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1133                                         IF_RA_MANAGED : 0) |
1134                                 (ra_msg->icmph.icmp6_addrconf_other ?
1135                                         IF_RA_OTHERCONF : 0);
1136
1137         if (!in6_dev->cnf.accept_ra_defrtr)
1138                 goto skip_defrtr;
1139
1140         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1141
1142 #ifdef CONFIG_IPV6_ROUTER_PREF
1143         pref = ra_msg->icmph.icmp6_router_pref;
1144         /* 10b is handled as if it were 00b (medium) */
1145         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1146             !in6_dev->cnf.accept_ra_rtr_pref)
1147                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1148 #endif
1149
1150         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1151
1152         if (rt)
1153                 neigh = rt->rt6i_nexthop;
1154
1155         if (rt && lifetime == 0) {
1156                 neigh_clone(neigh);
1157                 ip6_del_rt(rt);
1158                 rt = NULL;
1159         }
1160
1161         if (rt == NULL && lifetime) {
1162                 ND_PRINTK3(KERN_DEBUG
1163                            "ICMPv6 RA: adding default router.\n");
1164
1165                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1166                 if (rt == NULL) {
1167                         ND_PRINTK0(KERN_ERR
1168                                    "ICMPv6 RA: %s() failed to add default route.\n",
1169                                    __func__);
1170                         in6_dev_put(in6_dev);
1171                         return;
1172                 }
1173
1174                 neigh = rt->rt6i_nexthop;
1175                 if (neigh == NULL) {
1176                         ND_PRINTK0(KERN_ERR
1177                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1178                                    __func__);
1179                         dst_release(&rt->u.dst);
1180                         in6_dev_put(in6_dev);
1181                         return;
1182                 }
1183                 neigh->flags |= NTF_ROUTER;
1184         } else if (rt) {
1185                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1186         }
1187
1188         if (rt)
1189                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1190
1191         if (ra_msg->icmph.icmp6_hop_limit) {
1192                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1193                 if (rt)
1194                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1195         }
1196
1197 skip_defrtr:
1198
1199         /*
1200          *      Update Reachable Time and Retrans Timer
1201          */
1202
1203         if (in6_dev->nd_parms) {
1204                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1205
1206                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1207                         rtime = (rtime*HZ)/1000;
1208                         if (rtime < HZ/10)
1209                                 rtime = HZ/10;
1210                         in6_dev->nd_parms->retrans_time = rtime;
1211                         in6_dev->tstamp = jiffies;
1212                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1213                 }
1214
1215                 rtime = ntohl(ra_msg->reachable_time);
1216                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1217                         rtime = (rtime*HZ)/1000;
1218
1219                         if (rtime < HZ/10)
1220                                 rtime = HZ/10;
1221
1222                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1223                                 in6_dev->nd_parms->base_reachable_time = rtime;
1224                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1225                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1226                                 in6_dev->tstamp = jiffies;
1227                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1228                         }
1229                 }
1230         }
1231
1232         /*
1233          *      Process options.
1234          */
1235
1236         if (!neigh)
1237                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1238                                        skb->dev, 1);
1239         if (neigh) {
1240                 u8 *lladdr = NULL;
1241                 if (ndopts.nd_opts_src_lladdr) {
1242                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1243                                                      skb->dev);
1244                         if (!lladdr) {
1245                                 ND_PRINTK2(KERN_WARNING
1246                                            "ICMPv6 RA: invalid link-layer address length\n");
1247                                 goto out;
1248                         }
1249                 }
1250                 neigh_update(neigh, lladdr, NUD_STALE,
1251                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1252                              NEIGH_UPDATE_F_OVERRIDE|
1253                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1254                              NEIGH_UPDATE_F_ISROUTER);
1255         }
1256
1257 #ifdef CONFIG_IPV6_ROUTE_INFO
1258         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1259                 struct nd_opt_hdr *p;
1260                 for (p = ndopts.nd_opts_ri;
1261                      p;
1262                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1263                         if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1264                                 continue;
1265                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1266                                       &ipv6_hdr(skb)->saddr);
1267                 }
1268         }
1269 #endif
1270
1271         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1272                 struct nd_opt_hdr *p;
1273                 for (p = ndopts.nd_opts_pi;
1274                      p;
1275                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1276                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1277                 }
1278         }
1279
1280         if (ndopts.nd_opts_mtu) {
1281                 __be32 n;
1282                 u32 mtu;
1283
1284                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1285                 mtu = ntohl(n);
1286
1287                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1288                         ND_PRINTK2(KERN_WARNING
1289                                    "ICMPv6 RA: invalid mtu: %d\n",
1290                                    mtu);
1291                 } else if (in6_dev->cnf.mtu6 != mtu) {
1292                         in6_dev->cnf.mtu6 = mtu;
1293
1294                         if (rt)
1295                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1296
1297                         rt6_mtu_change(skb->dev, mtu);
1298                 }
1299         }
1300
1301         if (ndopts.nd_useropts) {
1302                 struct nd_opt_hdr *p;
1303                 for (p = ndopts.nd_useropts;
1304                      p;
1305                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1306                         ndisc_ra_useropt(skb, p);
1307                 }
1308         }
1309
1310         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1311                 ND_PRINTK2(KERN_WARNING
1312                            "ICMPv6 RA: invalid RA options");
1313         }
1314 out:
1315         if (rt)
1316                 dst_release(&rt->u.dst);
1317         else if (neigh)
1318                 neigh_release(neigh);
1319         in6_dev_put(in6_dev);
1320 }
1321
1322 static void ndisc_redirect_rcv(struct sk_buff *skb)
1323 {
1324         struct inet6_dev *in6_dev;
1325         struct icmp6hdr *icmph;
1326         struct in6_addr *dest;
1327         struct in6_addr *target;        /* new first hop to destination */
1328         struct neighbour *neigh;
1329         int on_link = 0;
1330         struct ndisc_options ndopts;
1331         int optlen;
1332         u8 *lladdr = NULL;
1333
1334         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1335                 ND_PRINTK2(KERN_WARNING
1336                            "ICMPv6 Redirect: source address is not link-local.\n");
1337                 return;
1338         }
1339
1340         optlen = skb->tail - skb->transport_header;
1341         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1342
1343         if (optlen < 0) {
1344                 ND_PRINTK2(KERN_WARNING
1345                            "ICMPv6 Redirect: packet too short\n");
1346                 return;
1347         }
1348
1349         icmph = icmp6_hdr(skb);
1350         target = (struct in6_addr *) (icmph + 1);
1351         dest = target + 1;
1352
1353         if (ipv6_addr_is_multicast(dest)) {
1354                 ND_PRINTK2(KERN_WARNING
1355                            "ICMPv6 Redirect: destination address is multicast.\n");
1356                 return;
1357         }
1358
1359         if (ipv6_addr_equal(dest, target)) {
1360                 on_link = 1;
1361         } else if (ipv6_addr_type(target) !=
1362                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1363                 ND_PRINTK2(KERN_WARNING
1364                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1365                 return;
1366         }
1367
1368         in6_dev = in6_dev_get(skb->dev);
1369         if (!in6_dev)
1370                 return;
1371         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1372                 in6_dev_put(in6_dev);
1373                 return;
1374         }
1375
1376         /* RFC2461 8.1:
1377          *      The IP source address of the Redirect MUST be the same as the current
1378          *      first-hop router for the specified ICMP Destination Address.
1379          */
1380
1381         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1382                 ND_PRINTK2(KERN_WARNING
1383                            "ICMPv6 Redirect: invalid ND options\n");
1384                 in6_dev_put(in6_dev);
1385                 return;
1386         }
1387         if (ndopts.nd_opts_tgt_lladdr) {
1388                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1389                                              skb->dev);
1390                 if (!lladdr) {
1391                         ND_PRINTK2(KERN_WARNING
1392                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1393                         in6_dev_put(in6_dev);
1394                         return;
1395                 }
1396         }
1397
1398         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1399         if (neigh) {
1400                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1401                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1402                              on_link);
1403                 neigh_release(neigh);
1404         }
1405         in6_dev_put(in6_dev);
1406 }
1407
1408 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1409                          struct in6_addr *target)
1410 {
1411         struct net_device *dev = skb->dev;
1412         struct net *net = dev_net(dev);
1413         struct sock *sk = net->ipv6.ndisc_sk;
1414         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1415         struct sk_buff *buff;
1416         struct icmp6hdr *icmph;
1417         struct in6_addr saddr_buf;
1418         struct in6_addr *addrp;
1419         struct rt6_info *rt;
1420         struct dst_entry *dst;
1421         struct inet6_dev *idev;
1422         struct flowi fl;
1423         u8 *opt;
1424         int rd_len;
1425         int err;
1426         u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1427
1428         if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1429                 ND_PRINTK2(KERN_WARNING
1430                            "ICMPv6 Redirect: no link-local address on %s\n",
1431                            dev->name);
1432                 return;
1433         }
1434
1435         if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1436             ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1437                 ND_PRINTK2(KERN_WARNING
1438                         "ICMPv6 Redirect: target address is not link-local unicast.\n");
1439                 return;
1440         }
1441
1442         icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
1443                          &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1444
1445         dst = ip6_route_output(net, NULL, &fl);
1446         if (dst == NULL)
1447                 return;
1448
1449         err = xfrm_lookup(&dst, &fl, NULL, 0);
1450         if (err)
1451                 return;
1452
1453         rt = (struct rt6_info *) dst;
1454
1455         if (rt->rt6i_flags & RTF_GATEWAY) {
1456                 ND_PRINTK2(KERN_WARNING
1457                            "ICMPv6 Redirect: destination is not a neighbour.\n");
1458                 dst_release(dst);
1459                 return;
1460         }
1461         if (!xrlim_allow(dst, 1*HZ)) {
1462                 dst_release(dst);
1463                 return;
1464         }
1465
1466         if (dev->addr_len) {
1467                 read_lock_bh(&neigh->lock);
1468                 if (neigh->nud_state & NUD_VALID) {
1469                         memcpy(ha_buf, neigh->ha, dev->addr_len);
1470                         read_unlock_bh(&neigh->lock);
1471                         ha = ha_buf;
1472                         len += ndisc_opt_addr_space(dev);
1473                 } else
1474                         read_unlock_bh(&neigh->lock);
1475         }
1476
1477         rd_len = min_t(unsigned int,
1478                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1479         rd_len &= ~0x7;
1480         len += rd_len;
1481
1482         buff = sock_alloc_send_skb(sk,
1483                                    (MAX_HEADER + sizeof(struct ipv6hdr) +
1484                                     len + LL_RESERVED_SPACE(dev)),
1485                                    1, &err);
1486         if (buff == NULL) {
1487                 ND_PRINTK0(KERN_ERR
1488                            "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1489                            __func__);
1490                 dst_release(dst);
1491                 return;
1492         }
1493
1494         skb_reserve(buff, LL_RESERVED_SPACE(dev));
1495         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1496                    IPPROTO_ICMPV6, len);
1497
1498         skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1499         skb_put(buff, len);
1500         icmph = icmp6_hdr(buff);
1501
1502         memset(icmph, 0, sizeof(struct icmp6hdr));
1503         icmph->icmp6_type = NDISC_REDIRECT;
1504
1505         /*
1506          *      copy target and destination addresses
1507          */
1508
1509         addrp = (struct in6_addr *)(icmph + 1);
1510         ipv6_addr_copy(addrp, target);
1511         addrp++;
1512         ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1513
1514         opt = (u8*) (addrp + 1);
1515
1516         /*
1517          *      include target_address option
1518          */
1519
1520         if (ha)
1521                 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1522                                              dev->addr_len, dev->type);
1523
1524         /*
1525          *      build redirect option and copy skb over to the new packet.
1526          */
1527
1528         memset(opt, 0, 8);
1529         *(opt++) = ND_OPT_REDIRECT_HDR;
1530         *(opt++) = (rd_len >> 3);
1531         opt += 6;
1532
1533         memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1534
1535         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1536                                              len, IPPROTO_ICMPV6,
1537                                              csum_partial((u8 *) icmph, len, 0));
1538
1539         buff->dst = dst;
1540         idev = in6_dev_get(dst->dev);
1541         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1542         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1543                       dst_output);
1544         if (!err) {
1545                 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1546                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1547         }
1548
1549         if (likely(idev != NULL))
1550                 in6_dev_put(idev);
1551 }
1552
/*
 *	Run a Neighbour Solicitation through ndisc_recv_ns() and then free
 *	the skb.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);

	/* the skb is freed here, after ndisc_recv_ns() has returned */
	kfree_skb(skb);
}
1558
1559 int ndisc_rcv(struct sk_buff *skb)
1560 {
1561         struct nd_msg *msg;
1562
1563         if (!pskb_may_pull(skb, skb->len))
1564                 return 0;
1565
1566         msg = (struct nd_msg *)skb_transport_header(skb);
1567
1568         __skb_push(skb, skb->data - skb_transport_header(skb));
1569
1570         if (ipv6_hdr(skb)->hop_limit != 255) {
1571                 ND_PRINTK2(KERN_WARNING
1572                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1573                            ipv6_hdr(skb)->hop_limit);
1574                 return 0;
1575         }
1576
1577         if (msg->icmph.icmp6_code != 0) {
1578                 ND_PRINTK2(KERN_WARNING
1579                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1580                            msg->icmph.icmp6_code);
1581                 return 0;
1582         }
1583
1584         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1585
1586         switch (msg->icmph.icmp6_type) {
1587         case NDISC_NEIGHBOUR_SOLICITATION:
1588                 ndisc_recv_ns(skb);
1589                 break;
1590
1591         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1592                 ndisc_recv_na(skb);
1593                 break;
1594
1595         case NDISC_ROUTER_SOLICITATION:
1596                 ndisc_recv_rs(skb);
1597                 break;
1598
1599         case NDISC_ROUTER_ADVERTISEMENT:
1600                 ndisc_router_discovery(skb);
1601                 break;
1602
1603         case NDISC_REDIRECT:
1604                 ndisc_redirect_rcv(skb);
1605                 break;
1606         }
1607
1608         return 0;
1609 }
1610
1611 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1612 {
1613         struct net_device *dev = ptr;
1614         struct net *net = dev_net(dev);
1615
1616         switch (event) {
1617         case NETDEV_CHANGEADDR:
1618                 neigh_changeaddr(&nd_tbl, dev);
1619                 fib6_run_gc(~0UL, net);
1620                 break;
1621         case NETDEV_DOWN:
1622                 neigh_ifdown(&nd_tbl, dev);
1623                 fib6_run_gc(~0UL, net);
1624                 break;
1625         default:
1626                 break;
1627         }
1628
1629         return NOTIFY_DONE;
1630 }
1631
1632 static struct notifier_block ndisc_netdev_notifier = {
1633         .notifier_call = ndisc_netdev_event,
1634 };
1635
1636 #ifdef CONFIG_SYSCTL
1637 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1638                                          const char *func, const char *dev_name)
1639 {
1640         static char warncomm[TASK_COMM_LEN];
1641         static int warned;
1642         if (strcmp(warncomm, current->comm) && warned < 5) {
1643                 strcpy(warncomm, current->comm);
1644                 printk(KERN_WARNING
1645                         "process `%s' is using deprecated sysctl (%s) "
1646                         "net.ipv6.neigh.%s.%s; "
1647                         "Use net.ipv6.neigh.%s.%s_ms "
1648                         "instead.\n",
1649                         warncomm, func,
1650                         dev_name, ctl->procname,
1651                         dev_name, ctl->procname);
1652                 warned++;
1653         }
1654 }
1655
1656 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1657 {
1658         struct net_device *dev = ctl->extra1;
1659         struct inet6_dev *idev;
1660         int ret;
1661
1662         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1663             (strcmp(ctl->procname, "base_reachable_time") == 0))
1664                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1665
1666         if (strcmp(ctl->procname, "retrans_time") == 0)
1667                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1668
1669         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1670                 ret = proc_dointvec_jiffies(ctl, write,
1671                                             filp, buffer, lenp, ppos);
1672
1673         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1674                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1675                 ret = proc_dointvec_ms_jiffies(ctl, write,
1676                                                filp, buffer, lenp, ppos);
1677         else
1678                 ret = -1;
1679
1680         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1681                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1682                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1683                 idev->tstamp = jiffies;
1684                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1685                 in6_dev_put(idev);
1686         }
1687         return ret;
1688 }
1689
1690 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1691                                         int nlen, void __user *oldval,
1692                                         size_t __user *oldlenp,
1693                                         void __user *newval, size_t newlen)
1694 {
1695         struct net_device *dev = ctl->extra1;
1696         struct inet6_dev *idev;
1697         int ret;
1698
1699         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1700             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1701                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1702
1703         switch (ctl->ctl_name) {
1704         case NET_NEIGH_REACHABLE_TIME:
1705                 ret = sysctl_jiffies(ctl, name, nlen,
1706                                      oldval, oldlenp, newval, newlen);
1707                 break;
1708         case NET_NEIGH_RETRANS_TIME_MS:
1709         case NET_NEIGH_REACHABLE_TIME_MS:
1710                  ret = sysctl_ms_jiffies(ctl, name, nlen,
1711                                          oldval, oldlenp, newval, newlen);
1712                  break;
1713         default:
1714                 ret = 0;
1715         }
1716
1717         if (newval && newlen && ret > 0 &&
1718             dev && (idev = in6_dev_get(dev)) != NULL) {
1719                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1720                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1721                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1722                 idev->tstamp = jiffies;
1723                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1724                 in6_dev_put(idev);
1725         }
1726
1727         return ret;
1728 }
1729
1730 #endif
1731
1732 static int ndisc_net_init(struct net *net)
1733 {
1734         struct socket *sock;
1735         struct ipv6_pinfo *np;
1736         struct sock *sk;
1737         int err;
1738
1739         err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock);
1740         if (err < 0) {
1741                 ND_PRINTK0(KERN_ERR
1742                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1743                            err);
1744                 return err;
1745         }
1746
1747         net->ipv6.ndisc_sk = sk = sock->sk;
1748         sk_change_net(sk, net);
1749
1750         np = inet6_sk(sk);
1751         sk->sk_allocation = GFP_ATOMIC;
1752         np->hop_limit = 255;
1753         /* Do not loopback ndisc messages */
1754         np->mc_loop = 0;
1755         sk->sk_prot->unhash(sk);
1756
1757         return 0;
1758 }
1759
1760 static void ndisc_net_exit(struct net *net)
1761 {
1762         sk_release_kernel(net->ipv6.ndisc_sk);
1763 }
1764
1765 static struct pernet_operations ndisc_net_ops = {
1766         .init = ndisc_net_init,
1767         .exit = ndisc_net_exit,
1768 };
1769
1770 int __init ndisc_init(void)
1771 {
1772         int err;
1773
1774         err = register_pernet_subsys(&ndisc_net_ops);
1775         if (err)
1776                 return err;
1777         /*
1778          * Initialize the neighbour table
1779          */
1780         neigh_table_init(&nd_tbl);
1781
1782 #ifdef CONFIG_SYSCTL
1783         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1784                                     NET_IPV6_NEIGH, "ipv6",
1785                                     &ndisc_ifinfo_sysctl_change,
1786                                     &ndisc_ifinfo_sysctl_strategy);
1787         if (err)
1788                 goto out_unregister_pernet;
1789 #endif
1790         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1791         if (err)
1792                 goto out_unregister_sysctl;
1793 out:
1794         return err;
1795
1796 out_unregister_sysctl:
1797 #ifdef CONFIG_SYSCTL
1798         neigh_sysctl_unregister(&nd_tbl.parms);
1799 out_unregister_pernet:
1800 #endif
1801         unregister_pernet_subsys(&ndisc_net_ops);
1802         goto out;
1803 }
1804
1805 void ndisc_cleanup(void)
1806 {
1807         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1808 #ifdef CONFIG_SYSCTL
1809         neigh_sysctl_unregister(&nd_tbl.parms);
1810 #endif
1811         neigh_table_clear(&nd_tbl);
1812         unregister_pernet_subsys(&ndisc_net_ops);
1813 }