[IPV6] SIT: Add PRL management for ISATAP.
[safe/jmp/linux-2.6] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Debug verbosity for this file.  A message issued through ND_PRINTKn is
 * compiled in only when ND_DEBUG >= n; level 0 is always compiled in.
 * Set ND_DEBUG to 3 to get tracing.
 */
#define ND_DEBUG 1

/* Rate-limited printk, and a variant that discards its arguments. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
#define ND_NOPRINTK(x...) do { ; } while(0)

#define ND_PRINTK0 ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1 ND_PRINTK
#else
#define ND_PRINTK1 ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2 ND_PRINTK
#else
#define ND_PRINTK2 ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3 ND_PRINTK
#else
#define ND_PRINTK3 ND_NOPRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <linux/proc_fs.h>
88
89 #include <linux/netfilter.h>
90 #include <linux/netfilter_ipv6.h>
91
92 static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
93 static int ndisc_constructor(struct neighbour *neigh);
94 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
95 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
96 static int pndisc_constructor(struct pneigh_entry *n);
97 static void pndisc_destructor(struct pneigh_entry *n);
98 static void pndisc_redo(struct sk_buff *skb);
99
100 static struct neigh_ops ndisc_generic_ops = {
101         .family =               AF_INET6,
102         .solicit =              ndisc_solicit,
103         .error_report =         ndisc_error_report,
104         .output =               neigh_resolve_output,
105         .connected_output =     neigh_connected_output,
106         .hh_output =            dev_queue_xmit,
107         .queue_xmit =           dev_queue_xmit,
108 };
109
110 static struct neigh_ops ndisc_hh_ops = {
111         .family =               AF_INET6,
112         .solicit =              ndisc_solicit,
113         .error_report =         ndisc_error_report,
114         .output =               neigh_resolve_output,
115         .connected_output =     neigh_resolve_output,
116         .hh_output =            dev_queue_xmit,
117         .queue_xmit =           dev_queue_xmit,
118 };
119
120
121 static struct neigh_ops ndisc_direct_ops = {
122         .family =               AF_INET6,
123         .output =               dev_queue_xmit,
124         .connected_output =     dev_queue_xmit,
125         .hh_output =            dev_queue_xmit,
126         .queue_xmit =           dev_queue_xmit,
127 };
128
129 struct neigh_table nd_tbl = {
130         .family =       AF_INET6,
131         .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
132         .key_len =      sizeof(struct in6_addr),
133         .hash =         ndisc_hash,
134         .constructor =  ndisc_constructor,
135         .pconstructor = pndisc_constructor,
136         .pdestructor =  pndisc_destructor,
137         .proxy_redo =   pndisc_redo,
138         .id =           "ndisc_cache",
139         .parms = {
140                 .tbl =                  &nd_tbl,
141                 .base_reachable_time =  30 * HZ,
142                 .retrans_time =  1 * HZ,
143                 .gc_staletime = 60 * HZ,
144                 .reachable_time =               30 * HZ,
145                 .delay_probe_time =      5 * HZ,
146                 .queue_len =             3,
147                 .ucast_probes =  3,
148                 .mcast_probes =  3,
149                 .anycast_delay =         1 * HZ,
150                 .proxy_delay =          (8 * HZ) / 10,
151                 .proxy_qlen =           64,
152         },
153         .gc_interval =    30 * HZ,
154         .gc_thresh1 =    128,
155         .gc_thresh2 =    512,
156         .gc_thresh3 =   1024,
157 };
158
159 /* ND options */
160 struct ndisc_options {
161         struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
162 #ifdef CONFIG_IPV6_ROUTE_INFO
163         struct nd_opt_hdr *nd_opts_ri;
164         struct nd_opt_hdr *nd_opts_ri_end;
165 #endif
166         struct nd_opt_hdr *nd_useropts;
167         struct nd_opt_hdr *nd_useropts_end;
168 };
169
/* Named accessors for slots of ndisc_options.nd_opt_array. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]

/*
 * Bytes occupied by an option carrying len bytes of payload: the payload
 * plus 2 bytes (type and length), rounded up to a multiple of 8.
 */
#define NDISC_OPT_SPACE(len) ((((len) + 2) + 7) & ~7)
178
/*
 * Number of padding bytes that sit between the option's length byte and
 * the link-layer address for a given ARPHRD_* device type.
 *
 * Only IP-over-InfiniBand uses a pad (2 bytes) today; should RFC 3831
 * IPv6-over-Fibre Channel ever be implemented it may need a pad of 2 too.
 * Every other device type gets 0.
 */
static int ndisc_addr_option_pad(unsigned short type)
{
	if (type == ARPHRD_INFINIBAND)
		return 2;

	return 0;
}
192
193 static inline int ndisc_opt_addr_space(struct net_device *dev)
194 {
195         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
196 }
197
198 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
199                                   unsigned short addr_type)
200 {
201         int space = NDISC_OPT_SPACE(data_len);
202         int pad   = ndisc_addr_option_pad(addr_type);
203
204         opt[0] = type;
205         opt[1] = space>>3;
206
207         memset(opt + 2, 0, pad);
208         opt   += pad;
209         space -= pad;
210
211         memcpy(opt+2, data, data_len);
212         data_len += 2;
213         opt += data_len;
214         if ((space -= data_len) > 0)
215                 memset(opt, 0, space);
216         return opt + space;
217 }
218
219 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
220                                             struct nd_opt_hdr *end)
221 {
222         int type;
223         if (!cur || !end || cur >= end)
224                 return NULL;
225         type = cur->nd_opt_type;
226         do {
227                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
228         } while(cur < end && cur->nd_opt_type != type);
229         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
230 }
231
232 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
233 {
234         return (opt->nd_opt_type == ND_OPT_RDNSS);
235 }
236
237 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
238                                              struct nd_opt_hdr *end)
239 {
240         if (!cur || !end || cur >= end)
241                 return NULL;
242         do {
243                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
244         } while(cur < end && !ndisc_is_useropt(cur));
245         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
246 }
247
248 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
249                                                  struct ndisc_options *ndopts)
250 {
251         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
252
253         if (!nd_opt || opt_len < 0 || !ndopts)
254                 return NULL;
255         memset(ndopts, 0, sizeof(*ndopts));
256         while (opt_len) {
257                 int l;
258                 if (opt_len < sizeof(struct nd_opt_hdr))
259                         return NULL;
260                 l = nd_opt->nd_opt_len << 3;
261                 if (opt_len < l || l == 0)
262                         return NULL;
263                 switch (nd_opt->nd_opt_type) {
264                 case ND_OPT_SOURCE_LL_ADDR:
265                 case ND_OPT_TARGET_LL_ADDR:
266                 case ND_OPT_MTU:
267                 case ND_OPT_REDIRECT_HDR:
268                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
269                                 ND_PRINTK2(KERN_WARNING
270                                            "%s(): duplicated ND6 option found: type=%d\n",
271                                            __func__,
272                                            nd_opt->nd_opt_type);
273                         } else {
274                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
275                         }
276                         break;
277                 case ND_OPT_PREFIX_INFO:
278                         ndopts->nd_opts_pi_end = nd_opt;
279                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
280                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
281                         break;
282 #ifdef CONFIG_IPV6_ROUTE_INFO
283                 case ND_OPT_ROUTE_INFO:
284                         ndopts->nd_opts_ri_end = nd_opt;
285                         if (!ndopts->nd_opts_ri)
286                                 ndopts->nd_opts_ri = nd_opt;
287                         break;
288 #endif
289                 default:
290                         if (ndisc_is_useropt(nd_opt)) {
291                                 ndopts->nd_useropts_end = nd_opt;
292                                 if (!ndopts->nd_useropts)
293                                         ndopts->nd_useropts = nd_opt;
294                         } else {
295                                 /*
296                                  * Unknown options must be silently ignored,
297                                  * to accommodate future extension to the
298                                  * protocol.
299                                  */
300                                 ND_PRINTK2(KERN_NOTICE
301                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
302                                            __func__,
303                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
304                         }
305                 }
306                 opt_len -= l;
307                 nd_opt = ((void *)nd_opt) + l;
308         }
309         return ndopts;
310 }
311
312 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
313                                       struct net_device *dev)
314 {
315         u8 *lladdr = (u8 *)(p + 1);
316         int lladdrlen = p->nd_opt_len << 3;
317         int prepad = ndisc_addr_option_pad(dev->type);
318         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
319                 return NULL;
320         return (lladdr + prepad);
321 }
322
323 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
324 {
325         switch (dev->type) {
326         case ARPHRD_ETHER:
327         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
328         case ARPHRD_FDDI:
329                 ipv6_eth_mc_map(addr, buf);
330                 return 0;
331         case ARPHRD_IEEE802_TR:
332                 ipv6_tr_mc_map(addr,buf);
333                 return 0;
334         case ARPHRD_ARCNET:
335                 ipv6_arcnet_mc_map(addr, buf);
336                 return 0;
337         case ARPHRD_INFINIBAND:
338                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
339                 return 0;
340         default:
341                 if (dir) {
342                         memcpy(buf, dev->broadcast, dev->addr_len);
343                         return 0;
344                 }
345         }
346         return -EINVAL;
347 }
348
349 EXPORT_SYMBOL(ndisc_mc_map);
350
351 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
352 {
353         const u32 *p32 = pkey;
354         u32 addr_hash, i;
355
356         addr_hash = 0;
357         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
358                 addr_hash ^= *p32++;
359
360         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
361 }
362
363 static int ndisc_constructor(struct neighbour *neigh)
364 {
365         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
366         struct net_device *dev = neigh->dev;
367         struct inet6_dev *in6_dev;
368         struct neigh_parms *parms;
369         int is_multicast = ipv6_addr_is_multicast(addr);
370
371         rcu_read_lock();
372         in6_dev = in6_dev_get(dev);
373         if (in6_dev == NULL) {
374                 rcu_read_unlock();
375                 return -EINVAL;
376         }
377
378         parms = in6_dev->nd_parms;
379         __neigh_parms_put(neigh->parms);
380         neigh->parms = neigh_parms_clone(parms);
381         rcu_read_unlock();
382
383         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
384         if (!dev->header_ops) {
385                 neigh->nud_state = NUD_NOARP;
386                 neigh->ops = &ndisc_direct_ops;
387                 neigh->output = neigh->ops->queue_xmit;
388         } else {
389                 if (is_multicast) {
390                         neigh->nud_state = NUD_NOARP;
391                         ndisc_mc_map(addr, neigh->ha, dev, 1);
392                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
393                         neigh->nud_state = NUD_NOARP;
394                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
395                         if (dev->flags&IFF_LOOPBACK)
396                                 neigh->type = RTN_LOCAL;
397                 } else if (dev->flags&IFF_POINTOPOINT) {
398                         neigh->nud_state = NUD_NOARP;
399                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
400                 }
401                 if (dev->header_ops->cache)
402                         neigh->ops = &ndisc_hh_ops;
403                 else
404                         neigh->ops = &ndisc_generic_ops;
405                 if (neigh->nud_state&NUD_VALID)
406                         neigh->output = neigh->ops->connected_output;
407                 else
408                         neigh->output = neigh->ops->output;
409         }
410         in6_dev_put(in6_dev);
411         return 0;
412 }
413
414 static int pndisc_constructor(struct pneigh_entry *n)
415 {
416         struct in6_addr *addr = (struct in6_addr*)&n->key;
417         struct in6_addr maddr;
418         struct net_device *dev = n->dev;
419
420         if (dev == NULL || __in6_dev_get(dev) == NULL)
421                 return -EINVAL;
422         addrconf_addr_solict_mult(addr, &maddr);
423         ipv6_dev_mc_inc(dev, &maddr);
424         return 0;
425 }
426
427 static void pndisc_destructor(struct pneigh_entry *n)
428 {
429         struct in6_addr *addr = (struct in6_addr*)&n->key;
430         struct in6_addr maddr;
431         struct net_device *dev = n->dev;
432
433         if (dev == NULL || __in6_dev_get(dev) == NULL)
434                 return;
435         addrconf_addr_solict_mult(addr, &maddr);
436         ipv6_dev_mc_dec(dev, &maddr);
437 }
438
439 /*
440  *      Send a Neighbour Advertisement
441  */
442 static void __ndisc_send(struct net_device *dev,
443                          struct neighbour *neigh,
444                          struct in6_addr *daddr, struct in6_addr *saddr,
445                          struct icmp6hdr *icmp6h, struct in6_addr *target,
446                          int llinfo)
447 {
448         struct flowi fl;
449         struct dst_entry *dst;
450         struct net *net = dev_net(dev);
451         struct sock *sk = net->ipv6.ndisc_sk;
452         struct sk_buff *skb;
453         struct icmp6hdr *hdr;
454         struct inet6_dev *idev;
455         int len;
456         int err;
457         u8 *opt, type;
458
459         type = icmp6h->icmp6_type;
460
461         icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
462
463         dst = icmp6_dst_alloc(dev, neigh, daddr);
464         if (!dst)
465                 return;
466
467         err = xfrm_lookup(&dst, &fl, NULL, 0);
468         if (err < 0)
469                 return;
470
471         if (!dev->addr_len)
472                 llinfo = 0;
473
474         len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
475         if (llinfo)
476                 len += ndisc_opt_addr_space(dev);
477
478         skb = sock_alloc_send_skb(sk,
479                                   (MAX_HEADER + sizeof(struct ipv6hdr) +
480                                    len + LL_RESERVED_SPACE(dev)),
481                                   1, &err);
482         if (!skb) {
483                 ND_PRINTK0(KERN_ERR
484                            "ICMPv6 ND: %s() failed to allocate an skb.\n",
485                            __func__);
486                 dst_release(dst);
487                 return;
488         }
489
490         skb_reserve(skb, LL_RESERVED_SPACE(dev));
491         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
492
493         skb->transport_header = skb->tail;
494         skb_put(skb, len);
495
496         hdr = (struct icmp6hdr *)skb_transport_header(skb);
497         memcpy(hdr, icmp6h, sizeof(*hdr));
498
499         opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
500         if (target) {
501                 ipv6_addr_copy((struct in6_addr *)opt, target);
502                 opt += sizeof(*target);
503         }
504
505         if (llinfo)
506                 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
507                                        dev->addr_len, dev->type);
508
509         hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
510                                            IPPROTO_ICMPV6,
511                                            csum_partial((__u8 *) hdr,
512                                                         len, 0));
513
514         skb->dst = dst;
515
516         idev = in6_dev_get(dst->dev);
517         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
518
519         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
520                       dst_output);
521         if (!err) {
522                 ICMP6MSGOUT_INC_STATS(idev, type);
523                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
524         }
525
526         if (likely(idev != NULL))
527                 in6_dev_put(idev);
528 }
529
530 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
531                    struct in6_addr *daddr, struct in6_addr *solicited_addr,
532                    int router, int solicited, int override, int inc_opt)
533 {
534         struct in6_addr tmpaddr;
535         struct inet6_ifaddr *ifp;
536         struct in6_addr *src_addr;
537         struct icmp6hdr icmp6h = {
538                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
539         };
540
541         /* for anycast or proxy, solicited_addr != src_addr */
542         ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
543         if (ifp) {
544                 src_addr = solicited_addr;
545                 if (ifp->flags & IFA_F_OPTIMISTIC)
546                         override = 0;
547                 in6_ifa_put(ifp);
548         } else {
549                 if (ipv6_dev_get_saddr(dev, daddr,
550                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
551                                        &tmpaddr))
552                         return;
553                 src_addr = &tmpaddr;
554         }
555
556         icmp6h.icmp6_router = router;
557         icmp6h.icmp6_solicited = solicited;
558         icmp6h.icmp6_override = override;
559
560         __ndisc_send(dev, neigh, daddr, src_addr,
561                      &icmp6h, solicited_addr,
562                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
563 }
564
565 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
566                    struct in6_addr *solicit,
567                    struct in6_addr *daddr, struct in6_addr *saddr)
568 {
569         struct in6_addr addr_buf;
570         struct icmp6hdr icmp6h = {
571                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
572         };
573
574         if (saddr == NULL) {
575                 if (ipv6_get_lladdr(dev, &addr_buf,
576                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
577                         return;
578                 saddr = &addr_buf;
579         }
580
581         __ndisc_send(dev, neigh, daddr, saddr,
582                      &icmp6h, solicit,
583                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
584 }
585
586 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
587                    struct in6_addr *daddr)
588 {
589         struct icmp6hdr icmp6h = {
590                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
591         };
592         int send_sllao = dev->addr_len;
593
594 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
595         /*
596          * According to section 2.2 of RFC 4429, we must not
597          * send router solicitations with a sllao from
598          * optimistic addresses, but we may send the solicitation
599          * if we don't include the sllao.  So here we check
600          * if our address is optimistic, and if so, we
601          * suppress the inclusion of the sllao.
602          */
603         if (send_sllao) {
604                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
605                                                            dev, 1);
606                 if (ifp) {
607                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
608                                 send_sllao = 0;
609                         }
610                         in6_ifa_put(ifp);
611                 } else {
612                         send_sllao = 0;
613                 }
614         }
615 #endif
616         __ndisc_send(dev, NULL, daddr, saddr,
617                      &icmp6h, NULL,
618                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
619 }
620
621
/*
 * error_report hook of ndisc_generic_ops / ndisc_hh_ops: signal link
 * failure for @skb's destination, then free the packet.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/* "The sender MUST return an ICMP destination unreachable" */
	dst_link_failure(skb);

	kfree_skb(skb);
}
631
632 /* Called with locked neigh: either read or both */
633
634 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
635 {
636         struct in6_addr *saddr = NULL;
637         struct in6_addr mcaddr;
638         struct net_device *dev = neigh->dev;
639         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
640         int probes = atomic_read(&neigh->probes);
641
642         if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
643                 saddr = &ipv6_hdr(skb)->saddr;
644
645         if ((probes -= neigh->parms->ucast_probes) < 0) {
646                 if (!(neigh->nud_state & NUD_VALID)) {
647                         ND_PRINTK1(KERN_DEBUG
648                                    "%s(): trying to ucast probe in NUD_INVALID: "
649                                    NIP6_FMT "\n",
650                                    __func__,
651                                    NIP6(*target));
652                 }
653                 ndisc_send_ns(dev, neigh, target, target, saddr);
654         } else if ((probes -= neigh->parms->app_probes) < 0) {
655 #ifdef CONFIG_ARPD
656                 neigh_app_ns(neigh);
657 #endif
658         } else {
659                 addrconf_addr_solict_mult(target, &mcaddr);
660                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
661         }
662 }
663
664 static int pndisc_is_router(const void *pkey,
665                             struct net_device *dev)
666 {
667         struct pneigh_entry *n;
668         int ret = -1;
669
670         read_lock_bh(&nd_tbl.lock);
671         n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
672         if (n)
673                 ret = !!(n->flags & NTF_ROUTER);
674         read_unlock_bh(&nd_tbl.lock);
675
676         return ret;
677 }
678
679 static void ndisc_recv_ns(struct sk_buff *skb)
680 {
681         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
682         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
683         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
684         u8 *lladdr = NULL;
685         u32 ndoptlen = skb->tail - (skb->transport_header +
686                                     offsetof(struct nd_msg, opt));
687         struct ndisc_options ndopts;
688         struct net_device *dev = skb->dev;
689         struct inet6_ifaddr *ifp;
690         struct inet6_dev *idev = NULL;
691         struct neighbour *neigh;
692         int dad = ipv6_addr_any(saddr);
693         int inc;
694         int is_router = -1;
695
696         if (ipv6_addr_is_multicast(&msg->target)) {
697                 ND_PRINTK2(KERN_WARNING
698                            "ICMPv6 NS: multicast target address");
699                 return;
700         }
701
702         /*
703          * RFC2461 7.1.1:
704          * DAD has to be destined for solicited node multicast address.
705          */
706         if (dad &&
707             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
708               daddr->s6_addr32[1] == htonl(0x00000000) &&
709               daddr->s6_addr32[2] == htonl(0x00000001) &&
710               daddr->s6_addr [12] == 0xff )) {
711                 ND_PRINTK2(KERN_WARNING
712                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
713                 return;
714         }
715
716         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
717                 ND_PRINTK2(KERN_WARNING
718                            "ICMPv6 NS: invalid ND options\n");
719                 return;
720         }
721
722         if (ndopts.nd_opts_src_lladdr) {
723                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
724                 if (!lladdr) {
725                         ND_PRINTK2(KERN_WARNING
726                                    "ICMPv6 NS: invalid link-layer address length\n");
727                         return;
728                 }
729
730                 /* RFC2461 7.1.1:
731                  *      If the IP source address is the unspecified address,
732                  *      there MUST NOT be source link-layer address option
733                  *      in the message.
734                  */
735                 if (dad) {
736                         ND_PRINTK2(KERN_WARNING
737                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
738                         return;
739                 }
740         }
741
742         inc = ipv6_addr_is_multicast(daddr);
743
744         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
745         if (ifp) {
746
747                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
748                         if (dad) {
749                                 if (dev->type == ARPHRD_IEEE802_TR) {
750                                         const unsigned char *sadr;
751                                         sadr = skb_mac_header(skb);
752                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
753                                             sadr[9] == dev->dev_addr[1] &&
754                                             sadr[10] == dev->dev_addr[2] &&
755                                             sadr[11] == dev->dev_addr[3] &&
756                                             sadr[12] == dev->dev_addr[4] &&
757                                             sadr[13] == dev->dev_addr[5]) {
758                                                 /* looped-back to us */
759                                                 goto out;
760                                         }
761                                 }
762
763                                 /*
764                                  * We are colliding with another node
765                                  * who is doing DAD
766                                  * so fail our DAD process
767                                  */
768                                 addrconf_dad_failure(ifp);
769                                 return;
770                         } else {
771                                 /*
772                                  * This is not a dad solicitation.
773                                  * If we are an optimistic node,
774                                  * we should respond.
775                                  * Otherwise, we should ignore it.
776                                  */
777                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
778                                         goto out;
779                         }
780                 }
781
782                 idev = ifp->idev;
783         } else {
784                 idev = in6_dev_get(dev);
785                 if (!idev) {
786                         /* XXX: count this drop? */
787                         return;
788                 }
789
790                 if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
791                     (idev->cnf.forwarding &&
792                      (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
793                      (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
794                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
795                             skb->pkt_type != PACKET_HOST &&
796                             inc != 0 &&
797                             idev->nd_parms->proxy_delay != 0) {
798                                 /*
799                                  * for anycast or proxy,
800                                  * sender should delay its response
801                                  * by a random time between 0 and
802                                  * MAX_ANYCAST_DELAY_TIME seconds.
803                                  * (RFC2461) -- yoshfuji
804                                  */
805                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
806                                 if (n)
807                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
808                                 goto out;
809                         }
810                 } else
811                         goto out;
812         }
813
814         if (is_router < 0)
815                 is_router = !!idev->cnf.forwarding;
816
817         if (dad) {
818                 struct in6_addr maddr;
819
820                 ipv6_addr_all_nodes(&maddr);
821                 ndisc_send_na(dev, NULL, &maddr, &msg->target,
822                               is_router, 0, (ifp != NULL), 1);
823                 goto out;
824         }
825
826         if (inc)
827                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
828         else
829                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
830
831         /*
832          *      update / create cache entry
833          *      for the source address
834          */
835         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
836                                !inc || lladdr || !dev->addr_len);
837         if (neigh)
838                 neigh_update(neigh, lladdr, NUD_STALE,
839                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
840                              NEIGH_UPDATE_F_OVERRIDE);
841         if (neigh || !dev->header_ops) {
842                 ndisc_send_na(dev, neigh, saddr, &msg->target,
843                               is_router,
844                               1, (ifp != NULL && inc), inc);
845                 if (neigh)
846                         neigh_release(neigh);
847         }
848
849 out:
850         if (ifp)
851                 in6_ifa_put(ifp);
852         else
853                 in6_dev_put(idev);
854
855         return;
856 }
857
858 static void ndisc_recv_na(struct sk_buff *skb)
859 {
860         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
861         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
862         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
863         u8 *lladdr = NULL;
864         u32 ndoptlen = skb->tail - (skb->transport_header +
865                                     offsetof(struct nd_msg, opt));
866         struct ndisc_options ndopts;
867         struct net_device *dev = skb->dev;
868         struct inet6_ifaddr *ifp;
869         struct neighbour *neigh;
870
871         if (skb->len < sizeof(struct nd_msg)) {
872                 ND_PRINTK2(KERN_WARNING
873                            "ICMPv6 NA: packet too short\n");
874                 return;
875         }
876
877         if (ipv6_addr_is_multicast(&msg->target)) {
878                 ND_PRINTK2(KERN_WARNING
879                            "ICMPv6 NA: target address is multicast.\n");
880                 return;
881         }
882
883         if (ipv6_addr_is_multicast(daddr) &&
884             msg->icmph.icmp6_solicited) {
885                 ND_PRINTK2(KERN_WARNING
886                            "ICMPv6 NA: solicited NA is multicasted.\n");
887                 return;
888         }
889
890         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
891                 ND_PRINTK2(KERN_WARNING
892                            "ICMPv6 NS: invalid ND option\n");
893                 return;
894         }
895         if (ndopts.nd_opts_tgt_lladdr) {
896                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
897                 if (!lladdr) {
898                         ND_PRINTK2(KERN_WARNING
899                                    "ICMPv6 NA: invalid link-layer address length\n");
900                         return;
901                 }
902         }
903         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
904         if (ifp) {
905                 if (ifp->flags & IFA_F_TENTATIVE) {
906                         addrconf_dad_failure(ifp);
907                         return;
908                 }
909                 /* What should we make now? The advertisement
910                    is invalid, but ndisc specs say nothing
911                    about it. It could be misconfiguration, or
912                    an smart proxy agent tries to help us :-)
913                  */
914                 ND_PRINTK1(KERN_WARNING
915                            "ICMPv6 NA: someone advertises our address on %s!\n",
916                            ifp->idev->dev->name);
917                 in6_ifa_put(ifp);
918                 return;
919         }
920         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
921
922         if (neigh) {
923                 u8 old_flags = neigh->flags;
924
925                 if (neigh->nud_state & NUD_FAILED)
926                         goto out;
927
928                 /*
929                  * Don't update the neighbor cache entry on a proxy NA from
930                  * ourselves because either the proxied node is off link or it
931                  * has already sent a NA to us.
932                  */
933                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
934                     ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
935                     pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
936                         /* XXX: idev->cnf.prixy_ndp */
937                         goto out;
938                 }
939
940                 neigh_update(neigh, lladdr,
941                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
942                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
943                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
944                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
945                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
946
947                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
948                         /*
949                          * Change: router to host
950                          */
951                         struct rt6_info *rt;
952                         rt = rt6_get_dflt_router(saddr, dev);
953                         if (rt)
954                                 ip6_del_rt(rt);
955                 }
956
957 out:
958                 neigh_release(neigh);
959         }
960 }
961
962 static void ndisc_recv_rs(struct sk_buff *skb)
963 {
964         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
965         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
966         struct neighbour *neigh;
967         struct inet6_dev *idev;
968         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
969         struct ndisc_options ndopts;
970         u8 *lladdr = NULL;
971
972         if (skb->len < sizeof(*rs_msg))
973                 return;
974
975         idev = in6_dev_get(skb->dev);
976         if (!idev) {
977                 if (net_ratelimit())
978                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
979                 return;
980         }
981
982         /* Don't accept RS if we're not in router mode */
983         if (!idev->cnf.forwarding)
984                 goto out;
985
986         /*
987          * Don't update NCE if src = ::;
988          * this implies that the source node has no ip address assigned yet.
989          */
990         if (ipv6_addr_any(saddr))
991                 goto out;
992
993         /* Parse ND options */
994         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
995                 if (net_ratelimit())
996                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
997                 goto out;
998         }
999
1000         if (ndopts.nd_opts_src_lladdr) {
1001                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1002                                              skb->dev);
1003                 if (!lladdr)
1004                         goto out;
1005         }
1006
1007         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1008         if (neigh) {
1009                 neigh_update(neigh, lladdr, NUD_STALE,
1010                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1011                              NEIGH_UPDATE_F_OVERRIDE|
1012                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1013                 neigh_release(neigh);
1014         }
1015 out:
1016         in6_dev_put(idev);
1017 }
1018
1019 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1020 {
1021         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1022         struct sk_buff *skb;
1023         struct nlmsghdr *nlh;
1024         struct nduseroptmsg *ndmsg;
1025         struct net *net = dev_net(ra->dev);
1026         int err;
1027         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1028                                     + (opt->nd_opt_len << 3));
1029         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1030
1031         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1032         if (skb == NULL) {
1033                 err = -ENOBUFS;
1034                 goto errout;
1035         }
1036
1037         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1038         if (nlh == NULL) {
1039                 goto nla_put_failure;
1040         }
1041
1042         ndmsg = nlmsg_data(nlh);
1043         ndmsg->nduseropt_family = AF_INET6;
1044         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1045         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1046         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1047         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1048
1049         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1050
1051         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1052                 &ipv6_hdr(ra)->saddr);
1053         nlmsg_end(skb, nlh);
1054
1055         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1056                           GFP_ATOMIC);
1057         if (err < 0)
1058                 goto errout;
1059
1060         return;
1061
1062 nla_put_failure:
1063         nlmsg_free(skb);
1064         err = -EMSGSIZE;
1065 errout:
1066         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1067 }
1068
1069 static void ndisc_router_discovery(struct sk_buff *skb)
1070 {
1071         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1072         struct neighbour *neigh = NULL;
1073         struct inet6_dev *in6_dev;
1074         struct rt6_info *rt = NULL;
1075         int lifetime;
1076         struct ndisc_options ndopts;
1077         int optlen;
1078         unsigned int pref = 0;
1079
1080         __u8 * opt = (__u8 *)(ra_msg + 1);
1081
1082         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1083
1084         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1085                 ND_PRINTK2(KERN_WARNING
1086                            "ICMPv6 RA: source address is not link-local.\n");
1087                 return;
1088         }
1089         if (optlen < 0) {
1090                 ND_PRINTK2(KERN_WARNING
1091                            "ICMPv6 RA: packet too short\n");
1092                 return;
1093         }
1094
1095         if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1096                 ND_PRINTK2(KERN_WARNING
1097                            "ICMPv6 RA: from host or unauthorized router\n");
1098                 return;
1099         }
1100
1101         /*
1102          *      set the RA_RECV flag in the interface
1103          */
1104
1105         in6_dev = in6_dev_get(skb->dev);
1106         if (in6_dev == NULL) {
1107                 ND_PRINTK0(KERN_ERR
1108                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1109                            skb->dev->name);
1110                 return;
1111         }
1112         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1113                 in6_dev_put(in6_dev);
1114                 return;
1115         }
1116
1117         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1118                 in6_dev_put(in6_dev);
1119                 ND_PRINTK2(KERN_WARNING
1120                            "ICMP6 RA: invalid ND options\n");
1121                 return;
1122         }
1123
1124         /* skip link-specific parameters from interior routers */
1125         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1126                 goto skip_linkparms;
1127
1128         if (in6_dev->if_flags & IF_RS_SENT) {
1129                 /*
1130                  *      flag that an RA was received after an RS was sent
1131                  *      out on this interface.
1132                  */
1133                 in6_dev->if_flags |= IF_RA_RCVD;
1134         }
1135
1136         /*
1137          * Remember the managed/otherconf flags from most recently
1138          * received RA message (RFC 2462) -- yoshfuji
1139          */
1140         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1141                                 IF_RA_OTHERCONF)) |
1142                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1143                                         IF_RA_MANAGED : 0) |
1144                                 (ra_msg->icmph.icmp6_addrconf_other ?
1145                                         IF_RA_OTHERCONF : 0);
1146
1147         if (!in6_dev->cnf.accept_ra_defrtr)
1148                 goto skip_defrtr;
1149
1150         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1151
1152 #ifdef CONFIG_IPV6_ROUTER_PREF
1153         pref = ra_msg->icmph.icmp6_router_pref;
1154         /* 10b is handled as if it were 00b (medium) */
1155         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1156             !in6_dev->cnf.accept_ra_rtr_pref)
1157                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1158 #endif
1159
1160         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1161
1162         if (rt)
1163                 neigh = rt->rt6i_nexthop;
1164
1165         if (rt && lifetime == 0) {
1166                 neigh_clone(neigh);
1167                 ip6_del_rt(rt);
1168                 rt = NULL;
1169         }
1170
1171         if (rt == NULL && lifetime) {
1172                 ND_PRINTK3(KERN_DEBUG
1173                            "ICMPv6 RA: adding default router.\n");
1174
1175                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1176                 if (rt == NULL) {
1177                         ND_PRINTK0(KERN_ERR
1178                                    "ICMPv6 RA: %s() failed to add default route.\n",
1179                                    __func__);
1180                         in6_dev_put(in6_dev);
1181                         return;
1182                 }
1183
1184                 neigh = rt->rt6i_nexthop;
1185                 if (neigh == NULL) {
1186                         ND_PRINTK0(KERN_ERR
1187                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1188                                    __func__);
1189                         dst_release(&rt->u.dst);
1190                         in6_dev_put(in6_dev);
1191                         return;
1192                 }
1193                 neigh->flags |= NTF_ROUTER;
1194         } else if (rt) {
1195                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1196         }
1197
1198         if (rt)
1199                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1200
1201         if (ra_msg->icmph.icmp6_hop_limit) {
1202                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1203                 if (rt)
1204                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1205         }
1206
1207 skip_defrtr:
1208
1209         /*
1210          *      Update Reachable Time and Retrans Timer
1211          */
1212
1213         if (in6_dev->nd_parms) {
1214                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1215
1216                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1217                         rtime = (rtime*HZ)/1000;
1218                         if (rtime < HZ/10)
1219                                 rtime = HZ/10;
1220                         in6_dev->nd_parms->retrans_time = rtime;
1221                         in6_dev->tstamp = jiffies;
1222                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1223                 }
1224
1225                 rtime = ntohl(ra_msg->reachable_time);
1226                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1227                         rtime = (rtime*HZ)/1000;
1228
1229                         if (rtime < HZ/10)
1230                                 rtime = HZ/10;
1231
1232                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1233                                 in6_dev->nd_parms->base_reachable_time = rtime;
1234                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1235                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1236                                 in6_dev->tstamp = jiffies;
1237                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1238                         }
1239                 }
1240         }
1241
1242 skip_linkparms:
1243
1244         /*
1245          *      Process options.
1246          */
1247
1248         if (!neigh)
1249                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1250                                        skb->dev, 1);
1251         if (neigh) {
1252                 u8 *lladdr = NULL;
1253                 if (ndopts.nd_opts_src_lladdr) {
1254                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1255                                                      skb->dev);
1256                         if (!lladdr) {
1257                                 ND_PRINTK2(KERN_WARNING
1258                                            "ICMPv6 RA: invalid link-layer address length\n");
1259                                 goto out;
1260                         }
1261                 }
1262                 neigh_update(neigh, lladdr, NUD_STALE,
1263                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1264                              NEIGH_UPDATE_F_OVERRIDE|
1265                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1266                              NEIGH_UPDATE_F_ISROUTER);
1267         }
1268
1269 #ifdef CONFIG_IPV6_ROUTE_INFO
1270         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1271                 struct nd_opt_hdr *p;
1272                 for (p = ndopts.nd_opts_ri;
1273                      p;
1274                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1275                         if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1276                                 continue;
1277                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1278                                       &ipv6_hdr(skb)->saddr);
1279                 }
1280         }
1281 #endif
1282
1283         /* skip link-specific ndopts from interior routers */
1284         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1285                 goto out;
1286
1287         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1288                 struct nd_opt_hdr *p;
1289                 for (p = ndopts.nd_opts_pi;
1290                      p;
1291                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1292                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1293                 }
1294         }
1295
1296         if (ndopts.nd_opts_mtu) {
1297                 __be32 n;
1298                 u32 mtu;
1299
1300                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1301                 mtu = ntohl(n);
1302
1303                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1304                         ND_PRINTK2(KERN_WARNING
1305                                    "ICMPv6 RA: invalid mtu: %d\n",
1306                                    mtu);
1307                 } else if (in6_dev->cnf.mtu6 != mtu) {
1308                         in6_dev->cnf.mtu6 = mtu;
1309
1310                         if (rt)
1311                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1312
1313                         rt6_mtu_change(skb->dev, mtu);
1314                 }
1315         }
1316
1317         if (ndopts.nd_useropts) {
1318                 struct nd_opt_hdr *p;
1319                 for (p = ndopts.nd_useropts;
1320                      p;
1321                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1322                         ndisc_ra_useropt(skb, p);
1323                 }
1324         }
1325
1326         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1327                 ND_PRINTK2(KERN_WARNING
1328                            "ICMPv6 RA: invalid RA options");
1329         }
1330 out:
1331         if (rt)
1332                 dst_release(&rt->u.dst);
1333         else if (neigh)
1334                 neigh_release(neigh);
1335         in6_dev_put(in6_dev);
1336 }
1337
1338 static void ndisc_redirect_rcv(struct sk_buff *skb)
1339 {
1340         struct inet6_dev *in6_dev;
1341         struct icmp6hdr *icmph;
1342         struct in6_addr *dest;
1343         struct in6_addr *target;        /* new first hop to destination */
1344         struct neighbour *neigh;
1345         int on_link = 0;
1346         struct ndisc_options ndopts;
1347         int optlen;
1348         u8 *lladdr = NULL;
1349
1350         switch (skb->ndisc_nodetype) {
1351         case NDISC_NODETYPE_HOST:
1352         case NDISC_NODETYPE_NODEFAULT:
1353                 ND_PRINTK2(KERN_WARNING
1354                            "ICMPv6 Redirect: from host or unauthorized router\n");
1355                 return;
1356         }
1357
1358         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1359                 ND_PRINTK2(KERN_WARNING
1360                            "ICMPv6 Redirect: source address is not link-local.\n");
1361                 return;
1362         }
1363
1364         optlen = skb->tail - skb->transport_header;
1365         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1366
1367         if (optlen < 0) {
1368                 ND_PRINTK2(KERN_WARNING
1369                            "ICMPv6 Redirect: packet too short\n");
1370                 return;
1371         }
1372
1373         icmph = icmp6_hdr(skb);
1374         target = (struct in6_addr *) (icmph + 1);
1375         dest = target + 1;
1376
1377         if (ipv6_addr_is_multicast(dest)) {
1378                 ND_PRINTK2(KERN_WARNING
1379                            "ICMPv6 Redirect: destination address is multicast.\n");
1380                 return;
1381         }
1382
1383         if (ipv6_addr_equal(dest, target)) {
1384                 on_link = 1;
1385         } else if (ipv6_addr_type(target) !=
1386                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1387                 ND_PRINTK2(KERN_WARNING
1388                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1389                 return;
1390         }
1391
1392         in6_dev = in6_dev_get(skb->dev);
1393         if (!in6_dev)
1394                 return;
1395         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1396                 in6_dev_put(in6_dev);
1397                 return;
1398         }
1399
1400         /* RFC2461 8.1:
1401          *      The IP source address of the Redirect MUST be the same as the current
1402          *      first-hop router for the specified ICMP Destination Address.
1403          */
1404
1405         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1406                 ND_PRINTK2(KERN_WARNING
1407                            "ICMPv6 Redirect: invalid ND options\n");
1408                 in6_dev_put(in6_dev);
1409                 return;
1410         }
1411         if (ndopts.nd_opts_tgt_lladdr) {
1412                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1413                                              skb->dev);
1414                 if (!lladdr) {
1415                         ND_PRINTK2(KERN_WARNING
1416                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1417                         in6_dev_put(in6_dev);
1418                         return;
1419                 }
1420         }
1421
1422         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1423         if (neigh) {
1424                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1425                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1426                              on_link);
1427                 neigh_release(neigh);
1428         }
1429         in6_dev_put(in6_dev);
1430 }
1431
1432 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1433                          struct in6_addr *target)
1434 {
1435         struct net_device *dev = skb->dev;
1436         struct net *net = dev_net(dev);
1437         struct sock *sk = net->ipv6.ndisc_sk;
1438         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1439         struct sk_buff *buff;
1440         struct icmp6hdr *icmph;
1441         struct in6_addr saddr_buf;
1442         struct in6_addr *addrp;
1443         struct rt6_info *rt;
1444         struct dst_entry *dst;
1445         struct inet6_dev *idev;
1446         struct flowi fl;
1447         u8 *opt;
1448         int rd_len;
1449         int err;
1450         u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1451
1452         if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1453                 ND_PRINTK2(KERN_WARNING
1454                            "ICMPv6 Redirect: no link-local address on %s\n",
1455                            dev->name);
1456                 return;
1457         }
1458
1459         if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1460             ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1461                 ND_PRINTK2(KERN_WARNING
1462                         "ICMPv6 Redirect: target address is not link-local unicast.\n");
1463                 return;
1464         }
1465
1466         icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
1467                          &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1468
1469         dst = ip6_route_output(net, NULL, &fl);
1470         if (dst == NULL)
1471                 return;
1472
1473         err = xfrm_lookup(&dst, &fl, NULL, 0);
1474         if (err)
1475                 return;
1476
1477         rt = (struct rt6_info *) dst;
1478
1479         if (rt->rt6i_flags & RTF_GATEWAY) {
1480                 ND_PRINTK2(KERN_WARNING
1481                            "ICMPv6 Redirect: destination is not a neighbour.\n");
1482                 dst_release(dst);
1483                 return;
1484         }
1485         if (!xrlim_allow(dst, 1*HZ)) {
1486                 dst_release(dst);
1487                 return;
1488         }
1489
1490         if (dev->addr_len) {
1491                 read_lock_bh(&neigh->lock);
1492                 if (neigh->nud_state & NUD_VALID) {
1493                         memcpy(ha_buf, neigh->ha, dev->addr_len);
1494                         read_unlock_bh(&neigh->lock);
1495                         ha = ha_buf;
1496                         len += ndisc_opt_addr_space(dev);
1497                 } else
1498                         read_unlock_bh(&neigh->lock);
1499         }
1500
1501         rd_len = min_t(unsigned int,
1502                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1503         rd_len &= ~0x7;
1504         len += rd_len;
1505
1506         buff = sock_alloc_send_skb(sk,
1507                                    (MAX_HEADER + sizeof(struct ipv6hdr) +
1508                                     len + LL_RESERVED_SPACE(dev)),
1509                                    1, &err);
1510         if (buff == NULL) {
1511                 ND_PRINTK0(KERN_ERR
1512                            "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1513                            __func__);
1514                 dst_release(dst);
1515                 return;
1516         }
1517
1518         skb_reserve(buff, LL_RESERVED_SPACE(dev));
1519         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1520                    IPPROTO_ICMPV6, len);
1521
1522         skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1523         skb_put(buff, len);
1524         icmph = icmp6_hdr(buff);
1525
1526         memset(icmph, 0, sizeof(struct icmp6hdr));
1527         icmph->icmp6_type = NDISC_REDIRECT;
1528
1529         /*
1530          *      copy target and destination addresses
1531          */
1532
1533         addrp = (struct in6_addr *)(icmph + 1);
1534         ipv6_addr_copy(addrp, target);
1535         addrp++;
1536         ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1537
1538         opt = (u8*) (addrp + 1);
1539
1540         /*
1541          *      include target_address option
1542          */
1543
1544         if (ha)
1545                 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1546                                              dev->addr_len, dev->type);
1547
1548         /*
1549          *      build redirect option and copy skb over to the new packet.
1550          */
1551
1552         memset(opt, 0, 8);
1553         *(opt++) = ND_OPT_REDIRECT_HDR;
1554         *(opt++) = (rd_len >> 3);
1555         opt += 6;
1556
1557         memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1558
1559         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1560                                              len, IPPROTO_ICMPV6,
1561                                              csum_partial((u8 *) icmph, len, 0));
1562
1563         buff->dst = dst;
1564         idev = in6_dev_get(dst->dev);
1565         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1566         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1567                       dst_output);
1568         if (!err) {
1569                 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1570                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1571         }
1572
1573         if (likely(idev != NULL))
1574                 in6_dev_put(idev);
1575 }
1576
/*
 * Re-run Neighbour Solicitation handling on @skb and then free it.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);

	/* The skb is owned here, so it is released after processing. */
	kfree_skb(skb);
}
1582
1583 int ndisc_rcv(struct sk_buff *skb)
1584 {
1585         struct nd_msg *msg;
1586
1587         if (!pskb_may_pull(skb, skb->len))
1588                 return 0;
1589
1590         msg = (struct nd_msg *)skb_transport_header(skb);
1591
1592         __skb_push(skb, skb->data - skb_transport_header(skb));
1593
1594         if (ipv6_hdr(skb)->hop_limit != 255) {
1595                 ND_PRINTK2(KERN_WARNING
1596                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1597                            ipv6_hdr(skb)->hop_limit);
1598                 return 0;
1599         }
1600
1601         if (msg->icmph.icmp6_code != 0) {
1602                 ND_PRINTK2(KERN_WARNING
1603                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1604                            msg->icmph.icmp6_code);
1605                 return 0;
1606         }
1607
1608         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1609
1610         switch (msg->icmph.icmp6_type) {
1611         case NDISC_NEIGHBOUR_SOLICITATION:
1612                 ndisc_recv_ns(skb);
1613                 break;
1614
1615         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1616                 ndisc_recv_na(skb);
1617                 break;
1618
1619         case NDISC_ROUTER_SOLICITATION:
1620                 ndisc_recv_rs(skb);
1621                 break;
1622
1623         case NDISC_ROUTER_ADVERTISEMENT:
1624                 ndisc_router_discovery(skb);
1625                 break;
1626
1627         case NDISC_REDIRECT:
1628                 ndisc_redirect_rcv(skb);
1629                 break;
1630         }
1631
1632         return 0;
1633 }
1634
1635 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1636 {
1637         struct net_device *dev = ptr;
1638         struct net *net = dev_net(dev);
1639
1640         switch (event) {
1641         case NETDEV_CHANGEADDR:
1642                 neigh_changeaddr(&nd_tbl, dev);
1643                 fib6_run_gc(~0UL, net);
1644                 break;
1645         case NETDEV_DOWN:
1646                 neigh_ifdown(&nd_tbl, dev);
1647                 fib6_run_gc(~0UL, net);
1648                 break;
1649         default:
1650                 break;
1651         }
1652
1653         return NOTIFY_DONE;
1654 }
1655
1656 static struct notifier_block ndisc_netdev_notifier = {
1657         .notifier_call = ndisc_netdev_event,
1658 };
1659
1660 #ifdef CONFIG_SYSCTL
1661 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1662                                          const char *func, const char *dev_name)
1663 {
1664         static char warncomm[TASK_COMM_LEN];
1665         static int warned;
1666         if (strcmp(warncomm, current->comm) && warned < 5) {
1667                 strcpy(warncomm, current->comm);
1668                 printk(KERN_WARNING
1669                         "process `%s' is using deprecated sysctl (%s) "
1670                         "net.ipv6.neigh.%s.%s; "
1671                         "Use net.ipv6.neigh.%s.%s_ms "
1672                         "instead.\n",
1673                         warncomm, func,
1674                         dev_name, ctl->procname,
1675                         dev_name, ctl->procname);
1676                 warned++;
1677         }
1678 }
1679
1680 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1681 {
1682         struct net_device *dev = ctl->extra1;
1683         struct inet6_dev *idev;
1684         int ret;
1685
1686         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1687             (strcmp(ctl->procname, "base_reachable_time") == 0))
1688                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1689
1690         if (strcmp(ctl->procname, "retrans_time") == 0)
1691                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1692
1693         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1694                 ret = proc_dointvec_jiffies(ctl, write,
1695                                             filp, buffer, lenp, ppos);
1696
1697         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1698                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1699                 ret = proc_dointvec_ms_jiffies(ctl, write,
1700                                                filp, buffer, lenp, ppos);
1701         else
1702                 ret = -1;
1703
1704         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1705                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1706                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1707                 idev->tstamp = jiffies;
1708                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1709                 in6_dev_put(idev);
1710         }
1711         return ret;
1712 }
1713
1714 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1715                                         int nlen, void __user *oldval,
1716                                         size_t __user *oldlenp,
1717                                         void __user *newval, size_t newlen)
1718 {
1719         struct net_device *dev = ctl->extra1;
1720         struct inet6_dev *idev;
1721         int ret;
1722
1723         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1724             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1725                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1726
1727         switch (ctl->ctl_name) {
1728         case NET_NEIGH_REACHABLE_TIME:
1729                 ret = sysctl_jiffies(ctl, name, nlen,
1730                                      oldval, oldlenp, newval, newlen);
1731                 break;
1732         case NET_NEIGH_RETRANS_TIME_MS:
1733         case NET_NEIGH_REACHABLE_TIME_MS:
1734                  ret = sysctl_ms_jiffies(ctl, name, nlen,
1735                                          oldval, oldlenp, newval, newlen);
1736                  break;
1737         default:
1738                 ret = 0;
1739         }
1740
1741         if (newval && newlen && ret > 0 &&
1742             dev && (idev = in6_dev_get(dev)) != NULL) {
1743                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1744                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1745                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1746                 idev->tstamp = jiffies;
1747                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1748                 in6_dev_put(idev);
1749         }
1750
1751         return ret;
1752 }
1753
1754 #endif
1755
1756 static int ndisc_net_init(struct net *net)
1757 {
1758         struct socket *sock;
1759         struct ipv6_pinfo *np;
1760         struct sock *sk;
1761         int err;
1762
1763         err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock);
1764         if (err < 0) {
1765                 ND_PRINTK0(KERN_ERR
1766                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1767                            err);
1768                 return err;
1769         }
1770
1771         net->ipv6.ndisc_sk = sk = sock->sk;
1772         sk_change_net(sk, net);
1773
1774         np = inet6_sk(sk);
1775         sk->sk_allocation = GFP_ATOMIC;
1776         np->hop_limit = 255;
1777         /* Do not loopback ndisc messages */
1778         np->mc_loop = 0;
1779         sk->sk_prot->unhash(sk);
1780
1781         return 0;
1782 }
1783
1784 static void ndisc_net_exit(struct net *net)
1785 {
1786         sk_release_kernel(net->ipv6.ndisc_sk);
1787 }
1788
1789 static struct pernet_operations ndisc_net_ops = {
1790         .init = ndisc_net_init,
1791         .exit = ndisc_net_exit,
1792 };
1793
1794 int __init ndisc_init(void)
1795 {
1796         int err;
1797
1798         err = register_pernet_subsys(&ndisc_net_ops);
1799         if (err)
1800                 return err;
1801         /*
1802          * Initialize the neighbour table
1803          */
1804         neigh_table_init(&nd_tbl);
1805
1806 #ifdef CONFIG_SYSCTL
1807         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1808                                     NET_IPV6_NEIGH, "ipv6",
1809                                     &ndisc_ifinfo_sysctl_change,
1810                                     &ndisc_ifinfo_sysctl_strategy);
1811         if (err)
1812                 goto out_unregister_pernet;
1813 #endif
1814         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1815         if (err)
1816                 goto out_unregister_sysctl;
1817 out:
1818         return err;
1819
1820 out_unregister_sysctl:
1821 #ifdef CONFIG_SYSCTL
1822         neigh_sysctl_unregister(&nd_tbl.parms);
1823 out_unregister_pernet:
1824 #endif
1825         unregister_pernet_subsys(&ndisc_net_ops);
1826         goto out;
1827 }
1828
1829 void ndisc_cleanup(void)
1830 {
1831         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1832 #ifdef CONFIG_SYSCTL
1833         neigh_sysctl_unregister(&nd_tbl.parms);
1834 #endif
1835         neigh_table_clear(&nd_tbl);
1836         unregister_pernet_subsys(&ndisc_net_ops);
1837 }