SAFE public projects git trees. - safe/jmp/linux-2.6/blob - net/ipv4/ipmr.c

   1 /*
   2  *      IP multicast routing support for mrouted 3.6/3.8
   3  *
   4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *        Linux Consultancy and Custom Driver Development
   6  *
   7  *      This program is free software; you can redistribute it and/or
   8  *      modify it under the terms of the GNU General Public License
   9  *      as published by the Free Software Foundation; either version
  10  *      2 of the License, or (at your option) any later version.
  11  *
  12  *      Fixes:
  13  *      Michael Chastain        :       Incorrect size of copying.
  14  *      Alan Cox                :       Added the cache manager code
  15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
  16  *      Mike McLagan            :       Routing by source
  17  *      Malcolm Beattie         :       Buffer handling fixes.
  18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  19  *      SVR Anand               :       Fixed several multicast bugs and problems.
  20  *      Alexey Kuznetsov        :       Status, optimisations and more.
  21  *      Brad Parker             :       Better behaviour on mrouted upcall
  22  *                                      overflow.
  23  *      Carlos Picoto           :       PIMv1 Support
  24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
  25  *                                      Relax this requirement to work with older peers.
  26  *
  27  */
  28
  29 #include <asm/system.h>
  30 #include <asm/uaccess.h>
  31 #include <linux/types.h>
  32 #include <linux/capability.h>
  33 #include <linux/errno.h>
  34 #include <linux/timer.h>
  35 #include <linux/mm.h>
  36 #include <linux/kernel.h>
  37 #include <linux/fcntl.h>
  38 #include <linux/stat.h>
  39 #include <linux/socket.h>
  40 #include <linux/in.h>
  41 #include <linux/inet.h>
  42 #include <linux/netdevice.h>
  43 #include <linux/inetdevice.h>
  44 #include <linux/igmp.h>
  45 #include <linux/proc_fs.h>
  46 #include <linux/seq_file.h>
  47 #include <linux/mroute.h>
  48 #include <linux/init.h>
  49 #include <linux/if_ether.h>
  50 #include <net/net_namespace.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <linux/skbuff.h>
  54 #include <net/route.h>
  55 #include <net/sock.h>
  56 #include <net/icmp.h>
  57 #include <net/udp.h>
  58 #include <net/raw.h>
  59 #include <linux/notifier.h>
  60 #include <linux/if_arp.h>
  61 #include <linux/netfilter_ipv4.h>
  62 #include <net/ipip.h>
  63 #include <net/checksum.h>
  64 #include <net/netlink.h>
  65
/* Either PIM-SM version selects the shared PIM-SM code guarded by CONFIG_IP_PIMSM. */
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

/* Non-zero when slot _idx of the namespace's vif table has a device bound. */
#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is free of exclusive locks entirely.
 */

/* Slab cache from which struct mfc_cache entries are allocated and freed. */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for routines used before their definitions. */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

/* Timer re-armed by ipmr_expire_process() and ipmr_cache_unresolved(). */
static struct timer_list ipmr_expire_timer;
 103
 104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 105
 106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 107 {
 108         struct net *net = dev_net(dev);
 109
 110         dev_close(dev);
 111
 112         dev = __dev_get_by_name(net, "tunl0");
 113         if (dev) {
 114                 const struct net_device_ops *ops = dev->netdev_ops;
 115                 struct ifreq ifr;
 116                 struct ip_tunnel_parm p;
 117
 118                 memset(&p, 0, sizeof(p));
 119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 121                 p.iph.version = 4;
 122                 p.iph.ihl = 5;
 123                 p.iph.protocol = IPPROTO_IPIP;
 124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 126
 127                 if (ops->ndo_do_ioctl) {
 128                         mm_segment_t oldfs = get_fs();
 129
 130                         set_fs(KERNEL_DS);
 131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 132                         set_fs(oldfs);
 133                 }
 134         }
 135 }
 136
 137 static
 138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 139 {
 140         struct net_device  *dev;
 141
 142         dev = __dev_get_by_name(net, "tunl0");
 143
 144         if (dev) {
 145                 const struct net_device_ops *ops = dev->netdev_ops;
 146                 int err;
 147                 struct ifreq ifr;
 148                 struct ip_tunnel_parm p;
 149                 struct in_device  *in_dev;
 150
 151                 memset(&p, 0, sizeof(p));
 152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 154                 p.iph.version = 4;
 155                 p.iph.ihl = 5;
 156                 p.iph.protocol = IPPROTO_IPIP;
 157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 159
 160                 if (ops->ndo_do_ioctl) {
 161                         mm_segment_t oldfs = get_fs();
 162
 163                         set_fs(KERNEL_DS);
 164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 165                         set_fs(oldfs);
 166                 } else
 167                         err = -EOPNOTSUPP;
 168
 169                 dev = NULL;
 170
 171                 if (err == 0 &&
 172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
 173                         dev->flags |= IFF_MULTICAST;
 174
 175                         in_dev = __in_dev_get_rtnl(dev);
 176                         if (in_dev == NULL)
 177                                 goto failure;
 178
 179                         ipv4_devconf_setall(in_dev);
 180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 181
 182                         if (dev_open(dev))
 183                                 goto failure;
 184                         dev_hold(dev);
 185                 }
 186         }
 187         return dev;
 188
 189 failure:
 190         /* allow the register to be completed before unregistering. */
 191         rtnl_unlock();
 192         rtnl_lock();
 193
 194         unregister_netdevice(dev);
 195         return NULL;
 196 }
 197
 198 #ifdef CONFIG_IP_PIMSM
 199
 200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 201 {
 202         struct net *net = dev_net(dev);
 203
 204         read_lock(&mrt_lock);
 205         dev->stats.tx_bytes += skb->len;
 206         dev->stats.tx_packets++;
 207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
 208                           IGMPMSG_WHOLEPKT);
 209         read_unlock(&mrt_lock);
 210         kfree_skb(skb);
 211         return NETDEV_TX_OK;
 212 }
 213
/* Device operations of the PIM register vif; only transmit is provided. */
static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};
 217
 218 static void reg_vif_setup(struct net_device *dev)
 219 {
 220         dev->type               = ARPHRD_PIMREG;
 221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 222         dev->flags              = IFF_NOARP;
 223         dev->netdev_ops         = &reg_vif_netdev_ops,
 224         dev->destructor         = free_netdev;
 225         dev->features           |= NETIF_F_NETNS_LOCAL;
 226 }
 227
 228 static struct net_device *ipmr_reg_vif(struct net *net)
 229 {
 230         struct net_device *dev;
 231         struct in_device *in_dev;
 232
 233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 234
 235         if (dev == NULL)
 236                 return NULL;
 237
 238         dev_net_set(dev, net);
 239
 240         if (register_netdevice(dev)) {
 241                 free_netdev(dev);
 242                 return NULL;
 243         }
 244         dev->iflink = 0;
 245
 246         rcu_read_lock();
 247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 248                 rcu_read_unlock();
 249                 goto failure;
 250         }
 251
 252         ipv4_devconf_setall(in_dev);
 253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 254         rcu_read_unlock();
 255
 256         if (dev_open(dev))
 257                 goto failure;
 258
 259         dev_hold(dev);
 260
 261         return dev;
 262
 263 failure:
 264         /* allow the register to be completed before unregistering. */
 265         rtnl_unlock();
 266         rtnl_lock();
 267
 268         unregister_netdevice(dev);
 269         return NULL;
 270 }
 271 #endif
 272
 273 /*
 274  *      Delete a VIF entry
 275  *      @notify: Set to 1, if the caller is a notifier_call
 276  */
 277
 278 static int vif_delete(struct net *net, int vifi, int notify,
 279                       struct list_head *head)
 280 {
 281         struct vif_device *v;
 282         struct net_device *dev;
 283         struct in_device *in_dev;
 284
 285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
 286                 return -EADDRNOTAVAIL;
 287
 288         v = &net->ipv4.vif_table[vifi];
 289
 290         write_lock_bh(&mrt_lock);
 291         dev = v->dev;
 292         v->dev = NULL;
 293
 294         if (!dev) {
 295                 write_unlock_bh(&mrt_lock);
 296                 return -EADDRNOTAVAIL;
 297         }
 298
 299 #ifdef CONFIG_IP_PIMSM
 300         if (vifi == net->ipv4.mroute_reg_vif_num)
 301                 net->ipv4.mroute_reg_vif_num = -1;
 302 #endif
 303
 304         if (vifi+1 == net->ipv4.maxvif) {
 305                 int tmp;
 306                 for (tmp=vifi-1; tmp>=0; tmp--) {
 307                         if (VIF_EXISTS(net, tmp))
 308                                 break;
 309                 }
 310                 net->ipv4.maxvif = tmp+1;
 311         }
 312
 313         write_unlock_bh(&mrt_lock);
 314
 315         dev_set_allmulti(dev, -1);
 316
 317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 319                 ip_rt_multicast_event(in_dev);
 320         }
 321
 322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 323                 unregister_netdevice_queue(dev, head);
 324
 325         dev_put(dev);
 326         return 0;
 327 }
 328
 329 static inline void ipmr_cache_free(struct mfc_cache *c)
 330 {
 331         release_net(mfc_net(c));
 332         kmem_cache_free(mrt_cachep, c);
 333 }
 334
 335 /* Destroy an unresolved cache entry, killing queued skbs
 336    and reporting error to netlink readers.
 337  */
 338
 339 static void ipmr_destroy_unres(struct mfc_cache *c)
 340 {
 341         struct sk_buff *skb;
 342         struct nlmsgerr *e;
 343         struct net *net = mfc_net(c);
 344
 345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 346
 347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 348                 if (ip_hdr(skb)->version == 0) {
 349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 350                         nlh->nlmsg_type = NLMSG_ERROR;
 351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 352                         skb_trim(skb, nlh->nlmsg_len);
 353                         e = NLMSG_DATA(nlh);
 354                         e->error = -ETIMEDOUT;
 355                         memset(&e->msg, 0, sizeof(e->msg));
 356
 357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 358                 } else
 359                         kfree_skb(skb);
 360         }
 361
 362         ipmr_cache_free(c);
 363 }
 364
 365
 366 /* Single timer process for all the unresolved queue. */
 367
 368 static void ipmr_expire_process(unsigned long dummy)
 369 {
 370         unsigned long now;
 371         unsigned long expires;
 372         struct mfc_cache *c, **cp;
 373
 374         if (!spin_trylock(&mfc_unres_lock)) {
 375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 376                 return;
 377         }
 378
 379         if (mfc_unres_queue == NULL)
 380                 goto out;
 381
 382         now = jiffies;
 383         expires = 10*HZ;
 384         cp = &mfc_unres_queue;
 385
 386         while ((c=*cp) != NULL) {
 387                 if (time_after(c->mfc_un.unres.expires, now)) {
 388                         unsigned long interval = c->mfc_un.unres.expires - now;
 389                         if (interval < expires)
 390                                 expires = interval;
 391                         cp = &c->next;
 392                         continue;
 393                 }
 394
 395                 *cp = c->next;
 396
 397                 ipmr_destroy_unres(c);
 398         }
 399
 400         if (mfc_unres_queue != NULL)
 401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
 402
 403 out:
 404         spin_unlock(&mfc_unres_lock);
 405 }
 406
 407 /* Fill oifs list. It is called under write locked mrt_lock. */
 408
 409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 410 {
 411         int vifi;
 412         struct net *net = mfc_net(cache);
 413
 414         cache->mfc_un.res.minvif = MAXVIFS;
 415         cache->mfc_un.res.maxvif = 0;
 416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 417
 418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
 419                 if (VIF_EXISTS(net, vifi) &&
 420                     ttls[vifi] && ttls[vifi] < 255) {
 421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 422                         if (cache->mfc_un.res.minvif > vifi)
 423                                 cache->mfc_un.res.minvif = vifi;
 424                         if (cache->mfc_un.res.maxvif <= vifi)
 425                                 cache->mfc_un.res.maxvif = vifi + 1;
 426                 }
 427         }
 428 }
 429
 430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 431 {
 432         int vifi = vifc->vifc_vifi;
 433         struct vif_device *v = &net->ipv4.vif_table[vifi];
 434         struct net_device *dev;
 435         struct in_device *in_dev;
 436         int err;
 437
 438         /* Is vif busy ? */
 439         if (VIF_EXISTS(net, vifi))
 440                 return -EADDRINUSE;
 441
 442         switch (vifc->vifc_flags) {
 443 #ifdef CONFIG_IP_PIMSM
 444         case VIFF_REGISTER:
 445                 /*
 446                  * Special Purpose VIF in PIM
 447                  * All the packets will be sent to the daemon
 448                  */
 449                 if (net->ipv4.mroute_reg_vif_num >= 0)
 450                         return -EADDRINUSE;
 451                 dev = ipmr_reg_vif(net);
 452                 if (!dev)
 453                         return -ENOBUFS;
 454                 err = dev_set_allmulti(dev, 1);
 455                 if (err) {
 456                         unregister_netdevice(dev);
 457                         dev_put(dev);
 458                         return err;
 459                 }
 460                 break;
 461 #endif
 462         case VIFF_TUNNEL:
 463                 dev = ipmr_new_tunnel(net, vifc);
 464                 if (!dev)
 465                         return -ENOBUFS;
 466                 err = dev_set_allmulti(dev, 1);
 467                 if (err) {
 468                         ipmr_del_tunnel(dev, vifc);
 469                         dev_put(dev);
 470                         return err;
 471                 }
 472                 break;
 473
 474         case VIFF_USE_IFINDEX:
 475         case 0:
 476                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
 477                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
 478                         if (dev && dev->ip_ptr == NULL) {
 479                                 dev_put(dev);
 480                                 return -EADDRNOTAVAIL;
 481                         }
 482                 } else
 483                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 484
 485                 if (!dev)
 486                         return -EADDRNOTAVAIL;
 487                 err = dev_set_allmulti(dev, 1);
 488                 if (err) {
 489                         dev_put(dev);
 490                         return err;
 491                 }
 492                 break;
 493         default:
 494                 return -EINVAL;
 495         }
 496
 497         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 498                 return -EADDRNOTAVAIL;
 499         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 500         ip_rt_multicast_event(in_dev);
 501
 502         /*
 503          *      Fill in the VIF structures
 504          */
 505         v->rate_limit = vifc->vifc_rate_limit;
 506         v->local = vifc->vifc_lcl_addr.s_addr;
 507         v->remote = vifc->vifc_rmt_addr.s_addr;
 508         v->flags = vifc->vifc_flags;
 509         if (!mrtsock)
 510                 v->flags |= VIFF_STATIC;
 511         v->threshold = vifc->vifc_threshold;
 512         v->bytes_in = 0;
 513         v->bytes_out = 0;
 514         v->pkt_in = 0;
 515         v->pkt_out = 0;
 516         v->link = dev->ifindex;
 517         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 518                 v->link = dev->iflink;
 519
 520         /* And finish update writing critical data */
 521         write_lock_bh(&mrt_lock);
 522         v->dev = dev;
 523 #ifdef CONFIG_IP_PIMSM
 524         if (v->flags&VIFF_REGISTER)
 525                 net->ipv4.mroute_reg_vif_num = vifi;
 526 #endif
 527         if (vifi+1 > net->ipv4.maxvif)
 528                 net->ipv4.maxvif = vifi+1;
 529         write_unlock_bh(&mrt_lock);
 530         return 0;
 531 }
 532
 533 static struct mfc_cache *ipmr_cache_find(struct net *net,
 534                                          __be32 origin,
 535                                          __be32 mcastgrp)
 536 {
 537         int line = MFC_HASH(mcastgrp, origin);
 538         struct mfc_cache *c;
 539
 540         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 541                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 542                         break;
 543         }
 544         return c;
 545 }
 546
 547 /*
 548  *      Allocate a multicast cache entry
 549  */
 550 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 551 {
 552         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 553         if (c == NULL)
 554                 return NULL;
 555         c->mfc_un.res.minvif = MAXVIFS;
 556         mfc_net_set(c, net);
 557         return c;
 558 }
 559
 560 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 561 {
 562         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 563         if (c == NULL)
 564                 return NULL;
 565         skb_queue_head_init(&c->mfc_un.unres.unresolved);
 566         c->mfc_un.unres.expires = jiffies + 10*HZ;
 567         mfc_net_set(c, net);
 568         return c;
 569 }
 570
 571 /*
 572  *      A cache entry has gone into a resolved state from queued
 573  */
 574
 575 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 576 {
 577         struct sk_buff *skb;
 578         struct nlmsgerr *e;
 579
 580         /*
 581          *      Play the pending entries through our router
 582          */
 583
 584         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 585                 if (ip_hdr(skb)->version == 0) {
 586                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 587
 588                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 589                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
 590                                                   (u8 *)nlh);
 591                         } else {
 592                                 nlh->nlmsg_type = NLMSG_ERROR;
 593                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 594                                 skb_trim(skb, nlh->nlmsg_len);
 595                                 e = NLMSG_DATA(nlh);
 596                                 e->error = -EMSGSIZE;
 597                                 memset(&e->msg, 0, sizeof(e->msg));
 598                         }
 599
 600                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 601                 } else
 602                         ip_mr_forward(skb, c, 0);
 603         }
 604 }
 605
/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 *
 *      @net:    namespace whose mroute socket receives the report
 *      @pkt:    packet that triggered the report
 *      @vifi:   vif recorded in the message (ignored for IGMPMSG_WHOLEPKT,
 *               which records the register vif number instead)
 *      @assert: message type stored in im_msgtype
 *
 *      Returns -ENOBUFS if no skb could be allocated, -EINVAL if there is
 *      no mroute socket, otherwise the result of sock_queue_rcv_skb().
 *      @pkt itself is never freed here.
 */

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

        /* WHOLEPKT reports carry the full packet; others only a small header. */
#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                /* The prepended header is option-less and adds its own size to the length. */
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        /* The message type is written to both the IGMP header and the igmpmsg. */
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        /* Without a routing daemon socket there is nobody to report to. */
        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
 695
 696 /*
 697  *      Queue a packet for resolution. It gets locked cache entry!
 698  */
 699
 700 static int
 701 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 702 {
 703         int err;
 704         struct mfc_cache *c;
 705         const struct iphdr *iph = ip_hdr(skb);
 706
 707         spin_lock_bh(&mfc_unres_lock);
 708         for (c=mfc_unres_queue; c; c=c->next) {
 709                 if (net_eq(mfc_net(c), net) &&
 710                     c->mfc_mcastgrp == iph->daddr &&
 711                     c->mfc_origin == iph->saddr)
 712                         break;
 713         }
 714
 715         if (c == NULL) {
 716                 /*
 717                  *      Create a new entry if allowable
 718                  */
 719
 720                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
 721                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
 722                         spin_unlock_bh(&mfc_unres_lock);
 723
 724                         kfree_skb(skb);
 725                         return -ENOBUFS;
 726                 }
 727
 728                 /*
 729                  *      Fill in the new cache entry
 730                  */
 731                 c->mfc_parent   = -1;
 732                 c->mfc_origin   = iph->saddr;
 733                 c->mfc_mcastgrp = iph->daddr;
 734
 735                 /*
 736                  *      Reflect first query at mrouted.
 737                  */
 738                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
 739                 if (err < 0) {
 740                         /* If the report failed throw the cache entry
 741                            out - Brad Parker
 742                          */
 743                         spin_unlock_bh(&mfc_unres_lock);
 744
 745                         ipmr_cache_free(c);
 746                         kfree_skb(skb);
 747                         return err;
 748                 }
 749
 750                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
 751                 c->next = mfc_unres_queue;
 752                 mfc_unres_queue = c;
 753
 754                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 755         }
 756
 757         /*
 758          *      See if we can append the packet
 759          */
 760         if (c->mfc_un.unres.unresolved.qlen>3) {
 761                 kfree_skb(skb);
 762                 err = -ENOBUFS;
 763         } else {
 764                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 765                 err = 0;
 766         }
 767
 768         spin_unlock_bh(&mfc_unres_lock);
 769         return err;
 770 }
 771
 772 /*
 773  *      MFC cache manipulation by user space mroute daemon
 774  */
 775
 776 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 777 {
 778         int line;
 779         struct mfc_cache *c, **cp;
 780
 781         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 782
 783         for (cp = &net->ipv4.mfc_cache_array[line];
 784              (c = *cp) != NULL; cp = &c->next) {
 785                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 786                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 787                         write_lock_bh(&mrt_lock);
 788                         *cp = c->next;
 789                         write_unlock_bh(&mrt_lock);
 790
 791                         ipmr_cache_free(c);
 792                         return 0;
 793                 }
 794         }
 795         return -ENOENT;
 796 }
 797
 798 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 799 {
 800         int line;
 801         struct mfc_cache *uc, *c, **cp;
 802
 803         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 804
 805         for (cp = &net->ipv4.mfc_cache_array[line];
 806              (c = *cp) != NULL; cp = &c->next) {
 807                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 808                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 809                         break;
 810         }
 811
 812         if (c != NULL) {
 813                 write_lock_bh(&mrt_lock);
 814                 c->mfc_parent = mfc->mfcc_parent;
 815                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
 816                 if (!mrtsock)
 817                         c->mfc_flags |= MFC_STATIC;
 818                 write_unlock_bh(&mrt_lock);
 819                 return 0;
 820         }
 821
 822         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 823                 return -EINVAL;
 824
 825         c = ipmr_cache_alloc(net);
 826         if (c == NULL)
 827                 return -ENOMEM;
 828
 829         c->mfc_origin = mfc->mfcc_origin.s_addr;
 830         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 831         c->mfc_parent = mfc->mfcc_parent;
 832         ipmr_update_thresholds(c, mfc->mfcc_ttls);
 833         if (!mrtsock)
 834                 c->mfc_flags |= MFC_STATIC;
 835
 836         write_lock_bh(&mrt_lock);
 837         c->next = net->ipv4.mfc_cache_array[line];
 838         net->ipv4.mfc_cache_array[line] = c;
 839         write_unlock_bh(&mrt_lock);
 840
 841         /*
 842          *      Check to see if we resolved a queued list. If so we
 843          *      need to send on the frames and tidy up.
 844          */
 845         spin_lock_bh(&mfc_unres_lock);
 846         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 847              cp = &uc->next) {
 848                 if (net_eq(mfc_net(uc), net) &&
 849                     uc->mfc_origin == c->mfc_origin &&
 850                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 851                         *cp = uc->next;
 852                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 853                         break;
 854                 }
 855         }
 856         if (mfc_unres_queue == NULL)
 857                 del_timer(&ipmr_expire_timer);
 858         spin_unlock_bh(&mfc_unres_lock);
 859
 860         if (uc) {
 861                 ipmr_cache_resolve(uc, c);
 862                 ipmr_cache_free(uc);
 863         }
 864         return 0;
 865 }
 866
 867 /*
 868  *      Close the multicast socket, and clear the vif tables etc
 869  */
 870
 871 static void mroute_clean_tables(struct net *net)
 872 {
 873         int i;
 874         LIST_HEAD(list);
 875
 876         /*
 877          *      Shut down all active vif entries
 878          */
 879         for (i = 0; i < net->ipv4.maxvif; i++) {
 880                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
 881                         vif_delete(net, i, 0, &list);
 882         }
 883         unregister_netdevice_many(&list);
 884
 885         /*
 886          *      Wipe the cache
 887          */
 888         for (i=0; i<MFC_LINES; i++) {
 889                 struct mfc_cache *c, **cp;
 890
 891                 cp = &net->ipv4.mfc_cache_array[i];
 892                 while ((c = *cp) != NULL) {
 893                         if (c->mfc_flags&MFC_STATIC) {
 894                                 cp = &c->next;
 895                                 continue;
 896                         }
 897                         write_lock_bh(&mrt_lock);
 898                         *cp = c->next;
 899                         write_unlock_bh(&mrt_lock);
 900
 901                         ipmr_cache_free(c);
 902                 }
 903         }
 904
 905         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 906                 struct mfc_cache *c, **cp;
 907
 908                 spin_lock_bh(&mfc_unres_lock);
 909                 cp = &mfc_unres_queue;
 910                 while ((c = *cp) != NULL) {
 911                         if (!net_eq(mfc_net(c), net)) {
 912                                 cp = &c->next;
 913                                 continue;
 914                         }
 915                         *cp = c->next;
 916
 917                         ipmr_destroy_unres(c);
 918                 }
 919                 spin_unlock_bh(&mfc_unres_lock);
 920         }
 921 }
 922
 923 static void mrtsock_destruct(struct sock *sk)
 924 {
 925         struct net *net = sock_net(sk);
 926
 927         rtnl_lock();
 928         if (sk == net->ipv4.mroute_sk) {
 929                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 930
 931                 write_lock_bh(&mrt_lock);
 932                 net->ipv4.mroute_sk = NULL;
 933                 write_unlock_bh(&mrt_lock);
 934
 935                 mroute_clean_tables(net);
 936         }
 937         rtnl_unlock();
 938 }
 939
 940 /*
 941  *      Socket options and virtual interface manipulation. The whole
 942  *      virtual interface system is a complete heap, but unfortunately
 943  *      that's how BSD mrouted happens to think. Maybe one day with a proper
 944  *      MOSPF/PIM router set up we can clean this up.
 945  */
 946
 947 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 948 {
 949         int ret;
 950         struct vifctl vif;
 951         struct mfcctl mfc;
 952         struct net *net = sock_net(sk);
 953
 954         if (optname != MRT_INIT) {
 955                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 956                         return -EACCES;
 957         }
 958
 959         switch (optname) {
 960         case MRT_INIT:
 961                 if (sk->sk_type != SOCK_RAW ||
 962                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
 963                         return -EOPNOTSUPP;
 964                 if (optlen != sizeof(int))
 965                         return -ENOPROTOOPT;
 966
 967                 rtnl_lock();
 968                 if (net->ipv4.mroute_sk) {
 969                         rtnl_unlock();
 970                         return -EADDRINUSE;
 971                 }
 972
 973                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
 974                 if (ret == 0) {
 975                         write_lock_bh(&mrt_lock);
 976                         net->ipv4.mroute_sk = sk;
 977                         write_unlock_bh(&mrt_lock);
 978
 979                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 980                 }
 981                 rtnl_unlock();
 982                 return ret;
 983         case MRT_DONE:
 984                 if (sk != net->ipv4.mroute_sk)
 985                         return -EACCES;
 986                 return ip_ra_control(sk, 0, NULL);
 987         case MRT_ADD_VIF:
 988         case MRT_DEL_VIF:
 989                 if (optlen != sizeof(vif))
 990                         return -EINVAL;
 991                 if (copy_from_user(&vif, optval, sizeof(vif)))
 992                         return -EFAULT;
 993                 if (vif.vifc_vifi >= MAXVIFS)
 994                         return -ENFILE;
 995                 rtnl_lock();
 996                 if (optname == MRT_ADD_VIF) {
 997                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
 998                 } else {
 999                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1000                 }
1001                 rtnl_unlock();
1002                 return ret;
1003
1004                 /*
1005                  *      Manipulate the forwarding caches. These live
1006                  *      in a sort of kernel/user symbiosis.
1007                  */
1008         case MRT_ADD_MFC:
1009         case MRT_DEL_MFC:
1010                 if (optlen != sizeof(mfc))
1011                         return -EINVAL;
1012                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1013                         return -EFAULT;
1014                 rtnl_lock();
1015                 if (optname == MRT_DEL_MFC)
1016                         ret = ipmr_mfc_delete(net, &mfc);
1017                 else
1018                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1019                 rtnl_unlock();
1020                 return ret;
1021                 /*
1022                  *      Control PIM assert.
1023                  */
1024         case MRT_ASSERT:
1025         {
1026                 int v;
1027                 if (get_user(v,(int __user *)optval))
1028                         return -EFAULT;
1029                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1030                 return 0;
1031         }
1032 #ifdef CONFIG_IP_PIMSM
1033         case MRT_PIM:
1034         {
1035                 int v;
1036
1037                 if (get_user(v,(int __user *)optval))
1038                         return -EFAULT;
1039                 v = (v) ? 1 : 0;
1040
1041                 rtnl_lock();
1042                 ret = 0;
1043                 if (v != net->ipv4.mroute_do_pim) {
1044                         net->ipv4.mroute_do_pim = v;
1045                         net->ipv4.mroute_do_assert = v;
1046                 }
1047                 rtnl_unlock();
1048                 return ret;
1049         }
1050 #endif
1051         /*
1052          *      Spurious command, or MRT_VERSION which you cannot
1053          *      set.
1054          */
1055         default:
1056                 return -ENOPROTOOPT;
1057         }
1058 }
1059
1060 /*
1061  *      Getsock opt support for the multicast routing system.
1062  */
1063
1064 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1065 {
1066         int olr;
1067         int val;
1068         struct net *net = sock_net(sk);
1069
1070         if (optname != MRT_VERSION &&
1071 #ifdef CONFIG_IP_PIMSM
1072            optname!=MRT_PIM &&
1073 #endif
1074            optname!=MRT_ASSERT)
1075                 return -ENOPROTOOPT;
1076
1077         if (get_user(olr, optlen))
1078                 return -EFAULT;
1079
1080         olr = min_t(unsigned int, olr, sizeof(int));
1081         if (olr < 0)
1082                 return -EINVAL;
1083
1084         if (put_user(olr, optlen))
1085                 return -EFAULT;
1086         if (optname == MRT_VERSION)
1087                 val = 0x0305;
1088 #ifdef CONFIG_IP_PIMSM
1089         else if (optname == MRT_PIM)
1090                 val = net->ipv4.mroute_do_pim;
1091 #endif
1092         else
1093                 val = net->ipv4.mroute_do_assert;
1094         if (copy_to_user(optval, &val, olr))
1095                 return -EFAULT;
1096         return 0;
1097 }
1098
1099 /*
1100  *      The IP multicast ioctl support routines.
1101  */
1102
1103 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1104 {
1105         struct sioc_sg_req sr;
1106         struct sioc_vif_req vr;
1107         struct vif_device *vif;
1108         struct mfc_cache *c;
1109         struct net *net = sock_net(sk);
1110
1111         switch (cmd) {
1112         case SIOCGETVIFCNT:
1113                 if (copy_from_user(&vr, arg, sizeof(vr)))
1114                         return -EFAULT;
1115                 if (vr.vifi >= net->ipv4.maxvif)
1116                         return -EINVAL;
1117                 read_lock(&mrt_lock);
1118                 vif = &net->ipv4.vif_table[vr.vifi];
1119                 if (VIF_EXISTS(net, vr.vifi)) {
1120                         vr.icount = vif->pkt_in;
1121                         vr.ocount = vif->pkt_out;
1122                         vr.ibytes = vif->bytes_in;
1123                         vr.obytes = vif->bytes_out;
1124                         read_unlock(&mrt_lock);
1125
1126                         if (copy_to_user(arg, &vr, sizeof(vr)))
1127                                 return -EFAULT;
1128                         return 0;
1129                 }
1130                 read_unlock(&mrt_lock);
1131                 return -EADDRNOTAVAIL;
1132         case SIOCGETSGCNT:
1133                 if (copy_from_user(&sr, arg, sizeof(sr)))
1134                         return -EFAULT;
1135
1136                 read_lock(&mrt_lock);
1137                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1138                 if (c) {
1139                         sr.pktcnt = c->mfc_un.res.pkt;
1140                         sr.bytecnt = c->mfc_un.res.bytes;
1141                         sr.wrong_if = c->mfc_un.res.wrong_if;
1142                         read_unlock(&mrt_lock);
1143
1144                         if (copy_to_user(arg, &sr, sizeof(sr)))
1145                                 return -EFAULT;
1146                         return 0;
1147                 }
1148                 read_unlock(&mrt_lock);
1149                 return -EADDRNOTAVAIL;
1150         default:
1151                 return -ENOIOCTLCMD;
1152         }
1153 }
1154
1155
1156 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1157 {
1158         struct net_device *dev = ptr;
1159         struct net *net = dev_net(dev);
1160         struct vif_device *v;
1161         int ct;
1162         LIST_HEAD(list);
1163
1164         if (!net_eq(dev_net(dev), net))
1165                 return NOTIFY_DONE;
1166
1167         if (event != NETDEV_UNREGISTER)
1168                 return NOTIFY_DONE;
1169         v = &net->ipv4.vif_table[0];
1170         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1171                 if (v->dev == dev)
1172                         vif_delete(net, ct, 1, &list);
1173         }
1174         unregister_netdevice_many(&list);
1175         return NOTIFY_DONE;
1176 }
1177
1178
/*
 *      Notifier block whose callback is ipmr_device_event(); the place
 *      where it gets registered is outside this part of the file.
 */
1179 static struct notifier_block ip_mr_notifier = {
1180         .notifier_call = ipmr_device_event,
1181 };
1182
1183 /*
1184  *      Encapsulate a packet by attaching a valid IPIP header to it.
1185  *      This avoids tunnel drivers and other mess and gives us the speed so
1186  *      important for multicast video.
1187  */
1188
1189 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1190 {
1191         struct iphdr *iph;
1192         struct iphdr *old_iph = ip_hdr(skb);
1193
1194         skb_push(skb, sizeof(struct iphdr));
1195         skb->transport_header = skb->network_header;
1196         skb_reset_network_header(skb);
1197         iph = ip_hdr(skb);
1198
1199         iph->version    =       4;
1200         iph->tos        =       old_iph->tos;
1201         iph->ttl        =       old_iph->ttl;
1202         iph->frag_off   =       0;
1203         iph->daddr      =       daddr;
1204         iph->saddr      =       saddr;
1205         iph->protocol   =       IPPROTO_IPIP;
1206         iph->ihl        =       5;
1207         iph->tot_len    =       htons(skb->len);
1208         ip_select_ident(iph, skb_dst(skb), NULL);
1209         ip_send_check(iph);
1210
1211         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1212         nf_reset(skb);
1213 }
1214
1215 static inline int ipmr_forward_finish(struct sk_buff *skb)
1216 {
1217         struct ip_options * opt = &(IPCB(skb)->opt);
1218
1219         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1220
1221         if (unlikely(opt->optlen))
1222                 ip_forward_options(skb);
1223
1224         return dst_output(skb);
1225 }
1226
1227 /*
1228  *      Processing handlers for ipmr_forward
1229  */
1230
1231 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1232 {
1233         struct net *net = mfc_net(c);
1234         const struct iphdr *iph = ip_hdr(skb);
1235         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1236         struct net_device *dev;
1237         struct rtable *rt;
1238         int    encap = 0;
1239
1240         if (vif->dev == NULL)
1241                 goto out_free;
1242
1243 #ifdef CONFIG_IP_PIMSM
1244         if (vif->flags & VIFF_REGISTER) {
1245                 vif->pkt_out++;
1246                 vif->bytes_out += skb->len;
1247                 vif->dev->stats.tx_bytes += skb->len;
1248                 vif->dev->stats.tx_packets++;
1249                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1250                 goto out_free;
1251         }
1252 #endif
1253
1254         if (vif->flags&VIFF_TUNNEL) {
1255                 struct flowi fl = { .oif = vif->link,
1256                                     .nl_u = { .ip4_u =
1257                                               { .daddr = vif->remote,
1258                                                 .saddr = vif->local,
1259                                                 .tos = RT_TOS(iph->tos) } },
1260                                     .proto = IPPROTO_IPIP };
1261                 if (ip_route_output_key(net, &rt, &fl))
1262                         goto out_free;
1263                 encap = sizeof(struct iphdr);
1264         } else {
1265                 struct flowi fl = { .oif = vif->link,
1266                                     .nl_u = { .ip4_u =
1267                                               { .daddr = iph->daddr,
1268                                                 .tos = RT_TOS(iph->tos) } },
1269                                     .proto = IPPROTO_IPIP };
1270                 if (ip_route_output_key(net, &rt, &fl))
1271                         goto out_free;
1272         }
1273
1274         dev = rt->u.dst.dev;
1275
1276         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1277                 /* Do not fragment multicasts. Alas, IPv4 does not
1278                    allow to send ICMP, so that packets will disappear
1279                    to blackhole.
1280                  */
1281
1282                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1283                 ip_rt_put(rt);
1284                 goto out_free;
1285         }
1286
1287         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1288
1289         if (skb_cow(skb, encap)) {
1290                 ip_rt_put(rt);
1291                 goto out_free;
1292         }
1293
1294         vif->pkt_out++;
1295         vif->bytes_out += skb->len;
1296
1297         skb_dst_drop(skb);
1298         skb_dst_set(skb, &rt->u.dst);
1299         ip_decrease_ttl(ip_hdr(skb));
1300
1301         /* FIXME: forward and output firewalls used to be called here.
1302          * What do we do with netfilter? -- RR */
1303         if (vif->flags & VIFF_TUNNEL) {
1304                 ip_encap(skb, vif->local, vif->remote);
1305                 /* FIXME: extra output firewall step used to be here. --RR */
1306                 vif->dev->stats.tx_packets++;
1307                 vif->dev->stats.tx_bytes += skb->len;
1308         }
1309
1310         IPCB(skb)->flags |= IPSKB_FORWARDED;
1311
1312         /*
1313          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1314          * not only before forwarding, but after forwarding on all output
1315          * interfaces. It is clear, if mrouter runs a multicasting
1316          * program, it should receive packets not depending to what interface
1317          * program is joined.
1318          * If we will not make it, the program will have to join on all
1319          * interfaces. On the other hand, multihoming host (or router, but
1320          * not mrouter) cannot join to more than one interface - it will
1321          * result in receiving multiple packets.
1322          */
1323         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1324                 ipmr_forward_finish);
1325         return;
1326
1327 out_free:
1328         kfree_skb(skb);
1329         return;
1330 }
1331
1332 static int ipmr_find_vif(struct net_device *dev)
1333 {
1334         struct net *net = dev_net(dev);
1335         int ct;
1336         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1337                 if (net->ipv4.vif_table[ct].dev == dev)
1338                         break;
1339         }
1340         return ct;
1341 }
1342
1343 /* "local" means that we should preserve one skb (for local delivery) */
1344
1345 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1346 {
1347         int psend = -1;
1348         int vif, ct;
1349         struct net *net = mfc_net(cache);
1350
1351         vif = cache->mfc_parent;
1352         cache->mfc_un.res.pkt++;
1353         cache->mfc_un.res.bytes += skb->len;
1354
1355         /*
1356          * Wrong interface: drop packet and (maybe) send PIM assert.
1357          */
1358         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1359                 int true_vifi;
1360
1361                 if (skb_rtable(skb)->fl.iif == 0) {
1362                         /* It is our own packet, looped back.
1363                            Very complicated situation...
1364
1365                            The best workaround until routing daemons will be
1366                            fixed is not to redistribute packet, if it was
1367                            send through wrong interface. It means, that
1368                            multicast applications WILL NOT work for
1369                            (S,G), which have default multicast route pointing
1370                            to wrong oif. In any case, it is not a good
1371                            idea to use multicasting applications on router.
1372                          */
1373                         goto dont_forward;
1374                 }
1375
1376                 cache->mfc_un.res.wrong_if++;
1377                 true_vifi = ipmr_find_vif(skb->dev);
1378
1379                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1380                     /* pimsm uses asserts, when switching from RPT to SPT,
1381                        so that we cannot check that packet arrived on an oif.
1382                        It is bad, but otherwise we would need to move pretty
1383                        large chunk of pimd to kernel. Ough... --ANK
1384                      */
1385                     (net->ipv4.mroute_do_pim ||
1386                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1387                     time_after(jiffies,
1388                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1389                         cache->mfc_un.res.last_assert = jiffies;
1390                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1391                 }
1392                 goto dont_forward;
1393         }
1394
1395         net->ipv4.vif_table[vif].pkt_in++;
1396         net->ipv4.vif_table[vif].bytes_in += skb->len;
1397
1398         /*
1399          *      Forward the frame
1400          */
1401         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1402                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1403                         if (psend != -1) {
1404                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1405                                 if (skb2)
1406                                         ipmr_queue_xmit(skb2, cache, psend);
1407                         }
1408                         psend = ct;
1409                 }
1410         }
1411         if (psend != -1) {
1412                 if (local) {
1413                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1414                         if (skb2)
1415                                 ipmr_queue_xmit(skb2, cache, psend);
1416                 } else {
1417                         ipmr_queue_xmit(skb, cache, psend);
1418                         return 0;
1419                 }
1420         }
1421
1422 dont_forward:
1423         if (!local)
1424                 kfree_skb(skb);
1425         return 0;
1426 }
1427
1428
1429 /*
1430  *      Multicast packets for forwarding arrive here
1431  */
1432
1433 int ip_mr_input(struct sk_buff *skb)
1434 {
1435         struct mfc_cache *cache;
1436         struct net *net = dev_net(skb->dev);
1437         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1438
1439         /* Packet is looped back after forward, it should not be
1440            forwarded second time, but still can be delivered locally.
1441          */
1442         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1443                 goto dont_forward;
1444
1445         if (!local) {
1446                     if (IPCB(skb)->opt.router_alert) {
1447                             if (ip_call_ra_chain(skb))
1448                                     return 0;
1449                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1450                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1451                                Cisco IOS <= 11.2(8)) do not put router alert
1452                                option to IGMP packets destined to routable
1453                                groups. It is very bad, because it means
1454                                that we can forward NO IGMP messages.
1455                              */
1456                             read_lock(&mrt_lock);
1457                             if (net->ipv4.mroute_sk) {
1458                                     nf_reset(skb);
1459                                     raw_rcv(net->ipv4.mroute_sk, skb);
1460                                     read_unlock(&mrt_lock);
1461                                     return 0;
1462                             }
1463                             read_unlock(&mrt_lock);
1464                     }
1465         }
1466
1467         read_lock(&mrt_lock);
1468         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1469
1470         /*
1471          *      No usable cache entry
1472          */
1473         if (cache == NULL) {
1474                 int vif;
1475
1476                 if (local) {
1477                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1478                         ip_local_deliver(skb);
1479                         if (skb2 == NULL) {
1480                                 read_unlock(&mrt_lock);
1481                                 return -ENOBUFS;
1482                         }
1483                         skb = skb2;
1484                 }
1485
1486                 vif = ipmr_find_vif(skb->dev);
1487                 if (vif >= 0) {
1488                         int err = ipmr_cache_unresolved(net, vif, skb);
1489                         read_unlock(&mrt_lock);
1490
1491                         return err;
1492                 }
1493                 read_unlock(&mrt_lock);
1494                 kfree_skb(skb);
1495                 return -ENODEV;
1496         }
1497
1498         ip_mr_forward(skb, cache, local);
1499
1500         read_unlock(&mrt_lock);
1501
1502         if (local)
1503                 return ip_local_deliver(skb);
1504
1505         return 0;
1506
1507 dont_forward:
1508         if (local)
1509                 return ip_local_deliver(skb);
1510         kfree_skb(skb);
1511         return 0;
1512 }
1513
#ifdef CONFIG_IP_PIMSM
/*
 *      Common tail of PIM register reception. @pimlen is the size of the
 *      PIM (or PIMv1-in-IGMP) header sitting in front of the encapsulated
 *      IP packet. The inner packet is re-injected with netif_rx() as if
 *      it had arrived on the register vif's device.
 *
 *      Returns 0 when the skb was passed on, 1 when it was rejected; in
 *      the latter case the skb is untouched and still owned by the caller.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
        struct net *net = dev_net(skb->dev);
        struct iphdr *inner = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        struct net_device *reg_dev = NULL;

        /* the inner packet must really be destined to a multicast group */
        if (!ipv4_is_multicast(inner->daddr))
                return 1;
        /* ... must not be a NULL-REGISTER */
        if (inner->tot_len == 0)
                return 1;
        /* ... and must not be truncated */
        if (ntohs(inner->tot_len) + pimlen > skb->len)
                return 1;

        /* pin the register device before mrt_lock is released */
        read_lock(&mrt_lock);
        if (net->ipv4.mroute_reg_vif_num >= 0) {
                reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
                if (reg_dev)
                        dev_hold(reg_dev);
        }
        read_unlock(&mrt_lock);

        if (!reg_dev)
                return 1;

        /* strip the outer headers and make the skb look freshly received */
        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)inner - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        skb_dst_drop(skb);
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);

        return 0;
}
#endif
1560
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Only PIMv1 REGISTER messages are accepted, and only while PIM is
 * enabled for the namespace; everything else is dropped. The skb is
 * always consumed and 0 is always returned.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
        struct net *net = dev_net(skb->dev);
        struct igmphdr *pim;

        /* need the IGMP header plus the start of the inner IP header */
        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = igmp_hdr(skb);

        if (!net->ipv4.mroute_do_pim)
                goto drop;
        if (pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        /* on success __pim_rcv() has taken the skb over */
        if (__pim_rcv(skb, sizeof(*pim)) == 0)
                return 0;
drop:
        kfree_skb(skb);
        return 0;
}
#endif
1587
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive a PIMv2 packet. Only REGISTER messages without the
 * NULL-REGISTER flag are accepted. The checksum is good if it verifies
 * over the PIM header alone or, failing that, over the whole packet.
 * The skb is always consumed and 0 is always returned.
 */
static int pim_rcv(struct sk_buff * skb)
{
        struct pimreghdr *pim;

        /* need the PIM header plus the start of the inner IP header */
        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = (struct pimreghdr *)skb_transport_header(skb);

        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)))
                goto drop;
        if (pim->flags & PIM_NULL_REGISTER)
                goto drop;
        /* the full-packet sum is only computed if the header-only one fails */
        if (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
            csum_fold(skb_checksum(skb, 0, skb->len, 0)))
                goto drop;

        /* on success __pim_rcv() has taken the skb over */
        if (__pim_rcv(skb, sizeof(*pim)) == 0)
                return 0;
drop:
        kfree_skb(skb);
        return 0;
}
#endif
1610
1611 static int
1612 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1613 {
1614         int ct;
1615         struct rtnexthop *nhp;
1616         struct net *net = mfc_net(c);
1617         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1618         u8 *b = skb_tail_pointer(skb);
1619         struct rtattr *mp_head;
1620
1621         if (dev)
1622                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1623
1624         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1625
1626         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1627                 if (c->mfc_un.res.ttls[ct] < 255) {
1628                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1629                                 goto rtattr_failure;
1630                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1631                         nhp->rtnh_flags = 0;
1632                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1633                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1634                         nhp->rtnh_len = sizeof(*nhp);
1635                 }
1636         }
1637         mp_head->rta_type = RTA_MULTIPATH;
1638         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1639         rtm->rtm_type = RTN_MULTICAST;
1640         return 1;
1641
1642 rtattr_failure:
1643         nlmsg_trim(skb, b);
1644         return -EMSGSIZE;
1645 }
1646
1647 int ipmr_get_route(struct net *net,
1648                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1649 {
1650         int err;
1651         struct mfc_cache *cache;
1652         struct rtable *rt = skb_rtable(skb);
1653
1654         read_lock(&mrt_lock);
1655         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1656
1657         if (cache == NULL) {
1658                 struct sk_buff *skb2;
1659                 struct iphdr *iph;
1660                 struct net_device *dev;
1661                 int vif;
1662
1663                 if (nowait) {
1664                         read_unlock(&mrt_lock);
1665                         return -EAGAIN;
1666                 }
1667
1668                 dev = skb->dev;
1669                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1670                         read_unlock(&mrt_lock);
1671                         return -ENODEV;
1672                 }
1673                 skb2 = skb_clone(skb, GFP_ATOMIC);
1674                 if (!skb2) {
1675                         read_unlock(&mrt_lock);
1676                         return -ENOMEM;
1677                 }
1678
1679                 skb_push(skb2, sizeof(struct iphdr));
1680                 skb_reset_network_header(skb2);
1681                 iph = ip_hdr(skb2);
1682                 iph->ihl = sizeof(struct iphdr) >> 2;
1683                 iph->saddr = rt->rt_src;
1684                 iph->daddr = rt->rt_dst;
1685                 iph->version = 0;
1686                 err = ipmr_cache_unresolved(net, vif, skb2);
1687                 read_unlock(&mrt_lock);
1688                 return err;
1689         }
1690
1691         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1692                 cache->mfc_flags |= MFC_NOTIFY;
1693         err = ipmr_fill_mroute(skb, cache, rtm);
1694         read_unlock(&mrt_lock);
1695         return err;
1696 }
1697
1698 #ifdef CONFIG_PROC_FS
1699 /*
1700  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1701  */
/*
 *      Private seq_file state of the vif listing; allocated by
 *      ipmr_vif_open() through seq_open_net().
 */
1702 struct ipmr_vif_iter {
1703         struct seq_net_private p;       /* NOTE(review): presumably has to stay first for seq_file_net() - confirm */
1704         int ct;                         /* vif_table index of the current entry, set by ipmr_vif_seq_idx()/ipmr_vif_seq_next() */
1705 };
1706
1707 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1708                                            struct ipmr_vif_iter *iter,
1709                                            loff_t pos)
1710 {
1711         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1712                 if (!VIF_EXISTS(net, iter->ct))
1713                         continue;
1714                 if (pos-- == 0)
1715                         return &net->ipv4.vif_table[iter->ct];
1716         }
1717         return NULL;
1718 }
1719
1720 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1721         __acquires(mrt_lock)
1722 {
1723         struct net *net = seq_file_net(seq);
1724
1725         read_lock(&mrt_lock);
1726         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1727                 : SEQ_START_TOKEN;
1728 }
1729
1730 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1731 {
1732         struct ipmr_vif_iter *iter = seq->private;
1733         struct net *net = seq_file_net(seq);
1734
1735         ++*pos;
1736         if (v == SEQ_START_TOKEN)
1737                 return ipmr_vif_seq_idx(net, iter, 0);
1738
1739         while (++iter->ct < net->ipv4.maxvif) {
1740                 if (!VIF_EXISTS(net, iter->ct))
1741                         continue;
1742                 return &net->ipv4.vif_table[iter->ct];
1743         }
1744         return NULL;
1745 }
1746
1747 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1748         __releases(mrt_lock)
1749 {
1750         read_unlock(&mrt_lock);
1751 }
1752
1753 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1754 {
1755         struct net *net = seq_file_net(seq);
1756
1757         if (v == SEQ_START_TOKEN) {
1758                 seq_puts(seq,
1759                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1760         } else {
1761                 const struct vif_device *vif = v;
1762                 const char *name =  vif->dev ? vif->dev->name : "none";
1763
1764                 seq_printf(seq,
1765                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1766                            vif - net->ipv4.vif_table,
1767                            name, vif->bytes_in, vif->pkt_in,
1768                            vif->bytes_out, vif->pkt_out,
1769                            vif->flags, vif->local, vif->remote);
1770         }
1771         return 0;
1772 }
1773
1774 static const struct seq_operations ipmr_vif_seq_ops = {
1775         .start = ipmr_vif_seq_start,
1776         .next  = ipmr_vif_seq_next,
1777         .stop  = ipmr_vif_seq_stop,
1778         .show  = ipmr_vif_seq_show,
1779 };
1780
1781 static int ipmr_vif_open(struct inode *inode, struct file *file)
1782 {
1783         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1784                             sizeof(struct ipmr_vif_iter));
1785 }
1786
1787 static const struct file_operations ipmr_vif_fops = {
1788         .owner   = THIS_MODULE,
1789         .open    = ipmr_vif_open,
1790         .read    = seq_read,
1791         .llseek  = seq_lseek,
1792         .release = seq_release_net,
1793 };
1794
/*
 *	Per-open iterator state for the mfc cache seq_file. It is allocated
 *	by seq_open_net() (see ipmr_mfc_open) and reached through
 *	seq->private by the iterator callbacks.
 */
struct ipmr_mfc_iter {
	/* NOTE(review): presumably has to stay the first member so the
	 * seq_file net helpers can locate the namespace - confirm.
	 */
	struct seq_net_private p;
	/* Which list is being walked, and therefore which lock is held:
	 * net->ipv4.mfc_cache_array (mrt_lock read-held),
	 * &mfc_unres_queue (mfc_unres_lock held), or NULL (no lock held).
	 */
	struct mfc_cache **cache;
	/* current hash line while walking mfc_cache_array */
	int ct;
};
1800
1801
1802 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1803                                           struct ipmr_mfc_iter *it, loff_t pos)
1804 {
1805         struct mfc_cache *mfc;
1806
1807         it->cache = net->ipv4.mfc_cache_array;
1808         read_lock(&mrt_lock);
1809         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1810                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1811                      mfc; mfc = mfc->next)
1812                         if (pos-- == 0)
1813                                 return mfc;
1814         read_unlock(&mrt_lock);
1815
1816         it->cache = &mfc_unres_queue;
1817         spin_lock_bh(&mfc_unres_lock);
1818         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1819                 if (net_eq(mfc_net(mfc), net) &&
1820                     pos-- == 0)
1821                         return mfc;
1822         spin_unlock_bh(&mfc_unres_lock);
1823
1824         it->cache = NULL;
1825         return NULL;
1826 }
1827
1828
1829 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1830 {
1831         struct ipmr_mfc_iter *it = seq->private;
1832         struct net *net = seq_file_net(seq);
1833
1834         it->cache = NULL;
1835         it->ct = 0;
1836         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1837                 : SEQ_START_TOKEN;
1838 }
1839
1840 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1841 {
1842         struct mfc_cache *mfc = v;
1843         struct ipmr_mfc_iter *it = seq->private;
1844         struct net *net = seq_file_net(seq);
1845
1846         ++*pos;
1847
1848         if (v == SEQ_START_TOKEN)
1849                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1850
1851         if (mfc->next)
1852                 return mfc->next;
1853
1854         if (it->cache == &mfc_unres_queue)
1855                 goto end_of_list;
1856
1857         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1858
1859         while (++it->ct < MFC_LINES) {
1860                 mfc = net->ipv4.mfc_cache_array[it->ct];
1861                 if (mfc)
1862                         return mfc;
1863         }
1864
1865         /* exhausted cache_array, show unresolved */
1866         read_unlock(&mrt_lock);
1867         it->cache = &mfc_unres_queue;
1868         it->ct = 0;
1869
1870         spin_lock_bh(&mfc_unres_lock);
1871         mfc = mfc_unres_queue;
1872         while (mfc && !net_eq(mfc_net(mfc), net))
1873                 mfc = mfc->next;
1874         if (mfc)
1875                 return mfc;
1876
1877  end_of_list:
1878         spin_unlock_bh(&mfc_unres_lock);
1879         it->cache = NULL;
1880
1881         return NULL;
1882 }
1883
1884 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1885 {
1886         struct ipmr_mfc_iter *it = seq->private;
1887         struct net *net = seq_file_net(seq);
1888
1889         if (it->cache == &mfc_unres_queue)
1890                 spin_unlock_bh(&mfc_unres_lock);
1891         else if (it->cache == net->ipv4.mfc_cache_array)
1892                 read_unlock(&mrt_lock);
1893 }
1894
1895 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1896 {
1897         int n;
1898         struct net *net = seq_file_net(seq);
1899
1900         if (v == SEQ_START_TOKEN) {
1901                 seq_puts(seq,
1902                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1903         } else {
1904                 const struct mfc_cache *mfc = v;
1905                 const struct ipmr_mfc_iter *it = seq->private;
1906
1907                 seq_printf(seq, "%08lX %08lX %-3hd",
1908                            (unsigned long) mfc->mfc_mcastgrp,
1909                            (unsigned long) mfc->mfc_origin,
1910                            mfc->mfc_parent);
1911
1912                 if (it->cache != &mfc_unres_queue) {
1913                         seq_printf(seq, " %8lu %8lu %8lu",
1914                                    mfc->mfc_un.res.pkt,
1915                                    mfc->mfc_un.res.bytes,
1916                                    mfc->mfc_un.res.wrong_if);
1917                         for (n = mfc->mfc_un.res.minvif;
1918                              n < mfc->mfc_un.res.maxvif; n++ ) {
1919                                 if (VIF_EXISTS(net, n) &&
1920                                     mfc->mfc_un.res.ttls[n] < 255)
1921                                         seq_printf(seq,
1922                                            " %2d:%-3d",
1923                                            n, mfc->mfc_un.res.ttls[n]);
1924                         }
1925                 } else {
1926                         /* unresolved mfc_caches don't contain
1927                          * pkt, bytes and wrong_if values
1928                          */
1929                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1930                 }
1931                 seq_putc(seq, '\n');
1932         }
1933         return 0;
1934 }
1935
1936 static const struct seq_operations ipmr_mfc_seq_ops = {
1937         .start = ipmr_mfc_seq_start,
1938         .next  = ipmr_mfc_seq_next,
1939         .stop  = ipmr_mfc_seq_stop,
1940         .show  = ipmr_mfc_seq_show,
1941 };
1942
1943 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1944 {
1945         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1946                             sizeof(struct ipmr_mfc_iter));
1947 }
1948
1949 static const struct file_operations ipmr_mfc_fops = {
1950         .owner   = THIS_MODULE,
1951         .open    = ipmr_mfc_open,
1952         .read    = seq_read,
1953         .llseek  = seq_lseek,
1954         .release = seq_release_net,
1955 };
1956 #endif
1957
1958 #ifdef CONFIG_IP_PIMSM_V2
1959 static const struct net_protocol pim_protocol = {
1960         .handler        =       pim_rcv,
1961         .netns_ok       =       1,
1962 };
1963 #endif
1964
1965
1966 /*
1967  *      Setup for IP multicast routing
1968  */
1969 static int __net_init ipmr_net_init(struct net *net)
1970 {
1971         int err = 0;
1972
1973         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1974                                       GFP_KERNEL);
1975         if (!net->ipv4.vif_table) {
1976                 err = -ENOMEM;
1977                 goto fail;
1978         }
1979
1980         /* Forwarding cache */
1981         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1982                                             sizeof(struct mfc_cache *),
1983                                             GFP_KERNEL);
1984         if (!net->ipv4.mfc_cache_array) {
1985                 err = -ENOMEM;
1986                 goto fail_mfc_cache;
1987         }
1988
1989 #ifdef CONFIG_IP_PIMSM
1990         net->ipv4.mroute_reg_vif_num = -1;
1991 #endif
1992
1993 #ifdef CONFIG_PROC_FS
1994         err = -ENOMEM;
1995         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1996                 goto proc_vif_fail;
1997         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1998                 goto proc_cache_fail;
1999 #endif
2000         return 0;
2001
2002 #ifdef CONFIG_PROC_FS
2003 proc_cache_fail:
2004         proc_net_remove(net, "ip_mr_vif");
2005 proc_vif_fail:
2006         kfree(net->ipv4.mfc_cache_array);
2007 #endif
2008 fail_mfc_cache:
2009         kfree(net->ipv4.vif_table);
2010 fail:
2011         return err;
2012 }
2013
2014 static void __net_exit ipmr_net_exit(struct net *net)
2015 {
2016 #ifdef CONFIG_PROC_FS
2017         proc_net_remove(net, "ip_mr_cache");
2018         proc_net_remove(net, "ip_mr_vif");
2019 #endif
2020         kfree(net->ipv4.mfc_cache_array);
2021         kfree(net->ipv4.vif_table);
2022 }
2023
2024 static struct pernet_operations ipmr_net_ops = {
2025         .init = ipmr_net_init,
2026         .exit = ipmr_net_exit,
2027 };
2028
2029 int __init ip_mr_init(void)
2030 {
2031         int err;
2032
2033         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2034                                        sizeof(struct mfc_cache),
2035                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2036                                        NULL);
2037         if (!mrt_cachep)
2038                 return -ENOMEM;
2039
2040         err = register_pernet_subsys(&ipmr_net_ops);
2041         if (err)
2042                 goto reg_pernet_fail;
2043
2044         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2045         err = register_netdevice_notifier(&ip_mr_notifier);
2046         if (err)
2047                 goto reg_notif_fail;
2048 #ifdef CONFIG_IP_PIMSM_V2
2049         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2050                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2051                 err = -EAGAIN;
2052                 goto add_proto_fail;
2053         }
2054 #endif
2055         return 0;
2056
2057 #ifdef CONFIG_IP_PIMSM_V2
2058 add_proto_fail:
2059         unregister_netdevice_notifier(&ip_mr_notifier);
2060 #endif
2061 reg_notif_fail:
2062         del_timer(&ipmr_expire_timer);
2063         unregister_pernet_subsys(&ipmr_net_ops);
2064 reg_pernet_fail:
2065         kmem_cache_destroy(mrt_cachep);
2066         return err;
2067 }