[NET]: Revert sk_buff walker cleanups.
[safe/jmp/linux-2.6] / net / core / rtnetlink.c
index 1c15a90..cec1111 100644 (file)
  *     Vitaly E. Lavrov                RTA_OK arithmetics was wrong.
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
@@ -35,6 +33,8 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/security.h>
+#include <linux/mutex.h>
+#include <linux/if_addr.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <net/udp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
-#include <net/netlink.h>
+#include <net/fib_rules.h>
+#include <net/rtnetlink.h>
 
-DECLARE_MUTEX(rtnl_sem);
+struct rtnl_link
+{
+       rtnl_doit_func          doit;
+       rtnl_dumpit_func        dumpit;
+};
+
+static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
 
 void rtnl_lock(void)
 {
-       rtnl_shlock();
+       mutex_lock(&rtnl_mutex);
 }
 
-int rtnl_lock_interruptible(void)
+void __rtnl_unlock(void)
 {
-       return down_interruptible(&rtnl_sem);
+       mutex_unlock(&rtnl_mutex);
 }
+
 void rtnl_unlock(void)
 {
-       rtnl_shunlock();
-
+       mutex_unlock(&rtnl_mutex);
+       if (rtnl && rtnl->sk_receive_queue.qlen)
+               rtnl->sk_data_ready(rtnl, 0);
        netdev_run_todo();
 }
 
+int rtnl_trylock(void)
+{
+       return mutex_trylock(&rtnl_mutex);
+}
+
 int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 {
        memset(tb, 0, sizeof(struct rtattr*)*maxattr);
@@ -83,25 +97,164 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
        return 0;
 }
 
-struct sock *rtnl;
+static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+static inline int rtm_msgindex(int msgtype)
+{
+       int msgindex = msgtype - RTM_BASE;
+
+       /*
+        * msgindex < 0 implies someone tried to register a netlink
+        * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+        * the message type has not been added to linux/rtnetlink.h
+        */
+       BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+       return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+       struct rtnl_link *tab;
+
+       tab = rtnl_msg_handlers[protocol];
+       if (tab == NULL || tab[msgindex].doit == NULL)
+               tab = rtnl_msg_handlers[PF_UNSPEC];
+
+       return tab ? tab[msgindex].doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+       struct rtnl_link *tab;
+
+       tab = rtnl_msg_handlers[protocol];
+       if (tab == NULL || tab[msgindex].dumpit == NULL)
+               tab = rtnl_msg_handlers[PF_UNSPEC];
+
+       return tab ? tab[msgindex].dumpit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+                   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+       struct rtnl_link *tab;
+       int msgindex;
+
+       BUG_ON(protocol < 0 || protocol >= NPROTO);
+       msgindex = rtm_msgindex(msgtype);
+
+       tab = rtnl_msg_handlers[protocol];
+       if (tab == NULL) {
+               tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+               if (tab == NULL)
+                       return -ENOBUFS;
+
+               rtnl_msg_handlers[protocol] = tab;
+       }
+
+       if (doit)
+               tab[msgindex].doit = doit;
+
+       if (dumpit)
+               tab[msgindex].dumpit = dumpit;
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely, it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continuing.
+ */
+void rtnl_register(int protocol, int msgtype,
+                  rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+       if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+               panic("Unable to register rtnetlink message handler, "
+                     "protocol = %d, message type = %d\n",
+                     protocol, msgtype);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+       int msgindex;
+
+       BUG_ON(protocol < 0 || protocol >= NPROTO);
+       msgindex = rtm_msgindex(msgtype);
+
+       if (rtnl_msg_handlers[protocol] == NULL)
+               return -ENOENT;
+
+       rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+       rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
+ * @protocol : Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregister() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+       BUG_ON(protocol < 0 || protocol >= NPROTO);
+
+       kfree(rtnl_msg_handlers[protocol]);
+       rtnl_msg_handlers[protocol] = NULL;
+}
 
-struct rtnetlink_link * rtnetlink_links[NPROTO];
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
 static const int rtm_min[RTM_NR_FAMILIES] =
 {
        [RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
        [RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
        [RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
-       [RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
-       [RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct rtmsg)),
+       [RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
        [RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
        [RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
        [RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
        [RTM_FAM(RTM_NEWACTION)]    = NLMSG_LENGTH(sizeof(struct tcamsg)),
-       [RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
        [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
        [RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
-       [RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -109,13 +262,11 @@ static const int rta_max[RTM_NR_FAMILIES] =
        [RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
        [RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
        [RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
-       [RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
-       [RTM_FAM(RTM_NEWRULE)]      = RTA_MAX,
+       [RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
        [RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
        [RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
        [RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
        [RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
-       [RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -158,26 +309,74 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
        return err;
 }
 
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+       return nlmsg_unicast(rtnl, skb, pid);
+}
+
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+               struct nlmsghdr *nlh, gfp_t flags)
+{
+       int report = 0;
+
+       if (nlh)
+               report = nlmsg_report(nlh);
+
+       return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+       netlink_set_err(rtnl, 0, group, error);
+}
+
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
-       struct rtattr *mx = (struct rtattr*)skb->tail;
-       int i;
+       struct nlattr *mx;
+       int i, valid = 0;
 
-       RTA_PUT(skb, RTA_METRICS, 0, NULL);
-       for (i=0; i<RTAX_MAX; i++) {
-               if (metrics[i])
-                       RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+       mx = nla_nest_start(skb, RTA_METRICS);
+       if (mx == NULL)
+               return -ENOBUFS;
+
+       for (i = 0; i < RTAX_MAX; i++) {
+               if (metrics[i]) {
+                       valid++;
+                       NLA_PUT_U32(skb, i+1, metrics[i]);
+               }
        }
-       mx->rta_len = skb->tail - (u8*)mx;
-       if (mx->rta_len == RTA_LENGTH(0))
-               skb_trim(skb, (u8*)mx - skb->data);
-       return 0;
 
-rtattr_failure:
-       skb_trim(skb, (u8*)mx - skb->data);
-       return -1;
+       if (!valid) {
+               nla_nest_cancel(skb, mx);
+               return 0;
+       }
+
+       return nla_nest_end(skb, mx);
+
+nla_put_failure:
+       return nla_nest_cancel(skb, mx);
 }
 
+int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+                      u32 ts, u32 tsage, long expires, u32 error)
+{
+       struct rta_cacheinfo ci = {
+               .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+               .rta_used = dst->__use,
+               .rta_clntref = atomic_read(&(dst->__refcnt)),
+               .rta_error = error,
+               .rta_id =  id,
+               .rta_ts = ts,
+               .rta_tsage = tsage,
+       };
+
+       if (expires)
+               ci.rta_expires = jiffies_to_clock_t(expires);
+
+       return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
 
 static void set_operstate(struct net_device *dev, unsigned char transition)
 {
@@ -196,7 +395,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
                    operstate == IF_OPER_UNKNOWN)
                        operstate = IF_OPER_DORMANT;
                break;
-       };
+       }
 
        if (dev->operstate != operstate) {
                write_lock_bh(&dev_base_lock);
@@ -206,41 +405,92 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
        }
 }
 
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-                                int type, u32 pid, u32 seq, u32 change, 
-                                unsigned int flags)
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+                                struct net_device_stats *b)
 {
-       struct ifinfomsg *r;
-       struct nlmsghdr  *nlh;
-       unsigned char    *b = skb->tail;
-
-       nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
-       r = NLMSG_DATA(nlh);
-       r->ifi_family = AF_UNSPEC;
-       r->__ifi_pad = 0;
-       r->ifi_type = dev->type;
-       r->ifi_index = dev->ifindex;
-       r->ifi_flags = dev_get_flags(dev);
-       r->ifi_change = change;
-
-       RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+       a->rx_packets = b->rx_packets;
+       a->tx_packets = b->tx_packets;
+       a->rx_bytes = b->rx_bytes;
+       a->tx_bytes = b->tx_bytes;
+       a->rx_errors = b->rx_errors;
+       a->tx_errors = b->tx_errors;
+       a->rx_dropped = b->rx_dropped;
+       a->tx_dropped = b->tx_dropped;
+
+       a->multicast = b->multicast;
+       a->collisions = b->collisions;
+
+       a->rx_length_errors = b->rx_length_errors;
+       a->rx_over_errors = b->rx_over_errors;
+       a->rx_crc_errors = b->rx_crc_errors;
+       a->rx_frame_errors = b->rx_frame_errors;
+       a->rx_fifo_errors = b->rx_fifo_errors;
+       a->rx_missed_errors = b->rx_missed_errors;
+
+       a->tx_aborted_errors = b->tx_aborted_errors;
+       a->tx_carrier_errors = b->tx_carrier_errors;
+       a->tx_fifo_errors = b->tx_fifo_errors;
+       a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+       a->tx_window_errors = b->tx_window_errors;
+
+       a->rx_compressed = b->rx_compressed;
+       a->tx_compressed = b->tx_compressed;
+};
 
-       if (1) {
-               u32 txqlen = dev->tx_queue_len;
-               RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
-       }
+static inline size_t if_nlmsg_size(int iwbuflen)
+{
+       return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+              + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+              + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+              + nla_total_size(sizeof(struct rtnl_link_ifmap))
+              + nla_total_size(sizeof(struct rtnl_link_stats))
+              + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+              + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+              + nla_total_size(4) /* IFLA_TXQLEN */
+              + nla_total_size(4) /* IFLA_WEIGHT */
+              + nla_total_size(4) /* IFLA_MTU */
+              + nla_total_size(4) /* IFLA_LINK */
+              + nla_total_size(4) /* IFLA_MASTER */
+              + nla_total_size(1) /* IFLA_OPERSTATE */
+              + nla_total_size(1) /* IFLA_LINKMODE */
+              + nla_total_size(iwbuflen);
+}
 
-       if (1) {
-               u32 weight = dev->weight;
-               RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
-       }
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+                           void *iwbuf, int iwbuflen, int type, u32 pid,
+                           u32 seq, u32 change, unsigned int flags)
+{
+       struct ifinfomsg *ifm;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+       if (nlh == NULL)
+               return -EMSGSIZE;
+
+       ifm = nlmsg_data(nlh);
+       ifm->ifi_family = AF_UNSPEC;
+       ifm->__ifi_pad = 0;
+       ifm->ifi_type = dev->type;
+       ifm->ifi_index = dev->ifindex;
+       ifm->ifi_flags = dev_get_flags(dev);
+       ifm->ifi_change = change;
+
+       NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+       NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+       NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+       NLA_PUT_U8(skb, IFLA_OPERSTATE,
+                  netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+       NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+       NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+
+       if (dev->ifindex != dev->iflink)
+               NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+       if (dev->master)
+               NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-       if (1) {
-               u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN;
-               u8 link_mode = dev->link_mode;
-               RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
-               RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
-       }
+       if (dev->qdisc_sleeping)
+               NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
 
        if (1) {
                struct rtnl_link_ifmap map = {
@@ -251,125 +501,121 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
                        .dma         = dev->dma,
                        .port        = dev->if_port,
                };
-               RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+               NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
        }
 
        if (dev->addr_len) {
-               RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-               RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
-       }
-
-       if (1) {
-               u32 mtu = dev->mtu;
-               RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
-       }
-
-       if (dev->ifindex != dev->iflink) {
-               u32 iflink = dev->iflink;
-               RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
-       }
-
-       if (dev->qdisc_sleeping)
-               RTA_PUT(skb, IFLA_QDISC,
-                       strlen(dev->qdisc_sleeping->ops->id) + 1,
-                       dev->qdisc_sleeping->ops->id);
-       
-       if (dev->master) {
-               u32 master = dev->master->ifindex;
-               RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+               NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+               NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
        }
 
        if (dev->get_stats) {
-               unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+               struct net_device_stats *stats = dev->get_stats(dev);
                if (stats) {
-                       struct rtattr  *a;
-                       __u32          *s;
-                       int             i;
-                       int             n = sizeof(struct rtnl_link_stats)/4;
-
-                       a = __RTA_PUT(skb, IFLA_STATS, n*4);
-                       s = RTA_DATA(a);
-                       for (i=0; i<n; i++)
-                               s[i] = stats[i];
+                       struct nlattr *attr;
+
+                       attr = nla_reserve(skb, IFLA_STATS,
+                                          sizeof(struct rtnl_link_stats));
+                       if (attr == NULL)
+                               goto nla_put_failure;
+
+                       copy_rtnl_link_stats(nla_data(attr), stats);
                }
        }
-       nlh->nlmsg_len = skb->tail - b;
-       return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-       skb_trim(skb, b - skb->data);
-       return -1;
+       if (iwbuf)
+               NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
 }
 
-static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
        int idx;
        int s_idx = cb->args[0];
        struct net_device *dev;
 
-       read_lock(&dev_base_lock);
        for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
                if (idx < s_idx)
                        continue;
-               if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
-                                         NETLINK_CB(cb->skb).pid,
-                                         cb->nlh->nlmsg_seq, 0,
-                                         NLM_F_MULTI) <= 0)
+               if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+                                    NETLINK_CB(cb->skb).pid,
+                                    cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
                        break;
        }
-       read_unlock(&dev_base_lock);
        cb->args[0] = idx;
 
        return skb->len;
 }
 
-static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+       [IFLA_IFNAME]           = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+       [IFLA_MAP]              = { .len = sizeof(struct rtnl_link_ifmap) },
+       [IFLA_MTU]              = { .type = NLA_U32 },
+       [IFLA_TXQLEN]           = { .type = NLA_U32 },
+       [IFLA_WEIGHT]           = { .type = NLA_U32 },
+       [IFLA_OPERSTATE]        = { .type = NLA_U8 },
+       [IFLA_LINKMODE]         = { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-       struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
-       struct rtattr    **ida = arg;
+       struct ifinfomsg *ifm;
        struct net_device *dev;
-       int err, send_addr_notify = 0;
+       int err, send_addr_notify = 0, modified = 0;
+       struct nlattr *tb[IFLA_MAX+1];
+       char ifname[IFNAMSIZ];
+
+       err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+       if (err < 0)
+               goto errout;
 
+       if (tb[IFLA_IFNAME])
+               nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+       else
+               ifname[0] = '\0';
+
+       err = -EINVAL;
+       ifm = nlmsg_data(nlh);
        if (ifm->ifi_index >= 0)
                dev = dev_get_by_index(ifm->ifi_index);
-       else if (ida[IFLA_IFNAME - 1]) {
-               char ifname[IFNAMSIZ];
-
-               if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-                                  IFNAMSIZ) >= IFNAMSIZ)
-                       return -EINVAL;
+       else if (tb[IFLA_IFNAME])
                dev = dev_get_by_name(ifname);
-       else
-               return -EINVAL;
+       else
+               goto errout;
 
-       if (!dev)
-               return -ENODEV;
+       if (dev == NULL) {
+               err = -ENODEV;
+               goto errout;
+       }
 
-       err = -EINVAL;
+       if (tb[IFLA_ADDRESS] &&
+           nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+               goto errout_dev;
 
-       if (ifm->ifi_flags)
-               dev_change_flags(dev, ifm->ifi_flags);
+       if (tb[IFLA_BROADCAST] &&
+           nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+               goto errout_dev;
 
-       if (ida[IFLA_MAP - 1]) {
+       if (tb[IFLA_MAP]) {
                struct rtnl_link_ifmap *u_map;
                struct ifmap k_map;
 
                if (!dev->set_config) {
                        err = -EOPNOTSUPP;
-                       goto out;
+                       goto errout_dev;
                }
 
                if (!netif_device_present(dev)) {
                        err = -ENODEV;
-                       goto out;
+                       goto errout_dev;
                }
-               
-               if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
-                       goto out;
-
-               u_map = RTA_DATA(ida[IFLA_MAP - 1]);
 
+               u_map = nla_data(tb[IFLA_MAP]);
                k_map.mem_start = (unsigned long) u_map->mem_start;
                k_map.mem_end = (unsigned long) u_map->mem_end;
                k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -378,99 +624,148 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
                k_map.port = (unsigned char) u_map->port;
 
                err = dev->set_config(dev, &k_map);
+               if (err < 0)
+                       goto errout_dev;
 
-               if (err)
-                       goto out;
+               modified = 1;
        }
 
-       if (ida[IFLA_ADDRESS - 1]) {
+       if (tb[IFLA_ADDRESS]) {
+               struct sockaddr *sa;
+               int len;
+
                if (!dev->set_mac_address) {
                        err = -EOPNOTSUPP;
-                       goto out;
+                       goto errout_dev;
                }
+
                if (!netif_device_present(dev)) {
                        err = -ENODEV;
-                       goto out;
+                       goto errout_dev;
                }
-               if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-                       goto out;
 
-               err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+               len = sizeof(sa_family_t) + dev->addr_len;
+               sa = kmalloc(len, GFP_KERNEL);
+               if (!sa) {
+                       err = -ENOMEM;
+                       goto errout_dev;
+               }
+               sa->sa_family = dev->type;
+               memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+                      dev->addr_len);
+               err = dev->set_mac_address(dev, sa);
+               kfree(sa);
                if (err)
-                       goto out;
+                       goto errout_dev;
                send_addr_notify = 1;
+               modified = 1;
        }
 
-       if (ida[IFLA_BROADCAST - 1]) {
-               if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-                       goto out;
-               memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
-                      dev->addr_len);
-               send_addr_notify = 1;
+       if (tb[IFLA_MTU]) {
+               err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+               if (err < 0)
+                       goto errout_dev;
+               modified = 1;
        }
 
-       if (ida[IFLA_MTU - 1]) {
-               if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-                       goto out;
-               err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
-
-               if (err)
-                       goto out;
-
+       /*
+        * Interface selected by interface index but interface
+        * name provided implies that a name change has been
+        * requested.
+        */
+       if (ifm->ifi_index >= 0 && ifname[0]) {
+               err = dev_change_name(dev, ifname);
+               if (err < 0)
+                       goto errout_dev;
+               modified = 1;
        }
 
-       if (ida[IFLA_TXQLEN - 1]) {
-               if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-                       goto out;
-
-               dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+       if (tb[IFLA_BROADCAST]) {
+               nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+               send_addr_notify = 1;
        }
 
-       if (ida[IFLA_WEIGHT - 1]) {
-               if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-                       goto out;
 
-               dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
-       }
+       if (ifm->ifi_flags)
+               dev_change_flags(dev, ifm->ifi_flags);
 
-       if (ida[IFLA_OPERSTATE - 1]) {
-               if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-                       goto out;
+       if (tb[IFLA_TXQLEN])
+               dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
-               set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
-       }
+       if (tb[IFLA_WEIGHT])
+               dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
 
-       if (ida[IFLA_LINKMODE - 1]) {
-               if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-                       goto out;
+       if (tb[IFLA_OPERSTATE])
+               set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 
+       if (tb[IFLA_LINKMODE]) {
                write_lock_bh(&dev_base_lock);
-               dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
+               dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
                write_unlock_bh(&dev_base_lock);
        }
 
-       if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
-               char ifname[IFNAMSIZ];
-
-               if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-                                  IFNAMSIZ) >= IFNAMSIZ)
-                       goto out;
-               err = dev_change_name(dev, ifname);
-               if (err)
-                       goto out;
-       }
-
        err = 0;
 
-out:
+errout_dev:
+       if (err < 0 && modified && net_ratelimit())
+               printk(KERN_WARNING "A link change request failed with "
+                      "some changes comitted already. Interface %s may "
+                      "have been left with an inconsistent configuration, "
+                      "please check.\n", dev->name);
+
        if (send_addr_notify)
                call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 
        dev_put(dev);
+errout:
+       return err;
+}
+
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+       struct ifinfomsg *ifm;
+       struct nlattr *tb[IFLA_MAX+1];
+       struct net_device *dev = NULL;
+       struct sk_buff *nskb;
+       char *iw_buf = NULL, *iw = NULL;
+       int iw_buf_len = 0;
+       int err;
+
+       err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+       if (err < 0)
+               return err;
+
+       ifm = nlmsg_data(nlh);
+       if (ifm->ifi_index >= 0) {
+               dev = dev_get_by_index(ifm->ifi_index);
+               if (dev == NULL)
+                       return -ENODEV;
+       } else
+               return -EINVAL;
+
+       nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
+       if (nskb == NULL) {
+               err = -ENOBUFS;
+               goto errout;
+       }
+
+       err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+                              NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+       if (err < 0) {
+               /* -EMSGSIZE implies BUG in if_nlmsg_size() */
+               WARN_ON(err == -EMSGSIZE);
+               kfree_skb(nskb);
+               goto errout;
+       }
+       err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
+errout:
+       kfree(iw_buf);
+       dev_put(dev);
+
        return err;
 }
 
-static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
        int idx;
        int s_idx = cb->family;
@@ -481,12 +776,12 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
                int type = cb->nlh->nlmsg_type-RTM_BASE;
                if (idx < s_idx || idx == PF_PACKET)
                        continue;
-               if (rtnetlink_links[idx] == NULL ||
-                   rtnetlink_links[idx][type].dumpit == NULL)
+               if (rtnl_msg_handlers[idx] == NULL ||
+                   rtnl_msg_handlers[idx][type].dumpit == NULL)
                        continue;
                if (idx > s_idx)
                        memset(&cb->args[0], 0, sizeof(cb->args));
-               if (rtnetlink_links[idx][type].dumpit(skb, cb))
+               if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
                        break;
        }
        cb->family = idx;
@@ -497,20 +792,23 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
        struct sk_buff *skb;
-       int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
-                              sizeof(struct rtnl_link_ifmap) +
-                              sizeof(struct rtnl_link_stats) + 128);
+       int err = -ENOBUFS;
 
-       skb = alloc_skb(size, GFP_KERNEL);
-       if (!skb)
-               return;
+       skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL);
+       if (skb == NULL)
+               goto errout;
 
-       if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+       err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+       if (err < 0) {
+               /* -EMSGSIZE implies BUG in if_nlmsg_size() */
+               WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
-               return;
+               goto errout;
        }
-       NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-       netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
+       err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+       if (err < 0)
+               rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
@@ -519,30 +817,18 @@ static int rtattr_max;
 
 /* Process one rtnetlink message. */
 
-static __inline__ int
-rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-       struct rtnetlink_link *link;
-       struct rtnetlink_link *link_tab;
+       rtnl_doit_func doit;
        int sz_idx, kind;
        int min_len;
        int family;
        int type;
        int err;
 
-       /* Only requests are handled by kernel now */
-       if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-               return 0;
-
        type = nlh->nlmsg_type;
-
-       /* A control message: ignore them */
-       if (type < RTM_BASE)
-               return 0;
-
-       /* Unknown message: reply with EINVAL */
        if (type > RTM_MAX)
-               goto err_inval;
+               return -EOPNOTSUPP;
 
        type -= RTM_BASE;
 
@@ -551,45 +837,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
                return 0;
 
        family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-       if (family >= NPROTO) {
-               *errp = -EAFNOSUPPORT;
-               return -1;
-       }
-
-       link_tab = rtnetlink_links[family];
-       if (link_tab == NULL)
-               link_tab = rtnetlink_links[PF_UNSPEC];
-       link = &link_tab[type];
+       if (family >= NPROTO)
+               return -EAFNOSUPPORT;
 
        sz_idx = type>>2;
        kind = type&3;
 
-       if (kind != 2 && security_netlink_recv(skb)) {
-               *errp = -EPERM;
-               return -1;
-       }
+       if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+               return -EPERM;
 
        if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
-               if (link->dumpit == NULL)
-                       link = &(rtnetlink_links[PF_UNSPEC][type]);
+               rtnl_dumpit_func dumpit;
 
-               if (link->dumpit == NULL)
-                       goto err_inval;
+               dumpit = rtnl_get_dumpit(family, type);
+               if (dumpit == NULL)
+                       return -EOPNOTSUPP;
 
-               if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-                                               link->dumpit, NULL)) != 0) {
-                       return -1;
-               }
-
-               netlink_queue_skip(nlh, skb);
-               return -1;
+               __rtnl_unlock();
+               err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+               rtnl_lock();
+               return err;
        }
 
        memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
 
        min_len = rtm_min[sz_idx];
        if (nlh->nlmsg_len < min_len)
-               goto err_inval;
+               return -EINVAL;
 
        if (nlh->nlmsg_len > min_len) {
                int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -599,25 +873,18 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
                        unsigned flavor = attr->rta_type;
                        if (flavor) {
                                if (flavor > rta_max[sz_idx])
-                                       goto err_inval;
+                                       return -EINVAL;
                                rta_buf[flavor-1] = attr;
                        }
                        attr = RTA_NEXT(attr, attrlen);
                }
        }
 
-       if (link->doit == NULL)
-               link = &(rtnetlink_links[PF_UNSPEC][type]);
-       if (link->doit == NULL)
-               goto err_inval;
-       err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+       doit = rtnl_get_doit(family, type);
+       if (doit == NULL)
+               return -EOPNOTSUPP;
 
-       *errp = err;
-       return err;
-
-err_inval:
-       *errp = -EINVAL;
-       return -1;
+       return doit(skb, nlh, (void *)&rta_buf[0]);
 }
 
 static void rtnetlink_rcv(struct sock *sk, int len)
@@ -625,28 +892,14 @@ static void rtnetlink_rcv(struct sock *sk, int len)
        unsigned int qlen = 0;
 
        do {
-               rtnl_lock();
+               mutex_lock(&rtnl_mutex);
                netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
-               up(&rtnl_sem);
+               mutex_unlock(&rtnl_mutex);
 
                netdev_run_todo();
        } while (qlen);
 }
 
-static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-       [RTM_GETLINK     - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
-       [RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink            },
-       [RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
-       [RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
-       [RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add             },
-       [RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete          },
-       [RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info       },
-       [RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
-       [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info    },
-       [RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set          },
-};
-
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
        struct net_device *dev = ptr;
@@ -688,22 +941,26 @@ void __init rtnetlink_init(void)
                panic("rtnetlink_init: cannot allocate rta_buf\n");
 
        rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
-                                    THIS_MODULE);
+                                    &rtnl_mutex, THIS_MODULE);
        if (rtnl == NULL)
                panic("rtnetlink_init: cannot initialize rtnetlink\n");
        netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
        register_netdevice_notifier(&rtnetlink_dev_notifier);
-       rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
-       rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+
+       rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+       rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+
+       rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+       rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
 }
 
 EXPORT_SYMBOL(__rta_fill);
 EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
-EXPORT_SYMBOL(rtnl_lock_interruptible);
-EXPORT_SYMBOL(rtnl_sem);
+EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);