2 * Copyright (c) 2008, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Author: Lucy Liu <lucy.liu@intel.com>
20 #include <linux/netdevice.h>
21 #include <linux/netlink.h>
22 #include <net/netlink.h>
23 #include <net/rtnetlink.h>
24 #include <linux/dcbnl.h>
25 #include <linux/rtnetlink.h>
29 * Data Center Bridging (DCB) is a collection of Ethernet enhancements
30 * intended to allow network traffic with differing requirements
31 * (highly reliable, no drops vs. best effort vs. low latency) to operate
32 * and co-exist on Ethernet. Current DCB features are:
34 * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a
35 * framework for assigning bandwidth guarantees to traffic classes.
37 * Priority-based Flow Control (PFC) - provides a flow control mechanism which
38 * can work independently for each 802.1p priority.
40 * Congestion Notification - provides a mechanism for end-to-end congestion
41 * control for protocols which do not have built-in congestion management.
43 * More information about the emerging standards for these Ethernet features
44 * can be found at: http://www.ieee802.org/1/pages/dcbridges.html
46 * This file implements an rtnetlink interface to allow configuration of DCB
47 * features for capable devices.
/*
 * Module metadata.
 * NOTE(review): the description string says "generic netlink", while this
 * file registers its handlers with rtnl_register() and its header comment
 * describes an rtnetlink interface - confirm which wording is intended.
 */
50 MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
51 MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
52 MODULE_LICENSE("GPL");
54 /**************** DCB attribute policies *************************************/
56 /* DCB netlink attributes policy */
/*
 * Validation policy for the top-level DCB_ATTR_* attributes, one entry per
 * attribute type:
 *   - DCB_ATTR_IFNAME:      string, at most IFNAMSIZ - 1 bytes
 *   - DCB_ATTR_STATE:       u8
 *   - DCB_ATTR_PFC_CFG:     nested (see dcbnl_pfc_up_nest)
 *   - DCB_ATTR_PG_CFG:      nested (see dcbnl_pg_nest)
 *   - DCB_ATTR_SET_ALL:     u8
 *   - DCB_ATTR_PERM_HWADDR: flag
 * Presumably this is the policy handed to nlmsg_parse() in dcb_doit(); the
 * argument itself is not visible in this listing - TODO confirm.
 *
 * NOTE(review): dcb_doit() passes nla_data(tb[DCB_ATTR_IFNAME]) straight to
 * dev_get_by_name(), i.e. treats it as a C string. NLA_STRING is not
 * believed to enforce a terminating NUL; NLA_NUL_STRING looks like the
 * safer type here - confirm against the netlink attribute documentation.
 */
57 static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
58 [DCB_ATTR_IFNAME] = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
59 [DCB_ATTR_STATE] = {.type = NLA_U8},
60 [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED},
61 [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED},
62 [DCB_ATTR_SET_ALL] = {.type = NLA_U8},
63 [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
66 /* DCB priority flow control to User Priority nested attributes */
/*
 * Policy for the attributes nested inside DCB_ATTR_PFC_CFG: one u8 attribute
 * per entry DCB_PFC_UP_ATTR_0 .. DCB_PFC_UP_ATTR_7, plus the flag attribute
 * DCB_PFC_UP_ATTR_ALL. dcbnl_getpfccfg() tests DCB_PFC_UP_ATTR_ALL ahead of
 * its per-entry loop, and both PFC handlers iterate over the _0 .. _7 range.
 */
67 static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
68 [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8},
69 [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8},
70 [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8},
71 [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8},
72 [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8},
73 [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8},
74 [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8},
75 [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8},
76 [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
79 /* DCB priority grouping nested attributes */
/*
 * Policy for the attributes nested inside DCB_ATTR_PG_CFG; it is passed to
 * nla_parse_nested() by __dcbnl_pg_getcfg() and __dcbnl_pg_setcfg().
 *   - DCB_PG_ATTR_TC_0 .. _7 and DCB_PG_ATTR_TC_ALL are themselves nested
 *     (their contents are described by dcbnl_tc_param_nest).
 *   - DCB_PG_ATTR_BW_ID_0 .. _7 are u8 values.
 *   - DCB_PG_ATTR_BW_ID_ALL is a flag.
 */
80 static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
81 [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED},
82 [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED},
83 [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED},
84 [DCB_PG_ATTR_TC_3] = {.type = NLA_NESTED},
85 [DCB_PG_ATTR_TC_4] = {.type = NLA_NESTED},
86 [DCB_PG_ATTR_TC_5] = {.type = NLA_NESTED},
87 [DCB_PG_ATTR_TC_6] = {.type = NLA_NESTED},
88 [DCB_PG_ATTR_TC_7] = {.type = NLA_NESTED},
89 [DCB_PG_ATTR_TC_ALL] = {.type = NLA_NESTED},
90 [DCB_PG_ATTR_BW_ID_0] = {.type = NLA_U8},
91 [DCB_PG_ATTR_BW_ID_1] = {.type = NLA_U8},
92 [DCB_PG_ATTR_BW_ID_2] = {.type = NLA_U8},
93 [DCB_PG_ATTR_BW_ID_3] = {.type = NLA_U8},
94 [DCB_PG_ATTR_BW_ID_4] = {.type = NLA_U8},
95 [DCB_PG_ATTR_BW_ID_5] = {.type = NLA_U8},
96 [DCB_PG_ATTR_BW_ID_6] = {.type = NLA_U8},
97 [DCB_PG_ATTR_BW_ID_7] = {.type = NLA_U8},
98 [DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
101 /* DCB traffic class nested attributes. */
/*
 * Policy for the per-traffic-class parameters nested inside each
 * DCB_PG_ATTR_TC_* attribute; it is passed to nla_parse_nested() by
 * __dcbnl_pg_getcfg() and __dcbnl_pg_setcfg(). PGID, UP_MAPPING,
 * STRICT_PRIO and BW_PCT are u8 values; DCB_TC_ATTR_PARAM_ALL is a flag
 * that the get path treats as "report every parameter".
 */
102 static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
103 [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8},
104 [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8},
105 [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8},
106 [DCB_TC_ATTR_PARAM_BW_PCT] = {.type = NLA_U8},
107 [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG},
111 /* standard netlink reply call */
/*
 * dcbnl_reply - send a reply carrying a single u8 attribute.
 * @value: u8 payload stored under @attr
 * @event: message type given to NLMSG_NEW()
 * @cmd:   DCB command; not used on any visible line (presumably stored in
 *         the dcbmsg header - TODO confirm)
 * @attr:  attribute type for the u8 payload
 * @pid:   destination of the unicast, also given to NLMSG_NEW()
 *
 * Visible steps: allocate an skb, start a message with a dcbmsg-sized
 * header, set dcb_family to AF_UNSPEC, append the attribute, finalise the
 * message length and unicast it within init_net.
 *
 * NOTE(review): this listing omits lines of the function (rest of the
 * signature, declarations, error handling, return). In the visible lines
 * the result of nla_put_u8() is overwritten by rtnl_unicast(); verify that
 * a put failure is checked in between.
 */
112 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
115 struct sk_buff *dcbnl_skb;
117 struct nlmsghdr *nlh;
120 dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
124 nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
126 dcb = NLMSG_DATA(nlh);
127 dcb->dcb_family = AF_UNSPEC;
131 ret = nla_put_u8(dcbnl_skb, attr, value);
135 /* end the message, assign the nlmsg_len. */
136 nlmsg_end(dcbnl_skb, nlh);
137 ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
/*
 * dcbnl_getstate - handle DCB_CMD_GSTATE.
 * Calls the driver's getstate() operation and sends its result back through
 * dcbnl_reply() as DCB_ATTR_STATE in an RTM_GETDCB message. @tb is not
 * consulted on any visible line.
 * NOTE(review): what happens when the driver has no getstate() operation is
 * not visible in this listing.
 */
148 static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
149 u32 pid, u32 seq, u16 flags)
/* Older guard that also required DCB_ATTR_STATE in the request; disabled. */
153 /* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */
154 if (!netdev->dcbnl_ops->getstate)
157 ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
158 DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
/*
 * dcbnl_getpfccfg - handle DCB_CMD_PFC_GCFG.
 * Requires DCB_ATTR_PFC_CFG in the request and a driver getpfccfg()
 * operation. Parses the nested request attributes into data[], then builds
 * an RTM_GETDCB reply containing a DCB_ATTR_PFC_CFG nest with one u8
 * attribute per reported entry, and unicasts it to @pid in init_net.
 * NOTE(review): several lines (declarations, branch bodies, error labels,
 * return) are missing from this listing; only visible steps are described.
 */
163 static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
164 u32 pid, u32 seq, u16 flags)
166 struct sk_buff *dcbnl_skb;
167 struct nlmsghdr *nlh;
169 struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
175 if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
/* Split the nested request into data[], indexed by DCB_PFC_UP_ATTR_*. */
178 ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
179 tb[DCB_ATTR_PFC_CFG],
184 dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
188 nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
190 dcb = NLMSG_DATA(nlh);
191 dcb->dcb_family = AF_UNSPEC;
192 dcb->cmd = DCB_CMD_PFC_GCFG;
194 nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
/* DCB_PFC_UP_ATTR_ALL in the request presumably sets getall - TODO confirm. */
198 if (data[DCB_PFC_UP_ATTR_ALL])
201 for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
/* Entries that were not requested are filtered here unless getall is set. */
202 if (!getall && !data[i])
/* The driver is queried with a zero-based index. */
205 netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
/* The reply reuses the request's attribute type i for the value. */
207 ret = nla_put_u8(dcbnl_skb, i, value);
210 nla_nest_cancel(dcbnl_skb, nest);
214 nla_nest_end(dcbnl_skb, nest);
216 nlmsg_end(dcbnl_skb, nlh);
218 ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
/*
 * dcbnl_getperm_hwaddr - handle DCB_CMD_GPERM_HWADDR.
 * Requires a driver getpermhwaddr() operation. Builds an RTM_GETDCB reply,
 * lets the driver fill perm_addr, appends the buffer as
 * DCB_ATTR_PERM_HWADDR and unicasts the message to @pid in init_net.
 * @tb is not consulted on any visible line.
 *
 * NOTE(review): perm_addr is an automatic MAX_ADDR_LEN-byte buffer and all
 * sizeof(perm_addr) bytes are copied into the reply. No zeroing is visible
 * in this listing, so unless the driver writes every byte, leftover stack
 * contents may reach user space. Consider clearing the buffer before the
 * driver call - verify against the complete source.
 */
230 static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
231 u32 pid, u32 seq, u16 flags)
233 struct sk_buff *dcbnl_skb;
234 struct nlmsghdr *nlh;
236 u8 perm_addr[MAX_ADDR_LEN];
239 if (!netdev->dcbnl_ops->getpermhwaddr)
242 dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
246 nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
248 dcb = NLMSG_DATA(nlh);
249 dcb->dcb_family = AF_UNSPEC;
250 dcb->cmd = DCB_CMD_GPERM_HWADDR;
/* The driver writes the address into the caller-provided buffer. */
252 netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
/* The whole buffer is sent, not just the bytes the driver filled in. */
254 ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
257 nlmsg_end(dcbnl_skb, nlh);
259 ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
/*
 * __dcbnl_pg_getcfg - common code for the priority-group "get" commands.
 * @dir: non-zero produces a DCB_CMD_PGRX_GCFG reply, zero a
 *       DCB_CMD_PGTX_GCFG reply.
 *
 * Requires DCB_ATTR_PG_CFG in the request and all four driver operations
 * getpgtccfgtx/rx and getpgbwgcfgtx/rx. The reply is an RTM_GETDCB message
 * with a DCB_ATTR_PG_CFG nest that contains:
 *   1. for each reported traffic class, a nest of type DCB_PG_ATTR_TC_n with
 *      the requested parameters (PGID, UP_MAPPING, STRICT_PRIO, BW_PCT);
 *   2. for each reported bandwidth group, a u8 of type DCB_PG_ATTR_BW_ID_n.
 *
 * NOTE(review): several lines (declarations, branch bodies, the dir tests,
 * error labels, return) are missing from this listing; comments describe
 * only what the visible lines establish.
 */
272 static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
273 u32 pid, u32 seq, u16 flags, int dir)
275 struct sk_buff *dcbnl_skb;
276 struct nlmsghdr *nlh;
278 struct nlattr *pg_nest, *param_nest, *data;
279 struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
280 struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
281 u8 prio, pgid, tc_pct, up_map;
286 if (!tb[DCB_ATTR_PG_CFG] ||
287 !netdev->dcbnl_ops->getpgtccfgtx ||
288 !netdev->dcbnl_ops->getpgtccfgrx ||
289 !netdev->dcbnl_ops->getpgbwgcfgtx ||
290 !netdev->dcbnl_ops->getpgbwgcfgrx)
/* Split the request's PG nest into pg_tb[], indexed by DCB_PG_ATTR_*. */
293 ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
294 tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
299 dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
303 nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
305 dcb = NLMSG_DATA(nlh);
306 dcb->dcb_family = AF_UNSPEC;
307 dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
309 pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
/* DCB_PG_ATTR_TC_ALL presumably sets getall for the loop below - TODO confirm. */
313 if (pg_tb[DCB_PG_ATTR_TC_ALL])
/* Part 1: per-traffic-class parameters. */
316 for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
317 if (!getall && !pg_tb[i])
/* With TC_ALL, its nested parameter list is the template for every class. */
320 if (pg_tb[DCB_PG_ATTR_TC_ALL])
321 data = pg_tb[DCB_PG_ATTR_TC_ALL];
324 ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
325 data, dcbnl_tc_param_nest);
/* The reply nest reuses the request's attribute type i. */
329 param_nest = nla_nest_start(dcbnl_skb, i);
/* Defaults reported when the driver leaves a field untouched. */
333 pgid = DCB_ATTR_VALUE_UNDEFINED;
334 prio = DCB_ATTR_VALUE_UNDEFINED;
335 tc_pct = DCB_ATTR_VALUE_UNDEFINED;
336 up_map = DCB_ATTR_VALUE_UNDEFINED;
/* Rx or Tx query with a zero-based class index; presumably chosen by dir. */
340 netdev->dcbnl_ops->getpgtccfgrx(netdev,
341 i - DCB_PG_ATTR_TC_0, &prio,
342 &pgid, &tc_pct, &up_map);
345 netdev->dcbnl_ops->getpgtccfgtx(netdev,
346 i - DCB_PG_ATTR_TC_0, &prio,
347 &pgid, &tc_pct, &up_map);
/* Emit each parameter that was asked for by name or via PARAM_ALL. */
350 if (param_tb[DCB_TC_ATTR_PARAM_PGID] ||
351 param_tb[DCB_TC_ATTR_PARAM_ALL]) {
352 ret = nla_put_u8(dcbnl_skb,
353 DCB_TC_ATTR_PARAM_PGID, pgid);
357 if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] ||
358 param_tb[DCB_TC_ATTR_PARAM_ALL]) {
359 ret = nla_put_u8(dcbnl_skb,
360 DCB_TC_ATTR_PARAM_UP_MAPPING, up_map);
364 if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] ||
365 param_tb[DCB_TC_ATTR_PARAM_ALL]) {
366 ret = nla_put_u8(dcbnl_skb,
367 DCB_TC_ATTR_PARAM_STRICT_PRIO, prio);
371 if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] ||
372 param_tb[DCB_TC_ATTR_PARAM_ALL]) {
373 ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT,
378 nla_nest_end(dcbnl_skb, param_nest);
/* Part 2: per-bandwidth-group percentages. */
381 if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
386 for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
387 if (!getall && !pg_tb[i])
390 tc_pct = DCB_ATTR_VALUE_UNDEFINED;
/* Rx or Tx query with a zero-based group index; presumably chosen by dir. */
394 netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
395 i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
398 netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
399 i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
401 ret = nla_put_u8(dcbnl_skb, i, tc_pct);
407 nla_nest_end(dcbnl_skb, pg_nest);
409 nlmsg_end(dcbnl_skb, nlh);
411 ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
/* Presumably the error path (labels not visible): undo nests, inner first. */
418 nla_nest_cancel(dcbnl_skb, param_nest);
420 nla_nest_cancel(dcbnl_skb, pg_nest);
/* DCB_CMD_PGTX_GCFG handler: priority-group "get" with dir = 0 (Tx). */
429 static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
430 u32 pid, u32 seq, u16 flags)
432 return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
/* DCB_CMD_PGRX_GCFG handler: priority-group "get" with dir = 1 (Rx). */
435 static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
436 u32 pid, u32 seq, u16 flags)
438 return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
/*
 * dcbnl_setstate - handle DCB_CMD_SSTATE.
 * Requires DCB_ATTR_STATE in the request and a driver setstate() operation.
 * Reads the u8 state, hands it to the driver and acknowledges with an
 * RTM_SETDCB reply whose DCB_ATTR_STATE value is the constant 0.
 * NOTE(review): any result of setstate() is not used on the visible lines,
 * so the acknowledgement cannot reflect a driver failure - confirm that
 * this is intended.
 */
441 static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
442 u32 pid, u32 seq, u16 flags)
447 if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
450 value = nla_get_u8(tb[DCB_ATTR_STATE]);
452 netdev->dcbnl_ops->setstate(netdev, value);
454 ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
/*
 * dcbnl_setpfccfg - handle DCB_CMD_PFC_SCFG.
 * Requires DCB_ATTR_PFC_CFG in the request and a driver setpfccfg()
 * operation. Parses the nested attributes into data[], passes each u8 value
 * to the driver, then acknowledges with an RTM_SETDCB reply carrying
 * DCB_ATTR_PFC_CFG = 0.
 * NOTE(review): the lines between the loop header and nla_get_u8() are
 * missing from this listing; presumably absent attributes are skipped
 * there, since data[i] is dereferenced - TODO confirm.
 */
460 static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
461 u32 pid, u32 seq, u16 flags)
463 struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
468 if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
471 ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
472 tb[DCB_ATTR_PFC_CFG],
477 for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
480 value = nla_get_u8(data[i]);
/* The driver index is derived from the attribute's own type, zero-based. */
481 netdev->dcbnl_ops->setpfccfg(netdev,
482 data[i]->nla_type - DCB_PFC_UP_ATTR_0, value);
485 ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
/*
 * dcbnl_setall - handle DCB_CMD_SET_ALL.
 * Requires DCB_ATTR_SET_ALL in the request and a driver setall() operation.
 * Calls setall() and returns its result to the requester as the
 * DCB_ATTR_SET_ALL value of an RTM_SETDCB reply. The request attribute's
 * own value is not read on the visible lines, only its presence.
 */
491 static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
492 u32 pid, u32 seq, u16 flags)
496 if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
499 ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB,
500 DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags);
/*
 * __dcbnl_pg_setcfg - common code for the priority-group "set" commands.
 * @dir: Tx = 0, Rx = 1; also selects DCB_CMD_PGRX_SCFG or DCB_CMD_PGTX_SCFG
 *       for the acknowledgement.
 *
 * Requires DCB_ATTR_PG_CFG in the request and all four driver operations
 * setpgtccfgtx/rx and setpgbwgcfgtx/rx. Walks the traffic-class nests and
 * the bandwidth-group values in the request, forwards them to the driver,
 * and acknowledges with an RTM_SETDCB reply carrying DCB_ATTR_PG_CFG = 0.
 *
 * NOTE(review): several lines (declarations, presence checks on pg_tb[i],
 * assignment targets, the dir tests, return) are missing from this listing;
 * comments describe only what the visible lines establish.
 */
505 static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
506 u32 pid, u32 seq, u16 flags, int dir)
508 struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
509 struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
517 if (!tb[DCB_ATTR_PG_CFG] ||
518 !netdev->dcbnl_ops->setpgtccfgtx ||
519 !netdev->dcbnl_ops->setpgtccfgrx ||
520 !netdev->dcbnl_ops->setpgbwgcfgtx ||
521 !netdev->dcbnl_ops->setpgbwgcfgrx)
/* Split the request's PG nest into pg_tb[], indexed by DCB_PG_ATTR_*. */
524 ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
525 tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
/* Part 1: per-traffic-class parameters. */
529 for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
533 ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
534 pg_tb[i], dcbnl_tc_param_nest);
/* Parameters absent from the request reach the driver as UNDEFINED. */
538 pgid = DCB_ATTR_VALUE_UNDEFINED;
539 prio = DCB_ATTR_VALUE_UNDEFINED;
540 tc_pct = DCB_ATTR_VALUE_UNDEFINED;
541 up_map = DCB_ATTR_VALUE_UNDEFINED;
/* Assignment target line is missing from the listing (presumably prio). */
543 if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO])
545 nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]);
547 if (param_tb[DCB_TC_ATTR_PARAM_PGID])
548 pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]);
550 if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT])
551 tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]);
/* Assignment target line is missing from the listing (presumably up_map). */
553 if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING])
555 nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]);
557 /* dir: Tx = 0, Rx = 1 */
/* The driver receives a zero-based traffic class index. */
560 netdev->dcbnl_ops->setpgtccfgrx(netdev,
561 i - DCB_PG_ATTR_TC_0,
562 prio, pgid, tc_pct, up_map);
565 netdev->dcbnl_ops->setpgtccfgtx(netdev,
566 i - DCB_PG_ATTR_TC_0,
567 prio, pgid, tc_pct, up_map);
/* Part 2: per-bandwidth-group percentages. */
571 for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
575 tc_pct = nla_get_u8(pg_tb[i]);
577 /* dir: Tx = 0, Rx = 1 */
/* The driver receives a zero-based bandwidth group index. */
580 netdev->dcbnl_ops->setpgbwgcfgrx(netdev,
581 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
584 netdev->dcbnl_ops->setpgbwgcfgtx(netdev,
585 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
/* Acknowledge with value 0 under the command that matches dir. */
589 ret = dcbnl_reply(0, RTM_SETDCB,
590 (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG),
591 DCB_ATTR_PG_CFG, pid, seq, flags);
/* DCB_CMD_PGTX_SCFG handler: priority-group "set" with dir = 0 (Tx). */
597 static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
598 u32 pid, u32 seq, u16 flags)
600 return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0);
/* DCB_CMD_PGRX_SCFG handler: priority-group "set" with dir = 1 (Rx). */
603 static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
604 u32 pid, u32 seq, u16 flags)
606 return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
/*
 * dcb_doit - handler registered for RTM_GETDCB and RTM_SETDCB.
 * Parses the request attributes into tb[], looks up the device named by
 * DCB_ATTR_IFNAME in init_net, requires that device to have dcbnl_ops, and
 * dispatches on the DCB command to the matching dcbnl_* handler, passing the
 * requester's pid and the request's nlmsg_seq so the reply can be
 * correlated. @arg is not used on any visible line.
 *
 * NOTE(review): skb is dereferenced by sock_net(skb->sk) before the
 * "skb ? ... : 0" test on pid, so that NULL test is either unnecessary or
 * comes too late.
 * NOTE(review): dev_get_by_name() is understood to take a reference on the
 * device; no matching dev_put() is visible in this listing - verify that
 * every exit path releases it.
 * NOTE(review): several lines (declarations, branch bodies, the switch
 * header, some case labels, return) are missing from this listing.
 */
609 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
611 struct net *net = sock_net(skb->sk);
612 struct net_device *netdev;
613 struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
614 struct nlattr *tb[DCB_ATTR_MAX + 1];
615 u32 pid = skb ? NETLINK_CB(skb).pid : 0;
/* Requests from other network namespaces get separate handling. */
618 if (net != &init_net)
621 ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
/* The interface name is mandatory for every command. */
626 if (!tb[DCB_ATTR_IFNAME])
629 netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME]));
/* Devices without DCB operations get separate handling. */
633 if (!netdev->dcbnl_ops)
/* "Get" commands. */
638 ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq,
641 case DCB_CMD_PFC_GCFG:
642 ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
645 case DCB_CMD_GPERM_HWADDR:
646 ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq,
649 case DCB_CMD_PGTX_GCFG:
650 ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
653 case DCB_CMD_PGRX_GCFG:
654 ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
/* "Set" commands. */
658 ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
661 case DCB_CMD_PFC_SCFG:
662 ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
666 case DCB_CMD_SET_ALL:
667 ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq,
670 case DCB_CMD_PGTX_SCFG:
671 ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
674 case DCB_CMD_PGRX_SCFG:
675 ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
/*
 * dcbnl_init - module entry point.
 * Registers dcb_doit() as the rtnetlink handler for RTM_GETDCB and
 * RTM_SETDCB under PF_UNSPEC. The fourth rtnl_register() argument is NULL
 * (presumably "no dump callback" - confirm against rtnl_register()).
 */
688 static int __init dcbnl_init(void)
690 rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
691 rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
695 module_init(dcbnl_init);
/*
 * dcbnl_exit - module exit point.
 * Unregisters the RTM_GETDCB and RTM_SETDCB handlers that dcbnl_init()
 * registered under PF_UNSPEC.
 */
697 static void __exit dcbnl_exit(void)
699 rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
700 rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
702 module_exit(dcbnl_exit);