X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Fipvs%2Fip_vs_ctl.c;h=771551d8fba9e1cf9b2609db06ca3bea8c7cc2a9;hb=6067804047b64dde89f4f133fc7eba48ee44107d;hp=3c4d22a468ecd68499df51dbfbfa5ad3ad5bccbe;hpb=1e356f9cdfa885c78791d5d6e5d2baef22f01853;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 3c4d22a..771551d 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang * Peter Kese * Julian Anastasov @@ -37,8 +35,13 @@ #include #include +#ifdef CONFIG_IP_VS_IPV6 +#include +#include +#endif #include #include +#include #include @@ -92,6 +95,26 @@ int ip_vs_get_debug_level(void) } #endif +#ifdef CONFIG_IP_VS_IPV6 +/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ +static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) +{ + struct rt6_info *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = *addr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) + return 1; + + return 0; +} +#endif /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -283,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) +ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; - return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + + return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; } @@ -318,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); + hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, + svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* @@ -364,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* * Get service by {proto,addr,port} in the service table. */ -static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) +static inline struct ip_vs_service * +__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, + __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ - if ((svc->addr == vaddr) + if ((svc->af == af) + && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol)) { /* HIT */ @@ -390,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) /* * Get service by {fwmark} in the service table. */ -static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) +static inline struct ip_vs_service * +__ip_vs_svc_fwm_get(int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; @@ -399,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) hash = ip_vs_svc_fwm_hashkey(fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark) { + if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ atomic_inc(&svc->usecnt); return svc; @@ -410,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -419,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) + if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) goto out; /* * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_get(protocol, vaddr, vport); + svc = __ip_vs_service_get(af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -436,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); + svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -444,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_get(protocol, vaddr, 0); + svc = __ip_vs_service_get(af, protocol, vaddr, 0); } out: read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", - fwmark, ip_vs_proto_name(protocol), - NIPQUAD(vaddr), ntohs(vport), - svc?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", + fwmark, ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), + svc ? "hit" : "not hit"); return svc; } @@ -480,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) +static inline unsigned ip_vs_rs_hashkey(int af, + const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif - return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) + return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) & IP_VS_RTAB_MASK; } @@ -504,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) * Hash by proto,addr,port, * which are the parameters of the real service. */ - hash = ip_vs_rs_hashkey(dest->addr, dest->port); + hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); + list_add(&dest->d_list, &ip_vs_rtable[hash]); return 1; @@ -531,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -540,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Check for "full" addressed entries * Return the first found entry */ - hash = ip_vs_rs_hashkey(daddr, dport); + hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { - if ((dest->addr == daddr) + if ((dest->af == af) + && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { @@ -562,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest; @@ -570,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination for the given service */ list_for_each_entry(dest, &svc->destinations, n_list) { - if ((dest->addr == daddr) && (dest->port == dport)) { + if ((dest->af == svc->af) + && ip_vs_addr_equal(svc->af, &dest->addr, daddr) + && (dest->port == dport)) { /* HIT */ return dest; } @@ -589,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol) +struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, + __be16 dport, + const union nf_inet_addr *vaddr, + __be16 vport, __u16 protocol) { struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(0, protocol, vaddr, vport); + svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -604,7 +658,6 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, ip_vs_service_put(svc); return dest; } -EXPORT_SYMBOL(ip_vs_find_dest); /* * Lookup dest by {svc,addr,port} in the destination trash. @@ -617,7 +670,8 @@ EXPORT_SYMBOL(ip_vs_find_dest); * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -625,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination in trash */ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { - IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " - "dest->refcnt=%d\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); - if (dest->addr == daddr && + IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " + "dest->refcnt=%d\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (dest->af == svc->af && + ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && (svc->fwmark || - (dest->vaddr == svc->addr && + (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ return dest; @@ -645,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Try to purge the destination from trash if not referenced */ if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " - "from trash\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port)); + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " + "from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -686,9 +743,11 @@ static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(stats, 0, (char *)&stats->lock - (char *)stats); - spin_unlock_bh(&stats->lock); + + memset(&stats->ustats, 0, sizeof(stats->ustats)); ip_vs_zero_estimator(stats); + + spin_unlock_bh(&stats->lock); } /* @@ -696,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) */ static void __ip_vs_update_dest(struct ip_vs_service *svc, - struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) + struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) { int conn_flags; @@ -705,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc, conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(udest->addr) == RTN_LOCAL) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } + } else +#endif + if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { @@ -749,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, * Create a destination for the given service */ static int -ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, +ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; @@ -757,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, EnterFunction(2); - atype = inet_addr_type(udest->addr); - if (atype != RTN_LOCAL && atype != RTN_UNICAST) - return -EINVAL; +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + atype = ipv6_addr_type(&udest->addr.in6); + if ((!(atype & IPV6_ADDR_UNICAST) || + atype & IPV6_ADDR_LINKLOCAL) && + !__ip_vs_addr_is_local_v6(&udest->addr.in6)) + return -EINVAL; + } else +#endif + { + atype = inet_addr_type(&init_net, udest->addr.ip); + if (atype != RTN_LOCAL && atype != RTN_UNICAST) + return -EINVAL; + } dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); if (dest == NULL) { @@ -767,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, return -ENOMEM; } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - dest->addr = udest->addr; + ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -796,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, * Add a destination into an existing service */ static int -ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; int ret; @@ -816,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Check if the dest already exists in the list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest != NULL) { IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); return -EEXIST; @@ -829,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, daddr, dport); + dest = ip_vs_trash_get_dest(svc, &daddr, dport); + if (dest != NULL) { - IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " - "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", - NIPQUAD(daddr), ntohs(dport), - atomic_read(&dest->refcnt), - dest->vfwmark, - NIPQUAD(dest->vaddr), - ntohs(dest->vport)); + IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " + "dest->refcnt=%d, service %u/%s:%u\n", + IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + atomic_read(&dest->refcnt), + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->vaddr), + ntohs(dest->vport)); + __ip_vs_update_dest(svc, dest, udest); /* @@ -858,7 +942,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); return 0; @@ -888,7 +973,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -902,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Edit a destination in the given service */ static int -ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; EnterFunction(2); @@ -921,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Lookup the destination list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); return -ENOENT; @@ -938,7 +1027,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); /* call the update_service, because server weight may be changed */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -977,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) atomic_dec(&dest->svc->refcnt); kfree(dest); } else { - IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " - "dest->refcnt=%d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " + "dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); atomic_inc(&dest->refcnt); } @@ -1001,12 +1092,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, */ list_del(&dest->n_list); svc->num_dests--; - if (svcupd) { - /* - * Call the update_service function of its scheduler - */ - svc->scheduler->update_service(svc); - } + + /* + * Call the update_service function of its scheduler + */ + if (svcupd && svc->scheduler->update_service) + svc->scheduler->update_service(svc); } @@ -1014,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, * Delete a destination server in the given service */ static int -ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) +ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; __be16 dport = udest->port; EnterFunction(2); - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); return -ENOENT; @@ -1057,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) +ip_vs_add_service(struct ip_vs_service_user_kern *u, + struct ip_vs_service **svc_p) { int ret = 0; struct ip_vs_scheduler *sched = NULL; @@ -1075,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) goto out_mod_dec; } +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out_err; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out_err; + } + } +#endif + svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); if (svc == NULL) { IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); @@ -1086,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_set(&svc->usecnt, 1); atomic_set(&svc->refcnt, 0); + svc->af = u->af; svc->protocol = u->protocol; - svc->addr = u->addr; + ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; svc->flags = u->flags; @@ -1111,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_inc(&ip_vs_nullsvc_counter); ip_vs_new_estimator(&svc->stats); - ip_vs_num_services++; + + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1146,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) * Edit a service and bind it with a new scheduler */ static int -ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) +ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; int ret = 0; @@ -1162,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) } old_sched = sched; +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out; + } + } +#endif + write_lock_bh(&__ip_vs_svc_lock); /* @@ -1183,7 +1305,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ if ((ret = ip_vs_unbind_scheduler(svc))) { old_sched = sched; - goto out; + goto out_unlock; } /* @@ -1202,12 +1324,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ ip_vs_bind_scheduler(svc, old_sched); old_sched = sched; - goto out; + goto out_unlock; } } - out: + out_unlock: write_unlock_bh(&__ip_vs_svc_lock); + out: if (old_sched) ip_vs_scheduler_put(old_sched); @@ -1226,7 +1349,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; - ip_vs_num_services--; + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services--; + ip_vs_kill_estimator(&svc->stats); /* Unbind scheduler */ @@ -1425,7 +1551,6 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, static struct ctl_table vs_vars[] = { { - .ctl_name = NET_IPV4_VS_AMEMTHRESH, .procname = "amemthresh", .data = &sysctl_ip_vs_amemthresh, .maxlen = sizeof(int), @@ -1434,7 +1559,6 @@ static struct ctl_table vs_vars[] = { }, #ifdef CONFIG_IP_VS_DEBUG { - .ctl_name = NET_IPV4_VS_DEBUG_LEVEL, .procname = "debug_level", .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), @@ -1443,7 +1567,6 @@ static struct ctl_table vs_vars[] = { }, #endif { - .ctl_name = NET_IPV4_VS_AMDROPRATE, .procname = "am_droprate", .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), @@ -1451,7 +1574,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_DROP_ENTRY, .procname = "drop_entry", .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), @@ -1459,7 +1581,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_defense_mode, }, { - .ctl_name = NET_IPV4_VS_DROP_PACKET, .procname = "drop_packet", .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), @@ -1467,7 +1588,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_defense_mode, }, { - .ctl_name = NET_IPV4_VS_SECURE_TCP, .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), @@ -1476,7 +1596,6 @@ static struct ctl_table vs_vars[] = { }, #if 0 { - .ctl_name = NET_IPV4_VS_TO_ES, .procname = "timeout_established", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], .maxlen = sizeof(int), @@ -1484,7 +1603,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SS, .procname = "timeout_synsent", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], .maxlen = sizeof(int), @@ -1492,7 +1610,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SR, .procname = "timeout_synrecv", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], .maxlen = sizeof(int), @@ -1500,7 +1617,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_FW, .procname = "timeout_finwait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], .maxlen = sizeof(int), @@ -1508,7 +1624,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_TW, .procname = "timeout_timewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], .maxlen = sizeof(int), @@ -1516,7 +1631,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_CL, .procname = "timeout_close", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], .maxlen = sizeof(int), @@ -1524,7 +1638,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_CW, .procname = "timeout_closewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], .maxlen = sizeof(int), @@ -1532,7 +1645,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_LA, .procname = "timeout_lastack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], .maxlen = sizeof(int), @@ -1540,7 +1652,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_LI, .procname = "timeout_listen", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], .maxlen = sizeof(int), @@ -1548,7 +1659,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SA, .procname = "timeout_synack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], .maxlen = sizeof(int), @@ -1556,7 +1666,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_UDP, .procname = "timeout_udp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], .maxlen = sizeof(int), @@ -1564,7 +1673,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_ICMP, .procname = "timeout_icmp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], .maxlen = sizeof(int), @@ -1573,7 +1681,6 @@ static struct ctl_table vs_vars[] = { }, #endif { - .ctl_name = NET_IPV4_VS_CACHE_BYPASS, .procname = "cache_bypass", .data = &sysctl_ip_vs_cache_bypass, .maxlen = sizeof(int), @@ -1581,7 +1688,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN, .procname = "expire_nodest_conn", .data = &sysctl_ip_vs_expire_nodest_conn, .maxlen = sizeof(int), @@ -1589,7 +1695,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, .procname = "expire_quiescent_template", .data = &sysctl_ip_vs_expire_quiescent_template, .maxlen = sizeof(int), @@ -1597,7 +1702,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD, .procname = "sync_threshold", .data = &sysctl_ip_vs_sync_threshold, .maxlen = sizeof(sysctl_ip_vs_sync_threshold), @@ -1605,7 +1709,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_sync_threshold, }, { - .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND, .procname = "nat_icmp_send", .data = &sysctl_ip_vs_nat_icmp_send, .maxlen = sizeof(int), @@ -1615,35 +1718,13 @@ static struct ctl_table vs_vars[] = { { .ctl_name = 0 } }; -static ctl_table vs_table[] = { - { - .ctl_name = NET_IPV4_VS, - .procname = "vs", - .mode = 0555, - .child = vs_vars - }, - { .ctl_name = 0 } -}; - -static ctl_table ipvs_ipv4_table[] = { - { - .ctl_name = NET_IPV4, - .procname = "ipv4", - .mode = 0555, - .child = vs_table, - }, - { .ctl_name = 0 } -}; - -static ctl_table vs_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipvs_ipv4_table, - }, - { .ctl_name = 0 } +const struct ctl_path net_vs_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "vs", }, + { } }; +EXPORT_SYMBOL_GPL(net_vs_ctl_path); static struct ctl_table_header * sysctl_header; @@ -1706,6 +1787,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) +__acquires(__ip_vs_svc_lock) { read_lock_bh(&__ip_vs_svc_lock); @@ -1759,6 +1841,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) +__releases(__ip_vs_svc_lock) { read_unlock_bh(&__ip_vs_svc_lock); } @@ -1779,15 +1862,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; - if (iter->table == ip_vs_svc_table) - seq_printf(seq, "%s %08X:%04X %s ", - ip_vs_proto_name(svc->protocol), - ntohl(svc->addr), - ntohs(svc->port), - svc->scheduler->name); - else + if (iter->table == ip_vs_svc_table) { +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ", + ip_vs_proto_name(svc->protocol), + NIP6(svc->addr.in6), + ntohs(svc->port), + svc->scheduler->name); + else +#endif + seq_printf(seq, "%s %08X:%04X %s ", + ip_vs_proto_name(svc->protocol), + ntohl(svc->addr.ip), + ntohs(svc->port), + svc->scheduler->name); + } else { seq_printf(seq, "FWM %08X %s ", svc->fwmark, svc->scheduler->name); + } if (svc->flags & IP_VS_SVC_F_PERSISTENT) seq_printf(seq, "persistent %d %08X\n", @@ -1797,13 +1890,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); list_for_each_entry(dest, &svc->destinations, n_list) { - seq_printf(seq, - " -> %08X:%04X %-7s %-6d %-10d %-10d\n", - ntohl(dest->addr), ntohs(dest->port), - ip_vs_fwd_name(atomic_read(&dest->conn_flags)), - atomic_read(&dest->weight), - atomic_read(&dest->activeconns), - atomic_read(&dest->inactconns)); +#ifdef CONFIG_IP_VS_IPV6 + if (dest->af == AF_INET6) + seq_printf(seq, + " -> [" NIP6_FMT "]:%04X" + " %-7s %-6d %-10d %-10d\n", + NIP6(dest->addr.in6), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + else +#endif + seq_printf(seq, + " -> %08X:%04X " + "%-7s %-6d %-10d %-10d\n", + ntohl(dest->addr.ip), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + } } return 0; @@ -1832,7 +1941,9 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats; +struct ip_vs_stats ip_vs_stats = { + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), +}; #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) @@ -1845,20 +1956,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, - ip_vs_stats.inpkts, ip_vs_stats.outpkts, - (unsigned long long) ip_vs_stats.inbytes, - (unsigned long long) ip_vs_stats.outbytes); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, + ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, + (unsigned long long) ip_vs_stats.ustats.inbytes, + (unsigned long long) ip_vs_stats.ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.cps, - ip_vs_stats.inpps, - ip_vs_stats.outpps, - ip_vs_stats.inbps, - ip_vs_stats.outbps); + ip_vs_stats.ustats.cps, + ip_vs_stats.ustats.inpps, + ip_vs_stats.ustats.outpps, + ip_vs_stats.ustats.inbps, + ip_vs_stats.ustats.outbps); spin_unlock_bh(&ip_vs_stats.lock); return 0; @@ -1933,14 +2044,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, }; +static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, + struct ip_vs_service_user *usvc_compat) +{ + usvc->af = AF_INET; + usvc->protocol = usvc_compat->protocol; + usvc->addr.ip = usvc_compat->addr; + usvc->port = usvc_compat->port; + usvc->fwmark = usvc_compat->fwmark; + + /* Deep copy of sched_name is not needed here */ + usvc->sched_name = usvc_compat->sched_name; + + usvc->flags = usvc_compat->flags; + usvc->timeout = usvc_compat->timeout; + usvc->netmask = usvc_compat->netmask; +} + +static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, + struct ip_vs_dest_user *udest_compat) +{ + udest->addr.ip = udest_compat->addr; + udest->port = udest_compat->port; + udest->conn_flags = udest_compat->conn_flags; + udest->weight = udest_compat->weight; + udest->u_threshold = udest_compat->u_threshold; + udest->l_threshold = udest_compat->l_threshold; +} + static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; unsigned char arg[MAX_ARG_LEN]; - struct ip_vs_service_user *usvc; + struct ip_vs_service_user *usvc_compat; + struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; - struct ip_vs_dest_user *udest; + struct ip_vs_dest_user *udest_compat; + struct ip_vs_dest_user_kern udest; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1980,35 +2121,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } - usvc = (struct ip_vs_service_user *)arg; - udest = (struct ip_vs_dest_user *)(usvc + 1); + usvc_compat = (struct ip_vs_service_user *)arg; + udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); + + /* We only use the new structs internally, so copy userspace compat + * structs to extended internal versions */ + ip_vs_copy_usvc_compat(&usvc, usvc_compat); + ip_vs_copy_udest_compat(&udest, udest_compat); if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ - if (!usvc->fwmark && !usvc->addr && !usvc->port) { + if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { ret = ip_vs_zero_all(); goto out_unlock; } } /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", - usvc->protocol, NIPQUAD(usvc->addr), - ntohs(usvc->port), usvc->sched_name); + usvc.protocol, NIPQUAD(usvc.addr.ip), + ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; } /* Lookup the exact service by or fwmark */ - if (usvc->fwmark == 0) - svc = __ip_vs_service_get(usvc->protocol, - usvc->addr, usvc->port); + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_get(usvc->fwmark); + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD - && (svc == NULL || svc->protocol != usvc->protocol)) { + && (svc == NULL || svc->protocol != usvc.protocol)) { ret = -ESRCH; goto out_unlock; } @@ -2018,10 +2164,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(usvc, &svc); + ret = ip_vs_add_service(&usvc, &svc); break; case IP_VS_SO_SET_EDIT: - ret = ip_vs_edit_service(svc, usvc); + ret = ip_vs_edit_service(svc, &usvc); break; case IP_VS_SO_SET_DEL: ret = ip_vs_del_service(svc); @@ -2032,13 +2178,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = ip_vs_zero_service(svc); break; case IP_VS_SO_SET_ADDDEST: - ret = ip_vs_add_dest(svc, udest); + ret = ip_vs_add_dest(svc, &udest); break; case IP_VS_SO_SET_EDITDEST: - ret = ip_vs_edit_dest(svc, udest); + ret = ip_vs_edit_dest(svc, &udest); break; case IP_VS_SO_SET_DELDEST: - ret = ip_vs_del_dest(svc, udest); + ret = ip_vs_del_dest(svc, &udest); break; default: ret = -EINVAL; @@ -2061,7 +2207,7 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { spin_lock_bh(&src->lock); - memcpy(dst, src, (char*)&src->lock - (char*)src); + memcpy(dst, &src->ustats, sizeof(*dst)); spin_unlock_bh(&src->lock); } @@ -2069,7 +2215,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; - dst->addr = src->addr; + dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); @@ -2091,6 +2237,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2106,6 +2256,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2127,13 +2281,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; + union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_get(get->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); else - svc = __ip_vs_service_get(get->protocol, - get->addr, get->port); + svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, + get->port); + if (svc) { int count = 0; struct ip_vs_dest *dest; @@ -2143,7 +2299,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; - entry.addr = dest->addr; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); entry.weight = atomic_read(&dest->weight); @@ -2268,13 +2424,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_service_entry *entry; struct ip_vs_service *svc; + union nf_inet_addr addr; entry = (struct ip_vs_service_entry *)arg; + addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_get(entry->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); else - svc = __ip_vs_service_get(entry->protocol, - entry->addr, entry->port); + svc = __ip_vs_service_get(AF_INET, entry->protocol, + &addr, entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2353,8 +2511,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .owner = THIS_MODULE, }; +/* + * Generic Netlink interface + */ + +/* IPVS genetlink family */ +static struct genl_family ip_vs_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, +}; + +/* Policy used for first-level command attributes */ +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { + [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { + [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, + .len = IP_VS_IFNAME_MAXLEN }, + [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { + [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, + .len = sizeof(struct ip_vs_flags) }, + [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { + [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, + struct ip_vs_stats *stats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) + return -EMSGSIZE; + + spin_lock_bh(&stats->lock); + + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + + spin_unlock_bh(&stats->lock); + + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + spin_unlock_bh(&stats->lock); + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_service(struct sk_buff *skb, + struct ip_vs_service *svc) +{ + struct nlattr *nl_service; + struct ip_vs_flags flags = { .flags = svc->flags, + .mask = ~0 }; + + nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + if (!nl_service) + return -EMSGSIZE; + + NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); + + if (svc->fwmark) { + NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); + } else { + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); + NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); + } + + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); + + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_service); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_service); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_service(struct sk_buff *skb, + struct ip_vs_service *svc, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_SERVICE); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_service(skb, svc) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_services(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0, i; + int start = cb->args[0]; + struct ip_vs_service *svc; + + mutex_lock(&__ip_vs_mutex); + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + cb->args[0] = idx; + + return skb->len; +} + +static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; + struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; + + /* Parse mandatory identifying service fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + return -EINVAL; + + nla_af = attrs[IPVS_SVC_ATTR_AF]; + nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; + nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; + nla_port = attrs[IPVS_SVC_ATTR_PORT]; + nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; + + if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) + return -EINVAL; + + usvc->af = nla_get_u16(nla_af); +#ifdef CONFIG_IP_VS_IPV6 + if (usvc->af != AF_INET && usvc->af != AF_INET6) +#else + if (usvc->af != AF_INET) +#endif + return -EAFNOSUPPORT; + + if (nla_fwmark) { + usvc->protocol = IPPROTO_TCP; + usvc->fwmark = nla_get_u32(nla_fwmark); + } else { + usvc->protocol = nla_get_u16(nla_protocol); + nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); + usvc->port = nla_get_u16(nla_port); + usvc->fwmark = 0; + } + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_sched, *nla_flags, *nla_timeout, + *nla_netmask; + struct ip_vs_flags flags; + struct ip_vs_service *svc; + + nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; + nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; + nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; + + if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) + return -EINVAL; + + nla_memcpy(&flags, nla_flags, sizeof(flags)); + + /* prefill flags from service if it already exists */ + if (usvc->fwmark) + svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); + else + svc = __ip_vs_service_get(usvc->af, usvc->protocol, + &usvc->addr, usvc->port); + if (svc) { + usvc->flags = svc->flags; + ip_vs_service_put(svc); + } else + usvc->flags = 0; + + /* set new flags from userland */ + usvc->flags = (usvc->flags & ~flags.mask) | + (flags.flags & flags.mask); + usvc->sched_name = nla_data(nla_sched); + usvc->timeout = nla_get_u32(nla_timeout); + usvc->netmask = nla_get_u32(nla_netmask); + } + + return 0; +} + +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +{ + struct ip_vs_service_user_kern usvc; + int ret; + + ret = ip_vs_genl_parse_service(&usvc, nla, 0); + if (ret) + return ERR_PTR(ret); + + if (usvc.fwmark) + return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + else + return __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); +} + +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) +{ + struct nlattr *nl_dest; + + nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + if (!nl_dest) + return -EMSGSIZE; + + NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); + NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); + + NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, + atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, + atomic_read(&dest->activeconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, + atomic_read(&dest->inactconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, + atomic_read(&dest->persistconns)); + + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_dest); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_dest); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DEST); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_dest(skb, dest) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dests(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + + mutex_lock(&__ip_vs_mutex); + + /* Try to find the service for which to dump destinations */ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, + IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + goto out_err; + + svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc) || svc == NULL) + goto out_err; + + /* Dump the destinations */ + list_for_each_entry(dest, &svc->destinations, n_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + +nla_put_failure: + cb->args[0] = idx; + ip_vs_service_put(svc); + +out_err: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; + struct nlattr *nla_addr, *nla_port; + + /* Parse mandatory identifying destination fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + return -EINVAL; + + nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; + nla_port = attrs[IPVS_DEST_ATTR_PORT]; + + if (!(nla_addr && nla_port)) + return -EINVAL; + + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); + udest->port = nla_get_u16(nla_port); + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, + *nla_l_thresh; + + nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; + nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; + nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; + nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + + if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) + return -EINVAL; + + udest->conn_flags = nla_get_u32(nla_fwd) + & IP_VS_CONN_F_FWD_MASK; + udest->weight = nla_get_u32(nla_weight); + udest->u_threshold = nla_get_u32(nla_u_thresh); + udest->l_threshold = nla_get_u32(nla_l_thresh); + } + + return 0; +} + +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid) +{ + struct nlattr *nl_daemon; + + nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); + if (!nl_daemon) + return -EMSGSIZE; + + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); + NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); + + nla_nest_end(skb, nl_daemon); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_daemon); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid, + struct netlink_callback *cb) +{ + void *hdr; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DAEMON); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemons(struct sk_buff *skb, + struct netlink_callback *cb) +{ + mutex_lock(&__ip_vs_mutex); + if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, + ip_vs_master_mcast_ifn, + ip_vs_master_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[0] = 1; + } + + if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, + ip_vs_backup_mcast_ifn, + ip_vs_backup_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[1] = 1; + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_new_daemon(struct nlattr **attrs) +{ + if (!(attrs[IPVS_DAEMON_ATTR_STATE] && + attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && + attrs[IPVS_DAEMON_ATTR_SYNC_ID])) + return -EINVAL; + + return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); +} + +static int ip_vs_genl_del_daemon(struct nlattr **attrs) +{ + if (!attrs[IPVS_DAEMON_ATTR_STATE]) + return -EINVAL; + + return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); +} + +static int ip_vs_genl_set_config(struct nlattr **attrs) +{ + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) + t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) + t.tcp_fin_timeout = + nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) + t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); + + return ip_vs_set_timeout(&t); +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(info->attrs); + goto out; + } else if (cmd == IPVS_CMD_NEW_DAEMON || + cmd == IPVS_CMD_DEL_DAEMON) { + + struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + + if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || + nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, + info->attrs[IPVS_CMD_ATTR_DAEMON], + ip_vs_daemon_policy)) { + ret = -EINVAL; + goto out; + } + + if (cmd == IPVS_CMD_NEW_DAEMON) + ret = ip_vs_genl_new_daemon(daemon_attrs); + else + ret = ip_vs_genl_del_daemon(daemon_attrs); + goto out; + } else if (cmd == IPVS_CMD_ZERO && + !info->attrs[IPVS_CMD_ATTR_SERVICE]) { + ret = ip_vs_zero_all(); + goto out; + } + + /* All following commands require a service argument, so check if we + * received a valid one. We need a full service specification when + * adding / editing a service. Only identifying members otherwise. */ + if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) + need_full_svc = 1; + + ret = ip_vs_genl_parse_service(&usvc, + info->attrs[IPVS_CMD_ATTR_SERVICE], + need_full_svc); + if (ret) + goto out; + + /* Lookup the exact service by or fwmark */ + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); + else + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + + /* Unless we're adding a new service, the service must already exist */ + if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { + ret = -ESRCH; + goto out; + } + + /* Destination commands require a valid destination argument. For + * adding / editing a destination, we need a full destination + * specification. */ + if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || + cmd == IPVS_CMD_DEL_DEST) { + if (cmd != IPVS_CMD_DEL_DEST) + need_full_dest = 1; + + ret = ip_vs_genl_parse_dest(&udest, + info->attrs[IPVS_CMD_ATTR_DEST], + need_full_dest); + if (ret) + goto out; + } + + switch (cmd) { + case IPVS_CMD_NEW_SERVICE: + if (svc == NULL) + ret = ip_vs_add_service(&usvc, &svc); + else + ret = -EEXIST; + break; + case IPVS_CMD_SET_SERVICE: + ret = ip_vs_edit_service(svc, &usvc); + break; + case IPVS_CMD_DEL_SERVICE: + ret = ip_vs_del_service(svc); + break; + case IPVS_CMD_NEW_DEST: + ret = ip_vs_add_dest(svc, &udest); + break; + case IPVS_CMD_SET_DEST: + ret = ip_vs_edit_dest(svc, &udest); + break; + case IPVS_CMD_DEL_DEST: + ret = ip_vs_del_dest(svc, &udest); + break; + case IPVS_CMD_ZERO: + ret = ip_vs_zero_service(svc); + break; + default: + ret = -EINVAL; + } + +out: + if (svc) + ip_vs_service_put(svc); + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *reply; + int ret, cmd, reply_cmd; + + cmd = info->genlhdr->cmd; + + if (cmd == IPVS_CMD_GET_SERVICE) + reply_cmd = IPVS_CMD_NEW_SERVICE; + else if (cmd == IPVS_CMD_GET_INFO) + reply_cmd = IPVS_CMD_SET_INFO; + else if (cmd == IPVS_CMD_GET_CONFIG) + reply_cmd = IPVS_CMD_SET_CONFIG; + else { + IP_VS_ERR("unknown Generic Netlink command\n"); + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&__ip_vs_mutex); + + reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); + if (reply == NULL) + goto nla_put_failure; + + switch (cmd) { + case IPVS_CMD_GET_SERVICE: + { + struct ip_vs_service *svc; + + svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc)) { + ret = PTR_ERR(svc); + goto out_err; + } else if (svc) { + ret = ip_vs_genl_fill_service(msg, svc); + ip_vs_service_put(svc); + if (ret) + goto nla_put_failure; + } else { + ret = -ESRCH; + goto out_err; + } + + break; + } + + case IPVS_CMD_GET_CONFIG: + { + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); +#ifdef CONFIG_IP_VS_PROTO_TCP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, + t.tcp_fin_timeout); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); +#endif + + break; + } + + case IPVS_CMD_GET_INFO: + NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); + NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, + IP_VS_CONN_TAB_SIZE); + break; + } + + genlmsg_end(msg, reply); + ret = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + IP_VS_ERR("not enough space in Netlink message\n"); + ret = -EMSGSIZE; + +out_err: + nlmsg_free(msg); +out: + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + + +static struct genl_ops ip_vs_genl_ops[] __read_mostly = { + { + .cmd = IPVS_CMD_NEW_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_SERVICE, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + .dumpit = ip_vs_genl_dump_services, + .policy = ip_vs_cmd_policy, + }, + { + .cmd = IPVS_CMD_NEW_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .dumpit = ip_vs_genl_dump_dests, + }, + { + .cmd = IPVS_CMD_NEW_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DAEMON, + .flags = GENL_ADMIN_PERM, + .dumpit = ip_vs_genl_dump_daemons, + }, + { + .cmd = IPVS_CMD_SET_CONFIG, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_CONFIG, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_GET_INFO, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_ZERO, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_FLUSH, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_set_cmd, + }, +}; + +static int __init ip_vs_genl_register(void) +{ + int ret, i; -int ip_vs_control_init(void) + ret = genl_register_family(&ip_vs_genl_family); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { + ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); + if (ret) + goto err_out; + } + return 0; + +err_out: + genl_unregister_family(&ip_vs_genl_family); + return ret; +} + +static void ip_vs_genl_unregister(void) +{ + genl_unregister_family(&ip_vs_genl_family); +} + +/* End of Generic Netlink interface definitions */ + + +int __init ip_vs_control_init(void) { int ret; int idx; @@ -2367,10 +3394,17 @@ int ip_vs_control_init(void) return ret; } + ret = ip_vs_genl_register(); + if (ret) { + IP_VS_ERR("cannot register Generic Netlink interface.\n"); + nf_unregister_sockopt(&ip_vs_sockopts); + return ret; + } + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); - sysctl_header = register_sysctl_table(vs_root_table); + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { @@ -2381,8 +3415,6 @@ int ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_rtable[idx]); } - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); - spin_lock_init(&ip_vs_stats.lock); ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ @@ -2403,6 +3435,7 @@ void ip_vs_control_cleanup(void) unregister_sysctl_table(sysctl_header); proc_net_remove(&init_net, "ip_vs_stats"); proc_net_remove(&init_net, "ip_vs"); + ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); }