2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
36 #include <net/net_namespace.h>
38 #include <net/route.h>
40 #include <net/genetlink.h>
42 #include <asm/uaccess.h>
44 #include <net/ip_vs.h>
46 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DEFINE_MUTEX(__ip_vs_mutex);
49 /* lock for service table */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock);
52 /* lock for table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock);
55 /* lock for state and timeout tables */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
58 /* lock for drop entry handling */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
61 /* lock for drop packet handling */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
64 /* 1/rate drop and drop-entry variables */
65 int ip_vs_drop_rate = 0;
66 int ip_vs_drop_counter = 0;
67 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
69 /* number of virtual services */
70 static int ip_vs_num_services = 0;
72 /* sysctl variables */
73 static int sysctl_ip_vs_drop_entry = 0;
74 static int sysctl_ip_vs_drop_packet = 0;
75 static int sysctl_ip_vs_secure_tcp = 0;
76 static int sysctl_ip_vs_amemthresh = 1024;
77 static int sysctl_ip_vs_am_droprate = 10;
78 int sysctl_ip_vs_cache_bypass = 0;
79 int sysctl_ip_vs_expire_nodest_conn = 0;
80 int sysctl_ip_vs_expire_quiescent_template = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
#ifdef CONFIG_IP_VS_DEBUG
/* Runtime debug verbosity, adjustable via the "debug_level" sysctl. */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level (0 when debugging is compiled out). */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
#endif
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
98 static void update_defense_level(void)
101 static int old_secure_tcp = 0;
106 /* we only count free and buffered memory (in pages) */
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
122 atomic_set(&ip_vs_dropentry, 0);
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
129 atomic_set(&ip_vs_dropentry, 0);
134 atomic_set(&ip_vs_dropentry, 1);
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
141 atomic_set(&ip_vs_dropentry, 1);
144 spin_unlock(&__ip_vs_dropentry_lock);
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
169 sysctl_ip_vs_drop_packet = 1;
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
176 spin_unlock(&__ip_vs_droppacket_lock);
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
182 if (old_secure_tcp >= 2)
187 if (old_secure_tcp < 2)
189 sysctl_ip_vs_secure_tcp = 2;
191 if (old_secure_tcp >= 2)
197 if (old_secure_tcp < 2)
200 if (old_secure_tcp >= 2)
202 sysctl_ip_vs_secure_tcp = 1;
206 if (old_secure_tcp < 2)
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
220 * Timer for checking the defense
222 #define DEFENSE_TIMER_PERIOD 1*HZ
223 static void defense_work_handler(struct work_struct *work);
224 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
226 static void defense_work_handler(struct work_struct *work)
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
236 ip_vs_use_count_inc(void)
238 return try_module_get(THIS_MODULE);
242 ip_vs_use_count_dec(void)
244 module_put(THIS_MODULE);
249 * Hash table: for virtual service lookups
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
255 /* the service table hashed by <protocol, addr, port> */
256 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257 /* the service table hashed by fwmark */
258 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
261 * Hash table: for real service lookups
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
267 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
270 * Trash for destinations
272 static LIST_HEAD(ip_vs_dest_trash);
275 * FTP & NULL virtual service counters
277 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
282 * Returns hash value for virtual service
284 static __inline__ unsigned
285 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
288 register unsigned porth = ntohs(port);
289 __be32 addr_fold = addr->ip;
291 #ifdef CONFIG_IP_VS_IPV6
293 addr_fold = addr->ip6[0]^addr->ip6[1]^
294 addr->ip6[2]^addr->ip6[3];
297 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
298 & IP_VS_SVC_TAB_MASK;
302 * Returns hash value of fwmark for virtual service lookup
304 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
306 return fwmark & IP_VS_SVC_TAB_MASK;
310 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311 * or in the ip_vs_svc_fwm_table by fwmark.
312 * Should be called with locked tables.
314 static int ip_vs_svc_hash(struct ip_vs_service *svc)
318 if (svc->flags & IP_VS_SVC_F_HASHED) {
319 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320 "called from %p\n", __builtin_return_address(0));
324 if (svc->fwmark == 0) {
326 * Hash it by <protocol,addr,port> in ip_vs_svc_table
328 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
330 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
333 * Hash it by fwmark in ip_vs_svc_fwm_table
335 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
339 svc->flags |= IP_VS_SVC_F_HASHED;
340 /* increase its refcnt because it is referenced by the svc table */
341 atomic_inc(&svc->refcnt);
347 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348 * Should be called with locked tables.
350 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
352 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354 "called from %p\n", __builtin_return_address(0));
358 if (svc->fwmark == 0) {
359 /* Remove it from the ip_vs_svc_table table */
360 list_del(&svc->s_list);
362 /* Remove it from the ip_vs_svc_fwm_table table */
363 list_del(&svc->f_list);
366 svc->flags &= ~IP_VS_SVC_F_HASHED;
367 atomic_dec(&svc->refcnt);
373 * Get service by {proto,addr,port} in the service table.
375 static inline struct ip_vs_service *
376 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
380 struct ip_vs_service *svc;
382 /* Check for "full" addressed entries */
383 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
385 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
387 && ip_vs_addr_equal(af, &svc->addr, vaddr)
388 && (svc->port == vport)
389 && (svc->protocol == protocol)) {
391 atomic_inc(&svc->usecnt);
401 * Get service by {fwmark} in the service table.
403 static inline struct ip_vs_service *
404 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
407 struct ip_vs_service *svc;
409 /* Check for fwmark addressed entries */
410 hash = ip_vs_svc_fwm_hashkey(fwmark);
412 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
413 if (svc->fwmark == fwmark && svc->af == af) {
415 atomic_inc(&svc->usecnt);
423 struct ip_vs_service *
424 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
426 struct ip_vs_service *svc;
427 union nf_inet_addr _vaddr = { .ip = vaddr };
428 read_lock(&__ip_vs_svc_lock);
431 * Check the table hashed by fwmark first
433 if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark)))
437 * Check the table hashed by <protocol,addr,port>
438 * for "full" addressed entries
440 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport);
443 && protocol == IPPROTO_TCP
444 && atomic_read(&ip_vs_ftpsvc_counter)
445 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
447 * Check if ftp service entry exists, the packet
448 * might belong to FTP data connections.
450 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT);
454 && atomic_read(&ip_vs_nullsvc_counter)) {
456 * Check if the catch-all port (port zero) exists
458 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0);
462 read_unlock(&__ip_vs_svc_lock);
464 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
465 fwmark, ip_vs_proto_name(protocol),
466 NIPQUAD(vaddr), ntohs(vport),
467 svc?"hit":"not hit");
474 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
476 atomic_inc(&svc->refcnt);
481 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
483 struct ip_vs_service *svc = dest->svc;
486 if (atomic_dec_and_test(&svc->refcnt))
492 * Returns hash value for real service
494 static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
496 register unsigned porth = ntohs(port);
498 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
503 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
504 * should be called with locked tables.
506 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
510 if (!list_empty(&dest->d_list)) {
515 * Hash by proto,addr,port,
516 * which are the parameters of the real service.
518 hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
519 list_add(&dest->d_list, &ip_vs_rtable[hash]);
525 * UNhashes ip_vs_dest from ip_vs_rtable.
526 * should be called with locked tables.
528 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531 * Remove it from the ip_vs_rtable table.
533 if (!list_empty(&dest->d_list)) {
534 list_del(&dest->d_list);
535 INIT_LIST_HEAD(&dest->d_list);
542 * Lookup real service by <proto,addr,port> in the real service table.
545 ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
548 struct ip_vs_dest *dest;
551 * Check for "full" addressed entries
552 * Return the first found entry
554 hash = ip_vs_rs_hashkey(daddr, dport);
556 read_lock(&__ip_vs_rs_lock);
557 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
558 if ((dest->addr.ip == daddr)
559 && (dest->port == dport)
560 && ((dest->protocol == protocol) ||
563 read_unlock(&__ip_vs_rs_lock);
567 read_unlock(&__ip_vs_rs_lock);
573 * Lookup destination by {addr,port} in the given service
575 static struct ip_vs_dest *
576 ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
578 struct ip_vs_dest *dest;
581 * Find the destination for the given service
583 list_for_each_entry(dest, &svc->destinations, n_list) {
584 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
594 * Find destination by {daddr,dport,vaddr,protocol}
595 * Cretaed to be used in ip_vs_process_message() in
596 * the backup synchronization daemon. It finds the
597 * destination to be bound to the received connection
600 * ip_vs_lookup_real_service() looked promissing, but
601 * seems not working as expected.
603 struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
604 __be32 vaddr, __be16 vport, __u16 protocol)
606 struct ip_vs_dest *dest;
607 struct ip_vs_service *svc;
609 svc = ip_vs_service_get(0, protocol, vaddr, vport);
612 dest = ip_vs_lookup_dest(svc, daddr, dport);
614 atomic_inc(&dest->refcnt);
615 ip_vs_service_put(svc);
620 * Lookup dest by {svc,addr,port} in the destination trash.
621 * The destination trash is used to hold the destinations that are removed
622 * from the service table but are still referenced by some conn entries.
623 * The reason to add the destination trash is when the dest is temporary
624 * down (either by administrator or by monitor program), the dest can be
625 * picked back from the trash, the remaining connections to the dest can
626 * continue, and the counting information of the dest is also useful for
629 static struct ip_vs_dest *
630 ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
632 struct ip_vs_dest *dest, *nxt;
635 * Find the destination in trash
637 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
638 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
641 NIPQUAD(dest->addr.ip), ntohs(dest->port),
642 atomic_read(&dest->refcnt));
643 if (dest->addr.ip == daddr &&
644 dest->port == dport &&
645 dest->vfwmark == svc->fwmark &&
646 dest->protocol == svc->protocol &&
648 (dest->vaddr.ip == svc->addr.ip &&
649 dest->vport == svc->port))) {
655 * Try to purge the destination from trash if not referenced
657 if (atomic_read(&dest->refcnt) == 1) {
658 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
661 NIPQUAD(dest->addr.ip), ntohs(dest->port));
662 list_del(&dest->n_list);
663 ip_vs_dst_reset(dest);
664 __ip_vs_unbind_svc(dest);
674 * Clean up all the destinations in the trash
675 * Called by the ip_vs_control_cleanup()
677 * When the ip_vs_control_clearup is activated by ipvs module exit,
678 * the service tables must have been flushed and all the connections
679 * are expired, and the refcnt of each destination in the trash must
680 * be 1, so we simply release them here.
682 static void ip_vs_trash_cleanup(void)
684 struct ip_vs_dest *dest, *nxt;
686 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
687 list_del(&dest->n_list);
688 ip_vs_dst_reset(dest);
689 __ip_vs_unbind_svc(dest);
696 ip_vs_zero_stats(struct ip_vs_stats *stats)
698 spin_lock_bh(&stats->lock);
712 ip_vs_zero_estimator(stats);
714 spin_unlock_bh(&stats->lock);
718 * Update a destination in the given service
721 __ip_vs_update_dest(struct ip_vs_service *svc,
722 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
726 /* set the weight and the flags */
727 atomic_set(&dest->weight, udest->weight);
728 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
730 /* check if local node and update the flags */
731 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
732 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
733 | IP_VS_CONN_F_LOCALNODE;
736 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
737 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
738 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
741 * Put the real service in ip_vs_rtable if not present.
742 * For now only for NAT!
744 write_lock_bh(&__ip_vs_rs_lock);
746 write_unlock_bh(&__ip_vs_rs_lock);
748 atomic_set(&dest->conn_flags, conn_flags);
750 /* bind the service */
752 __ip_vs_bind_svc(dest, svc);
754 if (dest->svc != svc) {
755 __ip_vs_unbind_svc(dest);
756 ip_vs_zero_stats(&dest->stats);
757 __ip_vs_bind_svc(dest, svc);
761 /* set the dest status flags */
762 dest->flags |= IP_VS_DEST_F_AVAILABLE;
764 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
765 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
766 dest->u_threshold = udest->u_threshold;
767 dest->l_threshold = udest->l_threshold;
772 * Create a destination for the given service
775 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
776 struct ip_vs_dest **dest_p)
778 struct ip_vs_dest *dest;
783 atype = inet_addr_type(&init_net, udest->addr.ip);
784 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
787 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
789 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
794 dest->protocol = svc->protocol;
795 dest->vaddr = svc->addr;
796 dest->vport = svc->port;
797 dest->vfwmark = svc->fwmark;
798 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
799 dest->port = udest->port;
801 atomic_set(&dest->activeconns, 0);
802 atomic_set(&dest->inactconns, 0);
803 atomic_set(&dest->persistconns, 0);
804 atomic_set(&dest->refcnt, 0);
806 INIT_LIST_HEAD(&dest->d_list);
807 spin_lock_init(&dest->dst_lock);
808 spin_lock_init(&dest->stats.lock);
809 __ip_vs_update_dest(svc, dest, udest);
810 ip_vs_new_estimator(&dest->stats);
820 * Add a destination into an existing service
823 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
825 struct ip_vs_dest *dest;
826 union nf_inet_addr daddr;
827 __be16 dport = udest->port;
832 if (udest->weight < 0) {
833 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
837 if (udest->l_threshold > udest->u_threshold) {
838 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
839 "upper threshold\n");
843 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
846 * Check if the dest already exists in the list
848 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
850 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
855 * Check if the dest already exists in the trash and
856 * is from the same service
858 dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
860 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
861 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
862 NIPQUAD(daddr), ntohs(dport),
863 atomic_read(&dest->refcnt),
865 NIPQUAD(dest->vaddr.ip),
867 __ip_vs_update_dest(svc, dest, udest);
870 * Get the destination from the trash
872 list_del(&dest->n_list);
874 ip_vs_new_estimator(&dest->stats);
876 write_lock_bh(&__ip_vs_svc_lock);
879 * Wait until all other svc users go away.
881 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
883 list_add(&dest->n_list, &svc->destinations);
886 /* call the update_service function of its scheduler */
887 if (svc->scheduler->update_service)
888 svc->scheduler->update_service(svc);
890 write_unlock_bh(&__ip_vs_svc_lock);
895 * Allocate and initialize the dest structure
897 ret = ip_vs_new_dest(svc, udest, &dest);
903 * Add the dest entry into the list
905 atomic_inc(&dest->refcnt);
907 write_lock_bh(&__ip_vs_svc_lock);
910 * Wait until all other svc users go away.
912 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
914 list_add(&dest->n_list, &svc->destinations);
917 /* call the update_service function of its scheduler */
918 if (svc->scheduler->update_service)
919 svc->scheduler->update_service(svc);
921 write_unlock_bh(&__ip_vs_svc_lock);
930 * Edit a destination in the given service
933 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
935 struct ip_vs_dest *dest;
936 union nf_inet_addr daddr;
937 __be16 dport = udest->port;
941 if (udest->weight < 0) {
942 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
946 if (udest->l_threshold > udest->u_threshold) {
947 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
948 "upper threshold\n");
952 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
955 * Lookup the destination list
957 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
959 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
963 __ip_vs_update_dest(svc, dest, udest);
965 write_lock_bh(&__ip_vs_svc_lock);
967 /* Wait until all other svc users go away */
968 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
970 /* call the update_service, because server weight may be changed */
971 if (svc->scheduler->update_service)
972 svc->scheduler->update_service(svc);
974 write_unlock_bh(&__ip_vs_svc_lock);
983 * Delete a destination (must be already unlinked from the service)
985 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
987 ip_vs_kill_estimator(&dest->stats);
990 * Remove it from the d-linked list with the real services.
992 write_lock_bh(&__ip_vs_rs_lock);
993 ip_vs_rs_unhash(dest);
994 write_unlock_bh(&__ip_vs_rs_lock);
997 * Decrease the refcnt of the dest, and free the dest
998 * if nobody refers to it (refcnt=0). Otherwise, throw
999 * the destination into the trash.
1001 if (atomic_dec_and_test(&dest->refcnt)) {
1002 ip_vs_dst_reset(dest);
1003 /* simply decrease svc->refcnt here, let the caller check
1004 and release the service if nobody refers to it.
1005 Only user context can release destination and service,
1006 and only one user context can update virtual service at a
1007 time, so the operation here is OK */
1008 atomic_dec(&dest->svc->refcnt);
1011 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1012 "dest->refcnt=%d\n",
1013 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1014 atomic_read(&dest->refcnt));
1015 list_add(&dest->n_list, &ip_vs_dest_trash);
1016 atomic_inc(&dest->refcnt);
1022 * Unlink a destination from the given service
1024 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1025 struct ip_vs_dest *dest,
1028 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1031 * Remove it from the d-linked destination list.
1033 list_del(&dest->n_list);
1037 * Call the update_service function of its scheduler
1039 if (svcupd && svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
1045 * Delete a destination server in the given service
1048 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1050 struct ip_vs_dest *dest;
1051 __be16 dport = udest->port;
1055 dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
1058 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1062 write_lock_bh(&__ip_vs_svc_lock);
1065 * Wait until all other svc users go away.
1067 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1070 * Unlink dest from the service
1072 __ip_vs_unlink_dest(svc, dest, 1);
1074 write_unlock_bh(&__ip_vs_svc_lock);
1077 * Delete the destination
1079 __ip_vs_del_dest(dest);
1088 * Add a service into the service hash table
1091 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1092 struct ip_vs_service **svc_p)
1095 struct ip_vs_scheduler *sched = NULL;
1096 struct ip_vs_service *svc = NULL;
1098 /* increase the module use count */
1099 ip_vs_use_count_inc();
1101 /* Lookup the scheduler by 'u->sched_name' */
1102 sched = ip_vs_scheduler_get(u->sched_name);
1103 if (sched == NULL) {
1104 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1110 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1112 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1117 /* I'm the first user of the service */
1118 atomic_set(&svc->usecnt, 1);
1119 atomic_set(&svc->refcnt, 0);
1122 svc->protocol = u->protocol;
1123 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1124 svc->port = u->port;
1125 svc->fwmark = u->fwmark;
1126 svc->flags = u->flags;
1127 svc->timeout = u->timeout * HZ;
1128 svc->netmask = u->netmask;
1130 INIT_LIST_HEAD(&svc->destinations);
1131 rwlock_init(&svc->sched_lock);
1132 spin_lock_init(&svc->stats.lock);
1134 /* Bind the scheduler */
1135 ret = ip_vs_bind_scheduler(svc, sched);
1140 /* Update the virtual service counters */
1141 if (svc->port == FTPPORT)
1142 atomic_inc(&ip_vs_ftpsvc_counter);
1143 else if (svc->port == 0)
1144 atomic_inc(&ip_vs_nullsvc_counter);
1146 ip_vs_new_estimator(&svc->stats);
1147 ip_vs_num_services++;
1149 /* Hash the service into the service table */
1150 write_lock_bh(&__ip_vs_svc_lock);
1151 ip_vs_svc_hash(svc);
1152 write_unlock_bh(&__ip_vs_svc_lock);
1160 ip_vs_unbind_scheduler(svc);
1163 ip_vs_app_inc_put(svc->inc);
1168 ip_vs_scheduler_put(sched);
1171 /* decrease the module use count */
1172 ip_vs_use_count_dec();
1179 * Edit a service and bind it with a new scheduler
1182 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1184 struct ip_vs_scheduler *sched, *old_sched;
1188 * Lookup the scheduler, by 'u->sched_name'
1190 sched = ip_vs_scheduler_get(u->sched_name);
1191 if (sched == NULL) {
1192 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1198 write_lock_bh(&__ip_vs_svc_lock);
1201 * Wait until all other svc users go away.
1203 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1206 * Set the flags and timeout value
1208 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1209 svc->timeout = u->timeout * HZ;
1210 svc->netmask = u->netmask;
1212 old_sched = svc->scheduler;
1213 if (sched != old_sched) {
1215 * Unbind the old scheduler
1217 if ((ret = ip_vs_unbind_scheduler(svc))) {
1223 * Bind the new scheduler
1225 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1227 * If ip_vs_bind_scheduler fails, restore the old
1229 * The main reason of failure is out of memory.
1231 * The question is if the old scheduler can be
1232 * restored all the time. TODO: if it cannot be
1233 * restored some time, we must delete the service,
1234 * otherwise the system may crash.
1236 ip_vs_bind_scheduler(svc, old_sched);
1243 write_unlock_bh(&__ip_vs_svc_lock);
1246 ip_vs_scheduler_put(old_sched);
1253 * Delete a service from the service list
1254 * - The service must be unlinked, unlocked and not referenced!
1255 * - We are called under _bh lock
1257 static void __ip_vs_del_service(struct ip_vs_service *svc)
1259 struct ip_vs_dest *dest, *nxt;
1260 struct ip_vs_scheduler *old_sched;
1262 ip_vs_num_services--;
1263 ip_vs_kill_estimator(&svc->stats);
1265 /* Unbind scheduler */
1266 old_sched = svc->scheduler;
1267 ip_vs_unbind_scheduler(svc);
1269 ip_vs_scheduler_put(old_sched);
1271 /* Unbind app inc */
1273 ip_vs_app_inc_put(svc->inc);
1278 * Unlink the whole destination list
1280 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1281 __ip_vs_unlink_dest(svc, dest, 0);
1282 __ip_vs_del_dest(dest);
1286 * Update the virtual service counters
1288 if (svc->port == FTPPORT)
1289 atomic_dec(&ip_vs_ftpsvc_counter);
1290 else if (svc->port == 0)
1291 atomic_dec(&ip_vs_nullsvc_counter);
1294 * Free the service if nobody refers to it
1296 if (atomic_read(&svc->refcnt) == 0)
1299 /* decrease the module use count */
1300 ip_vs_use_count_dec();
1304 * Delete a service from the service list
1306 static int ip_vs_del_service(struct ip_vs_service *svc)
1312 * Unhash it from the service table
1314 write_lock_bh(&__ip_vs_svc_lock);
1316 ip_vs_svc_unhash(svc);
1319 * Wait until all the svc users go away.
1321 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1323 __ip_vs_del_service(svc);
1325 write_unlock_bh(&__ip_vs_svc_lock);
1332 * Flush all the virtual services
1334 static int ip_vs_flush(void)
1337 struct ip_vs_service *svc, *nxt;
1340 * Flush the service table hashed by <protocol,addr,port>
1342 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1343 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1344 write_lock_bh(&__ip_vs_svc_lock);
1345 ip_vs_svc_unhash(svc);
1347 * Wait until all the svc users go away.
1349 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1350 __ip_vs_del_service(svc);
1351 write_unlock_bh(&__ip_vs_svc_lock);
1356 * Flush the service table hashed by fwmark
1358 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1359 list_for_each_entry_safe(svc, nxt,
1360 &ip_vs_svc_fwm_table[idx], f_list) {
1361 write_lock_bh(&__ip_vs_svc_lock);
1362 ip_vs_svc_unhash(svc);
1364 * Wait until all the svc users go away.
1366 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1367 __ip_vs_del_service(svc);
1368 write_unlock_bh(&__ip_vs_svc_lock);
1377 * Zero counters in a service or all services
1379 static int ip_vs_zero_service(struct ip_vs_service *svc)
1381 struct ip_vs_dest *dest;
1383 write_lock_bh(&__ip_vs_svc_lock);
1384 list_for_each_entry(dest, &svc->destinations, n_list) {
1385 ip_vs_zero_stats(&dest->stats);
1387 ip_vs_zero_stats(&svc->stats);
1388 write_unlock_bh(&__ip_vs_svc_lock);
1392 static int ip_vs_zero_all(void)
1395 struct ip_vs_service *svc;
1397 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1398 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1399 ip_vs_zero_service(svc);
1403 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1404 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1405 ip_vs_zero_service(svc);
1409 ip_vs_zero_stats(&ip_vs_stats);
1415 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1416 void __user *buffer, size_t *lenp, loff_t *ppos)
1418 int *valp = table->data;
1422 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1423 if (write && (*valp != val)) {
1424 if ((*valp < 0) || (*valp > 3)) {
1425 /* Restore the correct value */
1428 update_defense_level();
1436 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1437 void __user *buffer, size_t *lenp, loff_t *ppos)
1439 int *valp = table->data;
1443 /* backup the value first */
1444 memcpy(val, valp, sizeof(val));
1446 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1447 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1448 /* Restore the correct value */
1449 memcpy(valp, val, sizeof(val));
1456 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1459 static struct ctl_table vs_vars[] = {
1461 .procname = "amemthresh",
1462 .data = &sysctl_ip_vs_amemthresh,
1463 .maxlen = sizeof(int),
1465 .proc_handler = &proc_dointvec,
1467 #ifdef CONFIG_IP_VS_DEBUG
1469 .procname = "debug_level",
1470 .data = &sysctl_ip_vs_debug_level,
1471 .maxlen = sizeof(int),
1473 .proc_handler = &proc_dointvec,
1477 .procname = "am_droprate",
1478 .data = &sysctl_ip_vs_am_droprate,
1479 .maxlen = sizeof(int),
1481 .proc_handler = &proc_dointvec,
1484 .procname = "drop_entry",
1485 .data = &sysctl_ip_vs_drop_entry,
1486 .maxlen = sizeof(int),
1488 .proc_handler = &proc_do_defense_mode,
1491 .procname = "drop_packet",
1492 .data = &sysctl_ip_vs_drop_packet,
1493 .maxlen = sizeof(int),
1495 .proc_handler = &proc_do_defense_mode,
1498 .procname = "secure_tcp",
1499 .data = &sysctl_ip_vs_secure_tcp,
1500 .maxlen = sizeof(int),
1502 .proc_handler = &proc_do_defense_mode,
1506 .procname = "timeout_established",
1507 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1508 .maxlen = sizeof(int),
1510 .proc_handler = &proc_dointvec_jiffies,
1513 .procname = "timeout_synsent",
1514 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1515 .maxlen = sizeof(int),
1517 .proc_handler = &proc_dointvec_jiffies,
1520 .procname = "timeout_synrecv",
1521 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1522 .maxlen = sizeof(int),
1524 .proc_handler = &proc_dointvec_jiffies,
1527 .procname = "timeout_finwait",
1528 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1529 .maxlen = sizeof(int),
1531 .proc_handler = &proc_dointvec_jiffies,
1534 .procname = "timeout_timewait",
1535 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1536 .maxlen = sizeof(int),
1538 .proc_handler = &proc_dointvec_jiffies,
1541 .procname = "timeout_close",
1542 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1543 .maxlen = sizeof(int),
1545 .proc_handler = &proc_dointvec_jiffies,
1548 .procname = "timeout_closewait",
1549 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1550 .maxlen = sizeof(int),
1552 .proc_handler = &proc_dointvec_jiffies,
1555 .procname = "timeout_lastack",
1556 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1557 .maxlen = sizeof(int),
1559 .proc_handler = &proc_dointvec_jiffies,
1562 .procname = "timeout_listen",
1563 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1564 .maxlen = sizeof(int),
1566 .proc_handler = &proc_dointvec_jiffies,
1569 .procname = "timeout_synack",
1570 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1571 .maxlen = sizeof(int),
1573 .proc_handler = &proc_dointvec_jiffies,
1576 .procname = "timeout_udp",
1577 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1578 .maxlen = sizeof(int),
1580 .proc_handler = &proc_dointvec_jiffies,
1583 .procname = "timeout_icmp",
1584 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1585 .maxlen = sizeof(int),
1587 .proc_handler = &proc_dointvec_jiffies,
1591 .procname = "cache_bypass",
1592 .data = &sysctl_ip_vs_cache_bypass,
1593 .maxlen = sizeof(int),
1595 .proc_handler = &proc_dointvec,
1598 .procname = "expire_nodest_conn",
1599 .data = &sysctl_ip_vs_expire_nodest_conn,
1600 .maxlen = sizeof(int),
1602 .proc_handler = &proc_dointvec,
1605 .procname = "expire_quiescent_template",
1606 .data = &sysctl_ip_vs_expire_quiescent_template,
1607 .maxlen = sizeof(int),
1609 .proc_handler = &proc_dointvec,
1612 .procname = "sync_threshold",
1613 .data = &sysctl_ip_vs_sync_threshold,
1614 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1616 .proc_handler = &proc_do_sync_threshold,
1619 .procname = "nat_icmp_send",
1620 .data = &sysctl_ip_vs_nat_icmp_send,
1621 .maxlen = sizeof(int),
1623 .proc_handler = &proc_dointvec,
/* Sysctl path "net.ipv4.vs" under which the IPVS ctl_table (vs_vars,
 * above) is registered; exported for other IPVS modules.
 * NOTE(review): this extract is a sampled listing — the terminating
 * empty ctl_path entry and the closing "};" line are not visible here. */
1628 const struct ctl_path net_vs_ctl_path[] = {
1629 { .procname = "net", .ctl_name = CTL_NET, },
1630 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1631 { .procname = "vs", },
1634 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
/* Handle returned by sysctl registration; needed later to unregister. */
1636 static struct ctl_table_header * sysctl_header;
/* /proc/net interface, compiled only when procfs support is enabled. */
1638 #ifdef CONFIG_PROC_FS
/* NOTE(review): the line below is a field of the iterator struct
 * (presumably "struct ip_vs_iter") whose declaration lines are missing
 * from this extract — confirm against the full source. */
1641 struct list_head *table;
1646 * Write the contents of the VS rule table to a PROCfs file.
1647 * (It is kept just for backward compatibility)
/* Map a connection's forwarding-method flag bits to a short name for
 * the /proc listing. Only the case labels are visible in this extract;
 * the returned strings and default branch are in the missing lines. */
1649 static inline const char *ip_vs_fwd_name(unsigned flags)
1651 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1652 case IP_VS_CONN_F_LOCALNODE:
1654 case IP_VS_CONN_F_TUNNEL:
1656 case IP_VS_CONN_F_DROUTE:
/* Position the seq_file iterator at entry number `pos`, counting across
 * both service hash tables: first the <proto,addr,port> table
 * (ip_vs_svc_table), then the fwmark table (ip_vs_svc_fwm_table).
 * Records which table and bucket the cursor is in via iter->table.
 * NOTE(review): the pos-decrement/return statements inside the loops are
 * among the lines missing from this extract. */
1664 /* Get the Nth entry in the two lists */
1665 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1667 struct ip_vs_iter *iter = seq->private;
1669 struct ip_vs_service *svc;
1671 /* look in hash by protocol */
1672 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1673 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1675 iter->table = ip_vs_svc_table;
1682 /* keep looking in fwmark */
1683 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1684 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1686 iter->table = ip_vs_svc_fwm_table;
/* seq_file .start: take the service-table read lock (released in .stop)
 * and return either the header token or the pos'th service. */
1696 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1699 read_lock_bh(&__ip_vs_svc_lock);
1700 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/* seq_file .next: advance to the following service. Continues within the
 * current bucket, then scans later buckets of the same table, and when the
 * protocol-hash table is exhausted falls through to the fwmark table.
 * NOTE(review): the "svc = v" assignment, pos increment and NULL return
 * are in lines missing from this extract. */
1704 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1706 struct list_head *e;
1707 struct ip_vs_iter *iter;
1708 struct ip_vs_service *svc;
1711 if (v == SEQ_START_TOKEN)
1712 return ip_vs_info_array(seq,0);
1715 iter = seq->private;
1717 if (iter->table == ip_vs_svc_table) {
1718 /* next service in table hashed by protocol */
1719 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1720 return list_entry(e, struct ip_vs_service, s_list);
1723 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1724 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
/* switch over to the fwmark-hashed table once proto table is done */
1730 iter->table = ip_vs_svc_fwm_table;
1735 /* next service in hashed by fwmark */
1736 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1737 return list_entry(e, struct ip_vs_service, f_list);
1740 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1741 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file .stop: drop the read lock taken in ip_vs_info_seq_start(). */
1749 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1751 read_unlock_bh(&__ip_vs_svc_lock);
/* seq_file .show: print one row of /proc/net/ip_vs. The start token
 * produces the version banner and column headers; every other element is
 * a service line (proto/addr/port or FWM form) followed by one indented
 * line per real-server destination. */
1755 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1757 if (v == SEQ_START_TOKEN) {
1759 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1760 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1762 "Prot LocalAddress:Port Scheduler Flags\n");
1764 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1766 const struct ip_vs_service *svc = v;
1767 const struct ip_vs_iter *iter = seq->private;
1768 const struct ip_vs_dest *dest;
1770 if (iter->table == ip_vs_svc_table)
1771 seq_printf(seq, "%s %08X:%04X %s ",
1772 ip_vs_proto_name(svc->protocol),
1773 ntohl(svc->addr.ip),
1775 svc->scheduler->name);
1777 seq_printf(seq, "FWM %08X %s ",
1778 svc->fwmark, svc->scheduler->name);
1780 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1781 seq_printf(seq, "persistent %d %08X\n",
1783 ntohl(svc->netmask));
1785 seq_putc(seq, '\n');
/* one line per destination of this service */
1787 list_for_each_entry(dest, &svc->destinations, n_list) {
1789 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1790 ntohl(dest->addr.ip), ntohs(dest->port),
1791 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1792 atomic_read(&dest->weight),
1793 atomic_read(&dest->activeconns),
1794 atomic_read(&dest->inactconns));
/* seq_file operation table for /proc/net/ip_vs. */
1800 static const struct seq_operations ip_vs_info_seq_ops = {
1801 .start = ip_vs_info_seq_start,
1802 .next = ip_vs_info_seq_next,
1803 .stop = ip_vs_info_seq_stop,
1804 .show = ip_vs_info_seq_show,
/* open() handler: allocate per-open iterator state alongside the seq. */
1807 static int ip_vs_info_open(struct inode *inode, struct file *file)
1809 return seq_open_private(file, &ip_vs_info_seq_ops,
1810 sizeof(struct ip_vs_iter));
/* file_operations for /proc/net/ip_vs.
 * NOTE(review): the ".read = seq_read" line is among those missing from
 * this extract. */
1813 static const struct file_operations ip_vs_info_fops = {
1814 .owner = THIS_MODULE,
1815 .open = ip_vs_info_open,
1817 .llseek = seq_lseek,
1818 .release = seq_release_private,
/* Global IPVS traffic statistics; the embedded spinlock guards all
 * counter fields (see ip_vs_copy_stats which memcpy's up to the lock). */
1823 struct ip_vs_stats ip_vs_stats = {
1824 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1827 #ifdef CONFIG_PROC_FS
/* /proc/net/ip_vs_stats: dump totals then rate estimates, hex-formatted,
 * under the stats spinlock so the snapshot is consistent. */
1828 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1831 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1833 " Total Incoming Outgoing Incoming Outgoing\n");
1835 " Conns Packets Packets Bytes Bytes\n");
1837 spin_lock_bh(&ip_vs_stats.lock);
1838 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1839 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1840 (unsigned long long) ip_vs_stats.inbytes,
1841 (unsigned long long) ip_vs_stats.outbytes);
1843 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1845 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1846 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1851 ip_vs_stats.outbps);
1852 spin_unlock_bh(&ip_vs_stats.lock);
/* single_open: one-shot show function, no iterator needed. */
1857 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1859 return single_open(file, ip_vs_stats_show, NULL);
1862 static const struct file_operations ip_vs_stats_fops = {
1863 .owner = THIS_MODULE,
1864 .open = ip_vs_stats_seq_open,
1866 .llseek = seq_lseek,
1867 .release = single_release,
/* Apply user-supplied timeouts (seconds; 0 means "leave unchanged") to
 * the TCP ESTABLISHED, TCP FIN_WAIT and UDP NORMAL state timers,
 * converting to jiffies with HZ. Guarded by protocol config options. */
1873 * Set timeout values for tcp tcpfin udp in the timeout_table.
1875 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1877 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1882 #ifdef CONFIG_IP_VS_PROTO_TCP
1883 if (u->tcp_timeout) {
1884 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1885 = u->tcp_timeout * HZ;
1888 if (u->tcp_fin_timeout) {
1889 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1890 = u->tcp_fin_timeout * HZ;
1894 #ifdef CONFIG_IP_VS_PROTO_UDP
1895 if (u->udp_timeout) {
1896 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1897 = u->udp_timeout * HZ;
/* Map a setsockopt command to its expected argument length so
 * do_ip_vs_set_ctl() can validate `len` before copy_from_user(). */
1904 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1905 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1906 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1907 sizeof(struct ip_vs_dest_user))
1908 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1909 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1910 #define MAX_ARG_LEN SVCDEST_ARG_LEN
1912 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1913 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1914 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1915 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1917 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1919 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1923 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/* Convert the legacy IPv4-only userspace service struct into the
 * extended kernel-internal representation used by the rest of the code. */
1926 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1927 struct ip_vs_service_user *usvc_compat)
1930 usvc->protocol = usvc_compat->protocol;
1931 usvc->addr.ip = usvc_compat->addr;
1932 usvc->port = usvc_compat->port;
1933 usvc->fwmark = usvc_compat->fwmark;
1935 /* Deep copy of sched_name is not needed here */
/* NOTE(review): borrows the pointer, so usvc is only valid while
 * usvc_compat (the sockopt argument buffer) is alive. */
1936 usvc->sched_name = usvc_compat->sched_name;
1938 usvc->flags = usvc_compat->flags;
1939 usvc->timeout = usvc_compat->timeout;
1940 usvc->netmask = usvc_compat->netmask;
/* Same conversion for the legacy destination (real server) struct. */
1943 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1944 struct ip_vs_dest_user *udest_compat)
1946 udest->addr.ip = udest_compat->addr;
1947 udest->port = udest_compat->port;
1948 udest->conn_flags = udest_compat->conn_flags;
1949 udest->weight = udest_compat->weight;
1950 udest->u_threshold = udest_compat->u_threshold;
1951 udest->l_threshold = udest_compat->l_threshold;
/* setsockopt() entry point for all IPVS SET commands. Requires
 * CAP_NET_ADMIN, validates the argument length against set_arglen[],
 * copies it in, then dispatches under __ip_vs_mutex: simple commands
 * (FLUSH/TIMEOUT/START/STOPDAEMON) are handled inline, the rest go
 * through service lookup and the switch at the bottom. The module use
 * count is pinned for the duration of the call. */
1955 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1958 unsigned char arg[MAX_ARG_LEN];
1959 struct ip_vs_service_user *usvc_compat;
1960 struct ip_vs_service_user_kern usvc;
1961 struct ip_vs_service *svc;
1962 struct ip_vs_dest_user *udest_compat;
1963 struct ip_vs_dest_user_kern udest;
1965 if (!capable(CAP_NET_ADMIN))
/* exact-length check: rejects both short and long arguments */
1968 if (len != set_arglen[SET_CMDID(cmd)]) {
1969 IP_VS_ERR("set_ctl: len %u != %u\n",
1970 len, set_arglen[SET_CMDID(cmd)]);
1974 if (copy_from_user(arg, user, len) != 0)
1977 /* increase the module use count */
1978 ip_vs_use_count_inc();
1980 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1985 if (cmd == IP_VS_SO_SET_FLUSH) {
1986 /* Flush the virtual service */
1987 ret = ip_vs_flush();
1989 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1990 /* Set timeout values for (tcp tcpfin udp) */
1991 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1993 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1994 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1995 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1997 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1998 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1999 ret = stop_sync_thread(dm->state);
2003 usvc_compat = (struct ip_vs_service_user *)arg;
2004 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2006 /* We only use the new structs internally, so copy userspace compat
2007 * structs to extended internal versions */
2008 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2009 ip_vs_copy_udest_compat(&udest, udest_compat);
2011 if (cmd == IP_VS_SO_SET_ZERO) {
2012 /* if no service address is set, zero counters in all */
2013 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2014 ret = ip_vs_zero_all();
2019 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2020 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2021 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2022 usvc.protocol, NIPQUAD(usvc.addr.ip),
2023 ntohs(usvc.port), usvc.sched_name);
2028 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2029 if (usvc.fwmark == 0)
2030 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2031 &usvc.addr, usvc.port);
2033 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
/* every command except ADD requires an existing, matching service */
2035 if (cmd != IP_VS_SO_SET_ADD
2036 && (svc == NULL || svc->protocol != usvc.protocol)) {
2042 case IP_VS_SO_SET_ADD:
2046 ret = ip_vs_add_service(&usvc, &svc);
2048 case IP_VS_SO_SET_EDIT:
2049 ret = ip_vs_edit_service(svc, &usvc);
2051 case IP_VS_SO_SET_DEL:
2052 ret = ip_vs_del_service(svc);
2056 case IP_VS_SO_SET_ZERO:
2057 ret = ip_vs_zero_service(svc);
2059 case IP_VS_SO_SET_ADDDEST:
2060 ret = ip_vs_add_dest(svc, &udest);
2062 case IP_VS_SO_SET_EDITDEST:
2063 ret = ip_vs_edit_dest(svc, &udest);
2065 case IP_VS_SO_SET_DELDEST:
2066 ret = ip_vs_del_dest(svc, &udest);
/* drop the reference taken by the lookup above */
2073 ip_vs_service_put(svc);
2076 mutex_unlock(&__ip_vs_mutex);
2078 /* decrease the module use count */
2079 ip_vs_use_count_dec();
/* Snapshot the counter portion of an ip_vs_stats under its spinlock.
 * The memcpy length trick copies every field that precedes `lock` in the
 * struct layout — i.e. the user-visible counters only. */
2086 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2088 spin_lock_bh(&src->lock);
2089 memcpy(dst, src, (char*)&src->lock - (char*)src);
2090 spin_unlock_bh(&src->lock);
/* Fill a userspace ip_vs_service_entry from a kernel service, converting
 * the timeout back to seconds and snapshotting the stats. */
2094 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2096 dst->protocol = src->protocol;
2097 dst->addr = src->addr.ip;
2098 dst->port = src->port;
2099 dst->fwmark = src->fwmark;
2100 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2101 dst->flags = src->flags;
2102 dst->timeout = src->timeout / HZ;
2103 dst->netmask = src->netmask;
2104 dst->num_dests = src->num_dests;
2105 ip_vs_copy_stats(&dst->stats, &src->stats);
/* Copy up to get->num_services service entries to userspace, walking
 * first the <proto,addr,port> hash table and then the fwmark table.
 * Each entry is zeroed before filling to avoid leaking stack bytes. */
2109 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2110 struct ip_vs_get_services __user *uptr)
2113 struct ip_vs_service *svc;
2114 struct ip_vs_service_entry entry;
2117 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2118 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2119 if (count >= get->num_services)
2121 memset(&entry, 0, sizeof(entry));
2122 ip_vs_copy_service(&entry, svc);
2123 if (copy_to_user(&uptr->entrytable[count],
2124 &entry, sizeof(entry))) {
2132 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2133 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2134 if (count >= get->num_services)
2136 memset(&entry, 0, sizeof(entry));
2137 ip_vs_copy_service(&entry, svc);
2138 if (copy_to_user(&uptr->entrytable[count],
2139 &entry, sizeof(entry))) {
/* Copy up to get->num_dests destination entries of one service (looked
 * up by fwmark or by proto/addr/port) to userspace. The reference taken
 * by the lookup is dropped with ip_vs_service_put() at the end. */
2151 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2152 struct ip_vs_get_dests __user *uptr)
2154 struct ip_vs_service *svc;
2155 union nf_inet_addr addr = { .ip = get->addr };
2159 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2161 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2166 struct ip_vs_dest *dest;
2167 struct ip_vs_dest_entry entry;
2169 list_for_each_entry(dest, &svc->destinations, n_list) {
2170 if (count >= get->num_dests)
2173 entry.addr = dest->addr.ip;
2174 entry.port = dest->port;
2175 entry.conn_flags = atomic_read(&dest->conn_flags);
2176 entry.weight = atomic_read(&dest->weight);
2177 entry.u_threshold = dest->u_threshold;
2178 entry.l_threshold = dest->l_threshold;
2179 entry.activeconns = atomic_read(&dest->activeconns);
2180 entry.inactconns = atomic_read(&dest->inactconns);
2181 entry.persistconns = atomic_read(&dest->persistconns);
2182 ip_vs_copy_stats(&entry.stats, &dest->stats);
2183 if (copy_to_user(&uptr->entrytable[count],
2184 &entry, sizeof(entry))) {
2190 ip_vs_service_put(svc);
/* Report the current TCP/TCP-FIN/UDP timeouts in seconds (jiffies/HZ),
 * guarded by the per-protocol config options. */
2197 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2199 #ifdef CONFIG_IP_VS_PROTO_TCP
2201 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2202 u->tcp_fin_timeout =
2203 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2205 #ifdef CONFIG_IP_VS_PROTO_UDP
2207 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* Minimum argument length per GET command, mirroring set_arglen[] for
 * the SET side; do_ip_vs_get_ctl() checks *len against this table. */
2212 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2213 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2214 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2215 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2216 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2217 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
/* two daemon slots: master and backup */
2218 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2220 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2221 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2222 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2223 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2224 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2225 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2226 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2227 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/* getsockopt() entry point for all IPVS GET commands. Requires
 * CAP_NET_ADMIN, validates *len against get_arglen[], copies the request
 * header in, then serves each command under __ip_vs_mutex. Variable-size
 * replies (SERVICES/DESTS) re-check *len against the caller-declared
 * element count before copying out. */
2231 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2233 unsigned char arg[128];
2236 if (!capable(CAP_NET_ADMIN))
2239 if (*len < get_arglen[GET_CMDID(cmd)]) {
2240 IP_VS_ERR("get_ctl: len %u < %u\n",
2241 *len, get_arglen[GET_CMDID(cmd)]);
2245 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2248 if (mutex_lock_interruptible(&__ip_vs_mutex))
2249 return -ERESTARTSYS;
2252 case IP_VS_SO_GET_VERSION:
2256 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2257 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2258 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2262 *len = strlen(buf)+1;
2266 case IP_VS_SO_GET_INFO:
2268 struct ip_vs_getinfo info;
2269 info.version = IP_VS_VERSION_CODE;
2270 info.size = IP_VS_CONN_TAB_SIZE;
2271 info.num_services = ip_vs_num_services;
2272 if (copy_to_user(user, &info, sizeof(info)) != 0)
2277 case IP_VS_SO_GET_SERVICES:
2279 struct ip_vs_get_services *get;
2282 get = (struct ip_vs_get_services *)arg;
2283 size = sizeof(*get) +
2284 sizeof(struct ip_vs_service_entry) * get->num_services;
2286 IP_VS_ERR("length: %u != %u\n", *len, size);
2290 ret = __ip_vs_get_service_entries(get, user);
2294 case IP_VS_SO_GET_SERVICE:
2296 struct ip_vs_service_entry *entry;
2297 struct ip_vs_service *svc;
2298 union nf_inet_addr addr;
2300 entry = (struct ip_vs_service_entry *)arg;
2301 addr.ip = entry->addr;
2303 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2305 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2306 &addr, entry->port);
2308 ip_vs_copy_service(entry, svc);
2309 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2311 ip_vs_service_put(svc);
2317 case IP_VS_SO_GET_DESTS:
2319 struct ip_vs_get_dests *get;
2322 get = (struct ip_vs_get_dests *)arg;
2323 size = sizeof(*get) +
2324 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2326 IP_VS_ERR("length: %u != %u\n", *len, size);
2330 ret = __ip_vs_get_dest_entries(get, user);
2334 case IP_VS_SO_GET_TIMEOUT:
2336 struct ip_vs_timeout_user t;
2338 __ip_vs_get_timeouts(&t);
2339 if (copy_to_user(user, &t, sizeof(t)) != 0)
2344 case IP_VS_SO_GET_DAEMON:
2346 struct ip_vs_daemon_user d[2];
/* d[0] = master daemon state, d[1] = backup daemon state */
2348 memset(&d, 0, sizeof(d));
2349 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2350 d[0].state = IP_VS_STATE_MASTER;
2351 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2352 d[0].syncid = ip_vs_master_syncid;
2354 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2355 d[1].state = IP_VS_STATE_BACKUP;
2356 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2357 d[1].syncid = ip_vs_backup_syncid;
2359 if (copy_to_user(user, &d, sizeof(d)) != 0)
2369 mutex_unlock(&__ip_vs_mutex);
/* Netfilter sockopt registration: routes the IP_VS_SO_SET_*/GET_* option
 * ranges to do_ip_vs_set_ctl()/do_ip_vs_get_ctl(). */
2374 static struct nf_sockopt_ops ip_vs_sockopts = {
2376 .set_optmin = IP_VS_BASE_CTL,
2377 .set_optmax = IP_VS_SO_SET_MAX+1,
2378 .set = do_ip_vs_set_ctl,
2379 .get_optmin = IP_VS_BASE_CTL,
2380 .get_optmax = IP_VS_SO_GET_MAX+1,
2381 .get = do_ip_vs_get_ctl,
2382 .owner = THIS_MODULE,
2386 * Generic Netlink interface
2389 /* IPVS genetlink family */
/* Family id is assigned dynamically at register time (GENL_ID_GENERATE). */
2390 static struct genl_family ip_vs_genl_family = {
2391 .id = GENL_ID_GENERATE,
2393 .name = IPVS_GENL_NAME,
2394 .version = IPVS_GENL_VERSION,
2395 .maxattr = IPVS_CMD_MAX,
/* Netlink attribute validation policies. nla_parse*() uses these to
 * type- and length-check attributes before the handlers touch them. */
2398 /* Policy used for first-level command attributes */
2399 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2400 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2401 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2402 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2403 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2404 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2405 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2408 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2409 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2410 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2411 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2412 .len = IP_VS_IFNAME_MAXLEN },
2413 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2416 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2417 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2418 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2419 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2420 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2421 .len = sizeof(union nf_inet_addr) },
2422 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2423 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2424 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2425 .len = IP_VS_SCHEDNAME_MAXLEN },
2426 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2427 .len = sizeof(struct ip_vs_flags) },
2428 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2429 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2430 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2433 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2434 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2435 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2436 .len = sizeof(union nf_inet_addr) },
2437 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2438 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2439 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2440 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2441 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2442 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2443 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2444 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2445 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/* Emit a nested stats attribute (container_type) holding the counter and
 * rate fields of *stats, read under the stats spinlock. On NLA_PUT
 * failure the lock is dropped and the nest is cancelled (failure path at
 * line 2475). */
2448 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2449 struct ip_vs_stats *stats)
2451 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2455 spin_lock_bh(&stats->lock);
2457 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2458 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2459 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2460 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2461 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2462 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2463 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2464 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2465 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2466 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2468 spin_unlock_bh(&stats->lock);
2470 nla_nest_end(skb, nl_stats);
2475 spin_unlock_bh(&stats->lock);
2476 nla_nest_cancel(skb, nl_stats);
/* Emit a nested IPVS_CMD_ATTR_SERVICE attribute describing one virtual
 * service: identity (fwmark or proto/addr/port), scheduler, flags,
 * timeout in seconds, netmask, and nested stats. */
2480 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2481 struct ip_vs_service *svc)
2483 struct nlattr *nl_service;
2484 struct ip_vs_flags flags = { .flags = svc->flags,
2487 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2491 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2494 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2496 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2497 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2498 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2501 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2502 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2503 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2504 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2506 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2507 goto nla_put_failure;
2509 nla_nest_end(skb, nl_service);
2514 nla_nest_cancel(skb, nl_service);
/* Emit one NLM_F_MULTI genetlink message (IPVS_CMD_NEW_SERVICE) for a
 * service during a dump; cancels the message header on fill failure. */
2518 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2519 struct ip_vs_service *svc,
2520 struct netlink_callback *cb)
2524 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2525 &ip_vs_genl_family, NLM_F_MULTI,
2526 IPVS_CMD_NEW_SERVICE);
2530 if (ip_vs_genl_fill_service(skb, svc) < 0)
2531 goto nla_put_failure;
2533 return genlmsg_end(skb, hdr);
2536 genlmsg_cancel(skb, hdr);
/* Dump callback: walk both service tables under __ip_vs_mutex, resuming
 * from cb->args[0] (services already emitted in earlier dump rounds). */
2540 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2541 struct netlink_callback *cb)
2544 int start = cb->args[0];
2545 struct ip_vs_service *svc;
2547 mutex_lock(&__ip_vs_mutex);
2548 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2549 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2552 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2554 goto nla_put_failure;
2559 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2560 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2563 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2565 goto nla_put_failure;
2571 mutex_unlock(&__ip_vs_mutex);
/* Parse a nested IPVS_CMD_ATTR_SERVICE attribute into *usvc. The
 * identifying fields (af + either fwmark or proto/addr/port) are always
 * required; when full_entry is set, scheduler name, flags, timeout and
 * netmask must also be present. Only AF_INET is accepted here.
 * NOTE(review): fwmark-present branch sets protocol to a placeholder
 * (IPPROTO_TCP) since fwmark services have no real protocol. */
2577 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2578 struct nlattr *nla, int full_entry)
2580 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2581 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2583 /* Parse mandatory identifying service fields first */
2585 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2588 nla_af = attrs[IPVS_SVC_ATTR_AF];
2589 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2590 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2591 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2592 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2594 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2597 usvc->af = nla_get_u16(nla_af);
2598 /* For now, only support IPv4 */
2599 if (nla_get_u16(nla_af) != AF_INET)
2600 return -EAFNOSUPPORT;
2603 usvc->protocol = IPPROTO_TCP;
2604 usvc->fwmark = nla_get_u32(nla_fwmark);
2606 usvc->protocol = nla_get_u16(nla_protocol);
2607 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2608 usvc->port = nla_get_u16(nla_port);
2612 /* If a full entry was requested, check for the additional fields */
2614 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2616 struct ip_vs_flags flags;
2617 struct ip_vs_service *svc;
2619 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2620 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2621 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2622 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2624 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2627 nla_memcpy(&flags, nla_flags, sizeof(flags));
2629 /* prefill flags from service if it already exists */
2631 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2633 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2634 &usvc->addr, usvc->port);
2636 usvc->flags = svc->flags;
2637 ip_vs_service_put(svc);
2641 /* set new flags from userland */
/* only bits set in flags.mask are taken from userspace */
2642 usvc->flags = (usvc->flags & ~flags.mask) |
2643 (flags.flags & flags.mask);
/* sched_name points into the netlink message; valid for this call only */
2644 usvc->sched_name = nla_data(nla_sched);
2645 usvc->timeout = nla_get_u32(nla_timeout);
2646 usvc->netmask = nla_get_u32(nla_netmask);
/* Parse just the identifying fields of a service attribute and look the
 * service up (by fwmark or proto/addr/port); returns ERR_PTR on parse
 * error, a referenced service, or NULL when not found. */
2652 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2654 struct ip_vs_service_user_kern usvc;
2657 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2659 return ERR_PTR(ret);
2662 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2664 return __ip_vs_service_get(usvc.af, usvc.protocol,
2665 &usvc.addr, usvc.port);
/* Emit a nested IPVS_CMD_ATTR_DEST attribute describing one real server:
 * address/port, forwarding method (masked out of conn_flags), weight,
 * thresholds, connection counters and nested stats. */
2668 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2670 struct nlattr *nl_dest;
2672 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2676 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2677 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2679 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2680 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2681 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2682 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2683 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2684 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2685 atomic_read(&dest->activeconns));
2686 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2687 atomic_read(&dest->inactconns));
2688 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2689 atomic_read(&dest->persistconns));
2691 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2692 goto nla_put_failure;
2694 nla_nest_end(skb, nl_dest);
2699 nla_nest_cancel(skb, nl_dest);
/* Emit one NLM_F_MULTI message for a destination during a dump. */
2703 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2704 struct netlink_callback *cb)
2708 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2709 &ip_vs_genl_family, NLM_F_MULTI,
2714 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2715 goto nla_put_failure;
2717 return genlmsg_end(skb, hdr);
2720 genlmsg_cancel(skb, hdr);
/* Dump callback: re-parse the request to find the target service, then
 * walk its destination list, resuming from cb->args[0]. */
2724 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2725 struct netlink_callback *cb)
2728 int start = cb->args[0];
2729 struct ip_vs_service *svc;
2730 struct ip_vs_dest *dest;
2731 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2733 mutex_lock(&__ip_vs_mutex);
2735 /* Try to find the service for which to dump destinations */
2736 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2737 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2740 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2741 if (IS_ERR(svc) || svc == NULL)
2744 /* Dump the destinations */
2745 list_for_each_entry(dest, &svc->destinations, n_list) {
2748 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2750 goto nla_put_failure;
2756 ip_vs_service_put(svc);
2759 mutex_unlock(&__ip_vs_mutex);
/* Parse a nested IPVS_CMD_ATTR_DEST attribute into *udest. Address and
 * port are always required; when full_entry is set, forwarding method,
 * weight and both thresholds must also be present. */
2764 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2765 struct nlattr *nla, int full_entry)
2767 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2768 struct nlattr *nla_addr, *nla_port;
2770 /* Parse mandatory identifying destination fields first */
2772 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2775 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2776 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2778 if (!(nla_addr && nla_port))
2781 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2782 udest->port = nla_get_u16(nla_port);
2784 /* If a full entry was requested, check for the additional fields */
2786 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2789 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2790 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2791 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2792 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2794 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
/* keep only the forwarding-method bits of the supplied flags */
2797 udest->conn_flags = nla_get_u32(nla_fwd)
2798 & IP_VS_CONN_F_FWD_MASK;
2799 udest->weight = nla_get_u32(nla_weight);
2800 udest->u_threshold = nla_get_u32(nla_u_thresh);
2801 udest->l_threshold = nla_get_u32(nla_l_thresh);
/* Emit a nested IPVS_CMD_ATTR_DAEMON attribute (state, multicast
 * interface name, sync id) for one sync daemon. */
2807 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2808 const char *mcast_ifn, __be32 syncid)
2810 struct nlattr *nl_daemon;
2812 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2816 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2817 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2818 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2820 nla_nest_end(skb, nl_daemon);
2825 nla_nest_cancel(skb, nl_daemon);
/* Emit one NLM_F_MULTI message (IPVS_CMD_NEW_DAEMON) for a daemon. */
2829 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2830 const char *mcast_ifn, __be32 syncid,
2831 struct netlink_callback *cb)
2834 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2835 &ip_vs_genl_family, NLM_F_MULTI,
2836 IPVS_CMD_NEW_DAEMON);
2840 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2841 goto nla_put_failure;
2843 return genlmsg_end(skb, hdr);
2846 genlmsg_cancel(skb, hdr);
/* Dump callback: report the master and/or backup sync daemon if running,
 * using cb->args[0]/[1] to remember which have already been emitted. */
2850 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2851 struct netlink_callback *cb)
2853 mutex_lock(&__ip_vs_mutex);
2854 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2855 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2856 ip_vs_master_mcast_ifn,
2857 ip_vs_master_syncid, cb) < 0)
2858 goto nla_put_failure;
2863 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2864 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2865 ip_vs_backup_mcast_ifn,
2866 ip_vs_backup_syncid, cb) < 0)
2867 goto nla_put_failure;
2873 mutex_unlock(&__ip_vs_mutex);
/*
 * IPVS_CMD_NEW_DAEMON: start a sync thread.  All three daemon
 * attributes (state, mcast interface, sync id) are mandatory; the
 * error return for the missing-attribute case falls between the
 * visible lines.
 */
2878 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2880 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2881 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2882 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2885 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2886 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2887 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/*
 * IPVS_CMD_DEL_DAEMON: stop the sync thread selected by the
 * mandatory state attribute (master or backup).
 */
2890 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2892 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2895 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * IPVS_CMD_SET_CONFIG: update protocol timeouts.  Reads the current
 * values first so that any attribute the caller omitted keeps its
 * existing setting, then applies the merged set.
 */
2898 static int ip_vs_genl_set_config(struct nlattr **attrs)
2900 struct ip_vs_timeout_user t;
	/* Seed with the currently-configured timeouts. */
2902 __ip_vs_get_timeouts(&t);
2904 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2905 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2907 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2909 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2911 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2912 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2914 return ip_vs_set_timeout(&t);
/*
 * Generic Netlink "doit" handler shared by every state-changing IPVS
 * command (service/dest add-edit-delete, daemon start/stop, timeout
 * config, flush, zero).  The whole operation runs under
 * __ip_vs_mutex.
 *
 * Structure: commands that need no service argument are dispatched
 * first; the remainder parse a service (full spec only when creating
 * or editing), look it up, optionally parse a destination, and then
 * switch on the command.
 */
2917 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2919 struct ip_vs_service *svc = NULL;
2920 struct ip_vs_service_user_kern usvc;
2921 struct ip_vs_dest_user_kern udest;
2923 int need_full_svc = 0, need_full_dest = 0;
2925 cmd = info->genlhdr->cmd;
2927 mutex_lock(&__ip_vs_mutex);
	/* --- commands without a service argument --- */
2929 if (cmd == IPVS_CMD_FLUSH) {
2930 ret = ip_vs_flush();
2932 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2933 ret = ip_vs_genl_set_config(info->attrs);
2935 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2936 cmd == IPVS_CMD_DEL_DAEMON) {
	/* Daemon commands carry their attributes in a nested
	 * IPVS_CMD_ATTR_DAEMON container; re-parse it here. */
2938 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2940 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2941 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2942 info->attrs[IPVS_CMD_ATTR_DAEMON],
2943 ip_vs_daemon_policy)) {
2948 if (cmd == IPVS_CMD_NEW_DAEMON)
2949 ret = ip_vs_genl_new_daemon(daemon_attrs);
2951 ret = ip_vs_genl_del_daemon(daemon_attrs);
2953 } else if (cmd == IPVS_CMD_ZERO &&
2954 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
	/* ZERO with no service attribute means "zero everything". */
2955 ret = ip_vs_zero_all();
2959 /* All following commands require a service argument, so check if we
2960 * received a valid one. We need a full service specification when
2961 * adding / editing a service. Only identifying members otherwise. */
2962 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2965 ret = ip_vs_genl_parse_service(&usvc,
2966 info->attrs[IPVS_CMD_ATTR_SERVICE],
2971 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2972 if (usvc.fwmark == 0)
2973 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2974 &usvc.addr, usvc.port);
2976 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2978 /* Unless we're adding a new service, the service must already exist */
2979 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2984 /* Destination commands require a valid destination argument. For
2985 * adding / editing a destination, we need a full destination
2987 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2988 cmd == IPVS_CMD_DEL_DEST) {
2989 if (cmd != IPVS_CMD_DEL_DEST)
2992 ret = ip_vs_genl_parse_dest(&udest,
2993 info->attrs[IPVS_CMD_ATTR_DEST],
	/* --- final dispatch on the command --- */
3000 case IPVS_CMD_NEW_SERVICE:
3002 ret = ip_vs_add_service(&usvc, &svc);
3006 case IPVS_CMD_SET_SERVICE:
3007 ret = ip_vs_edit_service(svc, &usvc);
3009 case IPVS_CMD_DEL_SERVICE:
3010 ret = ip_vs_del_service(svc);
3012 case IPVS_CMD_NEW_DEST:
3013 ret = ip_vs_add_dest(svc, &udest);
3015 case IPVS_CMD_SET_DEST:
3016 ret = ip_vs_edit_dest(svc, &udest);
3018 case IPVS_CMD_DEL_DEST:
3019 ret = ip_vs_del_dest(svc, &udest);
3022 ret = ip_vs_zero_service(svc);
	/* Drop the reference taken by the service lookup above. */
3030 ip_vs_service_put(svc);
3031 mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink "doit" handler for the non-dump GET commands:
 * GET_SERVICE (single service), GET_CONFIG (timeouts) and GET_INFO
 * (version and connection table size).  Builds a unicast reply whose
 * command id is the corresponding NEW_/SET_ value, so userspace can
 * reuse its attribute parsers.
 */
3036 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3038 struct sk_buff *msg;
3040 int ret, cmd, reply_cmd;
3042 cmd = info->genlhdr->cmd;
	/* Map request command -> reply command. */
3044 if (cmd == IPVS_CMD_GET_SERVICE)
3045 reply_cmd = IPVS_CMD_NEW_SERVICE;
3046 else if (cmd == IPVS_CMD_GET_INFO)
3047 reply_cmd = IPVS_CMD_SET_INFO;
3048 else if (cmd == IPVS_CMD_GET_CONFIG)
3049 reply_cmd = IPVS_CMD_SET_CONFIG;
3051 IP_VS_ERR("unknown Generic Netlink command\n");
3055 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3059 mutex_lock(&__ip_vs_mutex);
3061 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3063 goto nla_put_failure;
3066 case IPVS_CMD_GET_SERVICE:
3068 struct ip_vs_service *svc;
3070 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3075 ret = ip_vs_genl_fill_service(msg, svc);
	/* find_service took a reference; release it once filled. */
3076 ip_vs_service_put(svc);
3078 goto nla_put_failure;
3087 case IPVS_CMD_GET_CONFIG:
3089 struct ip_vs_timeout_user t;
3091 __ip_vs_get_timeouts(&t);
	/* Timeout attributes are only present for protocols compiled in. */
3092 #ifdef CONFIG_IP_VS_PROTO_TCP
3093 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3094 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3097 #ifdef CONFIG_IP_VS_PROTO_UDP
3098 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3104 case IPVS_CMD_GET_INFO:
3105 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3106 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3107 IP_VS_CONN_TAB_SIZE);
3111 genlmsg_end(msg, reply);
3112 ret = genlmsg_unicast(msg, info->snd_pid);
	/* Shared failure path for out-of-space NLA_PUT_* jumps. */
3116 IP_VS_ERR("not enough space in Netlink message\n");
3122 mutex_unlock(&__ip_vs_mutex);
/*
 * Table of Generic Netlink operations for the IPVS family.  Every
 * operation requires GENL_ADMIN_PERM (CAP_NET_ADMIN).  State-changing
 * commands share ip_vs_genl_set_cmd(); single-object reads share
 * ip_vs_genl_get_cmd(); list-style reads use the dedicated dumpit
 * callbacks.
 */
3128 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3130 .cmd = IPVS_CMD_NEW_SERVICE,
3131 .flags = GENL_ADMIN_PERM,
3132 .policy = ip_vs_cmd_policy,
3133 .doit = ip_vs_genl_set_cmd,
3136 .cmd = IPVS_CMD_SET_SERVICE,
3137 .flags = GENL_ADMIN_PERM,
3138 .policy = ip_vs_cmd_policy,
3139 .doit = ip_vs_genl_set_cmd,
3142 .cmd = IPVS_CMD_DEL_SERVICE,
3143 .flags = GENL_ADMIN_PERM,
3144 .policy = ip_vs_cmd_policy,
3145 .doit = ip_vs_genl_set_cmd,
3148 .cmd = IPVS_CMD_GET_SERVICE,
3149 .flags = GENL_ADMIN_PERM,
3150 .doit = ip_vs_genl_get_cmd,
3151 .dumpit = ip_vs_genl_dump_services,
3152 .policy = ip_vs_cmd_policy,
3155 .cmd = IPVS_CMD_NEW_DEST,
3156 .flags = GENL_ADMIN_PERM,
3157 .policy = ip_vs_cmd_policy,
3158 .doit = ip_vs_genl_set_cmd,
3161 .cmd = IPVS_CMD_SET_DEST,
3162 .flags = GENL_ADMIN_PERM,
3163 .policy = ip_vs_cmd_policy,
3164 .doit = ip_vs_genl_set_cmd,
3167 .cmd = IPVS_CMD_DEL_DEST,
3168 .flags = GENL_ADMIN_PERM,
3169 .policy = ip_vs_cmd_policy,
3170 .doit = ip_vs_genl_set_cmd,
3173 .cmd = IPVS_CMD_GET_DEST,
3174 .flags = GENL_ADMIN_PERM,
3175 .policy = ip_vs_cmd_policy,
3176 .dumpit = ip_vs_genl_dump_dests,
3179 .cmd = IPVS_CMD_NEW_DAEMON,
3180 .flags = GENL_ADMIN_PERM,
3181 .policy = ip_vs_cmd_policy,
3182 .doit = ip_vs_genl_set_cmd,
3185 .cmd = IPVS_CMD_DEL_DAEMON,
3186 .flags = GENL_ADMIN_PERM,
3187 .policy = ip_vs_cmd_policy,
3188 .doit = ip_vs_genl_set_cmd,
3191 .cmd = IPVS_CMD_GET_DAEMON,
3192 .flags = GENL_ADMIN_PERM,
3193 .dumpit = ip_vs_genl_dump_daemons,
3196 .cmd = IPVS_CMD_SET_CONFIG,
3197 .flags = GENL_ADMIN_PERM,
3198 .policy = ip_vs_cmd_policy,
3199 .doit = ip_vs_genl_set_cmd,
3202 .cmd = IPVS_CMD_GET_CONFIG,
3203 .flags = GENL_ADMIN_PERM,
3204 .doit = ip_vs_genl_get_cmd,
3207 .cmd = IPVS_CMD_GET_INFO,
3208 .flags = GENL_ADMIN_PERM,
3209 .doit = ip_vs_genl_get_cmd,
3212 .cmd = IPVS_CMD_ZERO,
3213 .flags = GENL_ADMIN_PERM,
3214 .policy = ip_vs_cmd_policy,
3215 .doit = ip_vs_genl_set_cmd,
3218 .cmd = IPVS_CMD_FLUSH,
3219 .flags = GENL_ADMIN_PERM,
3220 .doit = ip_vs_genl_set_cmd,
/*
 * Register the IPVS Generic Netlink family and each operation in
 * ip_vs_genl_ops[].  If any per-op registration fails, the family is
 * unregistered again so no half-registered interface remains.
 */
3224 static int __init ip_vs_genl_register(void)
3228 ret = genl_register_family(&ip_vs_genl_family);
3232 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3233 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
	/* Error unwind: undo the family registration. */
3240 genl_unregister_family(&ip_vs_genl_family);
/* Unregister the IPVS Generic Netlink family (also removes its ops). */
3244 static void ip_vs_genl_unregister(void)
3246 genl_unregister_family(&ip_vs_genl_family);
3249 /* End of Generic Netlink interface definitions */
/*
 * Module-init entry for the IPVS control plane.  Registers the
 * sockopt and Generic Netlink user interfaces, creates the /proc
 * entries and sysctl table, initializes the service / fwmark / real-
 * server hash tables, attaches the rate estimator to the global
 * stats, and arms the periodic defense timer.
 *
 * Error handling for the two registrations is visible below: a
 * failed genl registration rolls back the sockopt registration.
 */
3252 int __init ip_vs_control_init(void)
3259 ret = nf_register_sockopt(&ip_vs_sockopts);
3261 IP_VS_ERR("cannot register sockopt.\n");
3265 ret = ip_vs_genl_register();
3267 IP_VS_ERR("cannot register Generic Netlink interface.\n");
	/* Unwind the earlier sockopt registration on failure. */
3268 nf_unregister_sockopt(&ip_vs_sockopts);
3272 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3273 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3275 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3277 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3278 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3279 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3280 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3282 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3283 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3286 ip_vs_new_estimator(&ip_vs_stats);
3288 /* Hook the defense timer */
3289 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3296 void ip_vs_control_cleanup(void)
3299 ip_vs_trash_cleanup();
3300 cancel_rearming_delayed_work(&defense_work);
3301 cancel_work_sync(&defense_work.work);
3302 ip_vs_kill_estimator(&ip_vs_stats);
3303 unregister_sysctl_table(sysctl_header);
3304 proc_net_remove(&init_net, "ip_vs_stats");
3305 proc_net_remove(&init_net, "ip_vs");
3306 ip_vs_genl_unregister();
3307 nf_unregister_sockopt(&ip_vs_sockopts);