a2d69b2ce6a1dae6951dd8deca6f53ab2cd53d66
[safe/jmp/linux-2.6] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #include <net/route.h>
39 #include <net/sock.h>
40 #include <net/genetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #include <net/ip_vs.h>
45
46 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DEFINE_MUTEX(__ip_vs_mutex);
48
49 /* lock for service table */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52 /* lock for table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55 /* lock for state and timeout tables */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58 /* lock for drop entry handling */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61 /* lock for drop packet handling */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64 /* 1/rate drop and drop-entry variables */
65 int ip_vs_drop_rate = 0;
66 int ip_vs_drop_counter = 0;
67 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69 /* number of virtual services */
70 static int ip_vs_num_services = 0;
71
72 /* sysctl variables */
73 static int sysctl_ip_vs_drop_entry = 0;
74 static int sysctl_ip_vs_drop_packet = 0;
75 static int sysctl_ip_vs_secure_tcp = 0;
76 static int sysctl_ip_vs_amemthresh = 1024;
77 static int sysctl_ip_vs_am_droprate = 10;
78 int sysctl_ip_vs_cache_bypass = 0;
79 int sysctl_ip_vs_expire_nodest_conn = 0;
80 int sysctl_ip_vs_expire_quiescent_template = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* debug level, only present when CONFIG_IP_VS_DEBUG is enabled */
static int sysctl_ip_vs_debug_level = 0;

/*
 *      Returns the current value of sysctl_ip_vs_debug_level
 */
int ip_vs_get_debug_level(void)
{
        int level = sysctl_ip_vs_debug_level;

        return level;
}
#endif /* CONFIG_IP_VS_DEBUG */
93
94 /*
95  *      update_defense_level is called from keventd and from sysctl,
96  *      so it needs to protect itself from softirqs
97  */
98 static void update_defense_level(void)
99 {
100         struct sysinfo i;
101         static int old_secure_tcp = 0;
102         int availmem;
103         int nomem;
104         int to_change = -1;
105
106         /* we only count free and buffered memory (in pages) */
107         si_meminfo(&i);
108         availmem = i.freeram + i.bufferram;
109         /* however in linux 2.5 the i.bufferram is total page cache size,
110            we need adjust it */
111         /* si_swapinfo(&i); */
112         /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114         nomem = (availmem < sysctl_ip_vs_amemthresh);
115
116         local_bh_disable();
117
118         /* drop_entry */
119         spin_lock(&__ip_vs_dropentry_lock);
120         switch (sysctl_ip_vs_drop_entry) {
121         case 0:
122                 atomic_set(&ip_vs_dropentry, 0);
123                 break;
124         case 1:
125                 if (nomem) {
126                         atomic_set(&ip_vs_dropentry, 1);
127                         sysctl_ip_vs_drop_entry = 2;
128                 } else {
129                         atomic_set(&ip_vs_dropentry, 0);
130                 }
131                 break;
132         case 2:
133                 if (nomem) {
134                         atomic_set(&ip_vs_dropentry, 1);
135                 } else {
136                         atomic_set(&ip_vs_dropentry, 0);
137                         sysctl_ip_vs_drop_entry = 1;
138                 };
139                 break;
140         case 3:
141                 atomic_set(&ip_vs_dropentry, 1);
142                 break;
143         }
144         spin_unlock(&__ip_vs_dropentry_lock);
145
146         /* drop_packet */
147         spin_lock(&__ip_vs_droppacket_lock);
148         switch (sysctl_ip_vs_drop_packet) {
149         case 0:
150                 ip_vs_drop_rate = 0;
151                 break;
152         case 1:
153                 if (nomem) {
154                         ip_vs_drop_rate = ip_vs_drop_counter
155                                 = sysctl_ip_vs_amemthresh /
156                                 (sysctl_ip_vs_amemthresh-availmem);
157                         sysctl_ip_vs_drop_packet = 2;
158                 } else {
159                         ip_vs_drop_rate = 0;
160                 }
161                 break;
162         case 2:
163                 if (nomem) {
164                         ip_vs_drop_rate = ip_vs_drop_counter
165                                 = sysctl_ip_vs_amemthresh /
166                                 (sysctl_ip_vs_amemthresh-availmem);
167                 } else {
168                         ip_vs_drop_rate = 0;
169                         sysctl_ip_vs_drop_packet = 1;
170                 }
171                 break;
172         case 3:
173                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174                 break;
175         }
176         spin_unlock(&__ip_vs_droppacket_lock);
177
178         /* secure_tcp */
179         write_lock(&__ip_vs_securetcp_lock);
180         switch (sysctl_ip_vs_secure_tcp) {
181         case 0:
182                 if (old_secure_tcp >= 2)
183                         to_change = 0;
184                 break;
185         case 1:
186                 if (nomem) {
187                         if (old_secure_tcp < 2)
188                                 to_change = 1;
189                         sysctl_ip_vs_secure_tcp = 2;
190                 } else {
191                         if (old_secure_tcp >= 2)
192                                 to_change = 0;
193                 }
194                 break;
195         case 2:
196                 if (nomem) {
197                         if (old_secure_tcp < 2)
198                                 to_change = 1;
199                 } else {
200                         if (old_secure_tcp >= 2)
201                                 to_change = 0;
202                         sysctl_ip_vs_secure_tcp = 1;
203                 }
204                 break;
205         case 3:
206                 if (old_secure_tcp < 2)
207                         to_change = 1;
208                 break;
209         }
210         old_secure_tcp = sysctl_ip_vs_secure_tcp;
211         if (to_change >= 0)
212                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213         write_unlock(&__ip_vs_securetcp_lock);
214
215         local_bh_enable();
216 }
217
218
219 /*
220  *      Timer for checking the defense
221  */
222 #define DEFENSE_TIMER_PERIOD    1*HZ
223 static void defense_work_handler(struct work_struct *work);
224 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
225
226 static void defense_work_handler(struct work_struct *work)
227 {
228         update_defense_level();
229         if (atomic_read(&ip_vs_dropentry))
230                 ip_vs_random_dropentry();
231
232         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233 }
234
235 int
236 ip_vs_use_count_inc(void)
237 {
238         return try_module_get(THIS_MODULE);
239 }
240
241 void
242 ip_vs_use_count_dec(void)
243 {
244         module_put(THIS_MODULE);
245 }
246
247
248 /*
249  *      Hash table: for virtual service lookups
250  */
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255 /* the service table hashed by <protocol, addr, port> */
256 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257 /* the service table hashed by fwmark */
258 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260 /*
261  *      Hash table: for real service lookups
262  */
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269 /*
270  *      Trash for destinations
271  */
272 static LIST_HEAD(ip_vs_dest_trash);
273
274 /*
275  *      FTP & NULL virtual service counters
276  */
277 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281 /*
282  *      Returns hash value for virtual service
283  */
284 static __inline__ unsigned
285 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
286                   __be16 port)
287 {
288         register unsigned porth = ntohs(port);
289         __be32 addr_fold = addr->ip;
290
291 #ifdef CONFIG_IP_VS_IPV6
292         if (af == AF_INET6)
293                 addr_fold = addr->ip6[0]^addr->ip6[1]^
294                             addr->ip6[2]^addr->ip6[3];
295 #endif
296
297         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
298                 & IP_VS_SVC_TAB_MASK;
299 }
300
301 /*
302  *      Returns hash value of fwmark for virtual service lookup
303  */
304 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
305 {
306         return fwmark & IP_VS_SVC_TAB_MASK;
307 }
308
309 /*
310  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311  *      or in the ip_vs_svc_fwm_table by fwmark.
312  *      Should be called with locked tables.
313  */
314 static int ip_vs_svc_hash(struct ip_vs_service *svc)
315 {
316         unsigned hash;
317
318         if (svc->flags & IP_VS_SVC_F_HASHED) {
319                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320                           "called from %p\n", __builtin_return_address(0));
321                 return 0;
322         }
323
324         if (svc->fwmark == 0) {
325                 /*
326                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
327                  */
328                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
329                                          svc->port);
330                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
331         } else {
332                 /*
333                  *  Hash it by fwmark in ip_vs_svc_fwm_table
334                  */
335                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
337         }
338
339         svc->flags |= IP_VS_SVC_F_HASHED;
340         /* increase its refcnt because it is referenced by the svc table */
341         atomic_inc(&svc->refcnt);
342         return 1;
343 }
344
345
346 /*
347  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348  *      Should be called with locked tables.
349  */
350 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
351 {
352         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354                           "called from %p\n", __builtin_return_address(0));
355                 return 0;
356         }
357
358         if (svc->fwmark == 0) {
359                 /* Remove it from the ip_vs_svc_table table */
360                 list_del(&svc->s_list);
361         } else {
362                 /* Remove it from the ip_vs_svc_fwm_table table */
363                 list_del(&svc->f_list);
364         }
365
366         svc->flags &= ~IP_VS_SVC_F_HASHED;
367         atomic_dec(&svc->refcnt);
368         return 1;
369 }
370
371
372 /*
373  *      Get service by {proto,addr,port} in the service table.
374  */
375 static inline struct ip_vs_service *
376 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
377                     __be16 vport)
378 {
379         unsigned hash;
380         struct ip_vs_service *svc;
381
382         /* Check for "full" addressed entries */
383         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
384
385         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
386                 if ((svc->af == af)
387                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
388                     && (svc->port == vport)
389                     && (svc->protocol == protocol)) {
390                         /* HIT */
391                         atomic_inc(&svc->usecnt);
392                         return svc;
393                 }
394         }
395
396         return NULL;
397 }
398
399
400 /*
401  *      Get service by {fwmark} in the service table.
402  */
403 static inline struct ip_vs_service *
404 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
405 {
406         unsigned hash;
407         struct ip_vs_service *svc;
408
409         /* Check for fwmark addressed entries */
410         hash = ip_vs_svc_fwm_hashkey(fwmark);
411
412         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
413                 if (svc->fwmark == fwmark && svc->af == af) {
414                         /* HIT */
415                         atomic_inc(&svc->usecnt);
416                         return svc;
417                 }
418         }
419
420         return NULL;
421 }
422
423 struct ip_vs_service *
424 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
425 {
426         struct ip_vs_service *svc;
427         union nf_inet_addr _vaddr = { .ip = vaddr };
428         read_lock(&__ip_vs_svc_lock);
429
430         /*
431          *      Check the table hashed by fwmark first
432          */
433         if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark)))
434                 goto out;
435
436         /*
437          *      Check the table hashed by <protocol,addr,port>
438          *      for "full" addressed entries
439          */
440         svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport);
441
442         if (svc == NULL
443             && protocol == IPPROTO_TCP
444             && atomic_read(&ip_vs_ftpsvc_counter)
445             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
446                 /*
447                  * Check if ftp service entry exists, the packet
448                  * might belong to FTP data connections.
449                  */
450                 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT);
451         }
452
453         if (svc == NULL
454             && atomic_read(&ip_vs_nullsvc_counter)) {
455                 /*
456                  * Check if the catch-all port (port zero) exists
457                  */
458                 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0);
459         }
460
461   out:
462         read_unlock(&__ip_vs_svc_lock);
463
464         IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
465                   fwmark, ip_vs_proto_name(protocol),
466                   NIPQUAD(vaddr), ntohs(vport),
467                   svc?"hit":"not hit");
468
469         return svc;
470 }
471
472
473 static inline void
474 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
475 {
476         atomic_inc(&svc->refcnt);
477         dest->svc = svc;
478 }
479
480 static inline void
481 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
482 {
483         struct ip_vs_service *svc = dest->svc;
484
485         dest->svc = NULL;
486         if (atomic_dec_and_test(&svc->refcnt))
487                 kfree(svc);
488 }
489
490
491 /*
492  *      Returns hash value for real service
493  */
494 static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
495 {
496         register unsigned porth = ntohs(port);
497
498         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
499                 & IP_VS_RTAB_MASK;
500 }
501
502 /*
503  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
504  *      should be called with locked tables.
505  */
506 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
507 {
508         unsigned hash;
509
510         if (!list_empty(&dest->d_list)) {
511                 return 0;
512         }
513
514         /*
515          *      Hash by proto,addr,port,
516          *      which are the parameters of the real service.
517          */
518         hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
519         list_add(&dest->d_list, &ip_vs_rtable[hash]);
520
521         return 1;
522 }
523
524 /*
525  *      UNhashes ip_vs_dest from ip_vs_rtable.
526  *      should be called with locked tables.
527  */
528 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
529 {
530         /*
531          * Remove it from the ip_vs_rtable table.
532          */
533         if (!list_empty(&dest->d_list)) {
534                 list_del(&dest->d_list);
535                 INIT_LIST_HEAD(&dest->d_list);
536         }
537
538         return 1;
539 }
540
541 /*
542  *      Lookup real service by <proto,addr,port> in the real service table.
543  */
544 struct ip_vs_dest *
545 ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
546 {
547         unsigned hash;
548         struct ip_vs_dest *dest;
549
550         /*
551          *      Check for "full" addressed entries
552          *      Return the first found entry
553          */
554         hash = ip_vs_rs_hashkey(daddr, dport);
555
556         read_lock(&__ip_vs_rs_lock);
557         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
558                 if ((dest->addr.ip == daddr)
559                     && (dest->port == dport)
560                     && ((dest->protocol == protocol) ||
561                         dest->vfwmark)) {
562                         /* HIT */
563                         read_unlock(&__ip_vs_rs_lock);
564                         return dest;
565                 }
566         }
567         read_unlock(&__ip_vs_rs_lock);
568
569         return NULL;
570 }
571
572 /*
573  *      Lookup destination by {addr,port} in the given service
574  */
575 static struct ip_vs_dest *
576 ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
577 {
578         struct ip_vs_dest *dest;
579
580         /*
581          * Find the destination for the given service
582          */
583         list_for_each_entry(dest, &svc->destinations, n_list) {
584                 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
585                         /* HIT */
586                         return dest;
587                 }
588         }
589
590         return NULL;
591 }
592
593 /*
594  * Find destination by {daddr,dport,vaddr,protocol}
595  * Cretaed to be used in ip_vs_process_message() in
596  * the backup synchronization daemon. It finds the
597  * destination to be bound to the received connection
598  * on the backup.
599  *
600  * ip_vs_lookup_real_service() looked promissing, but
601  * seems not working as expected.
602  */
603 struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
604                                     __be32 vaddr, __be16 vport, __u16 protocol)
605 {
606         struct ip_vs_dest *dest;
607         struct ip_vs_service *svc;
608
609         svc = ip_vs_service_get(0, protocol, vaddr, vport);
610         if (!svc)
611                 return NULL;
612         dest = ip_vs_lookup_dest(svc, daddr, dport);
613         if (dest)
614                 atomic_inc(&dest->refcnt);
615         ip_vs_service_put(svc);
616         return dest;
617 }
618
619 /*
620  *  Lookup dest by {svc,addr,port} in the destination trash.
621  *  The destination trash is used to hold the destinations that are removed
622  *  from the service table but are still referenced by some conn entries.
623  *  The reason to add the destination trash is when the dest is temporary
624  *  down (either by administrator or by monitor program), the dest can be
625  *  picked back from the trash, the remaining connections to the dest can
626  *  continue, and the counting information of the dest is also useful for
627  *  scheduling.
628  */
629 static struct ip_vs_dest *
630 ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
631 {
632         struct ip_vs_dest *dest, *nxt;
633
634         /*
635          * Find the destination in trash
636          */
637         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
638                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
639                           "dest->refcnt=%d\n",
640                           dest->vfwmark,
641                           NIPQUAD(dest->addr.ip), ntohs(dest->port),
642                           atomic_read(&dest->refcnt));
643                 if (dest->addr.ip == daddr &&
644                     dest->port == dport &&
645                     dest->vfwmark == svc->fwmark &&
646                     dest->protocol == svc->protocol &&
647                     (svc->fwmark ||
648                      (dest->vaddr.ip == svc->addr.ip &&
649                       dest->vport == svc->port))) {
650                         /* HIT */
651                         return dest;
652                 }
653
654                 /*
655                  * Try to purge the destination from trash if not referenced
656                  */
657                 if (atomic_read(&dest->refcnt) == 1) {
658                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
659                                   "from trash\n",
660                                   dest->vfwmark,
661                                   NIPQUAD(dest->addr.ip), ntohs(dest->port));
662                         list_del(&dest->n_list);
663                         ip_vs_dst_reset(dest);
664                         __ip_vs_unbind_svc(dest);
665                         kfree(dest);
666                 }
667         }
668
669         return NULL;
670 }
671
672
673 /*
674  *  Clean up all the destinations in the trash
675  *  Called by the ip_vs_control_cleanup()
676  *
677  *  When the ip_vs_control_clearup is activated by ipvs module exit,
678  *  the service tables must have been flushed and all the connections
679  *  are expired, and the refcnt of each destination in the trash must
680  *  be 1, so we simply release them here.
681  */
682 static void ip_vs_trash_cleanup(void)
683 {
684         struct ip_vs_dest *dest, *nxt;
685
686         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
687                 list_del(&dest->n_list);
688                 ip_vs_dst_reset(dest);
689                 __ip_vs_unbind_svc(dest);
690                 kfree(dest);
691         }
692 }
693
694
695 static void
696 ip_vs_zero_stats(struct ip_vs_stats *stats)
697 {
698         spin_lock_bh(&stats->lock);
699
700         stats->conns = 0;
701         stats->inpkts = 0;
702         stats->outpkts = 0;
703         stats->inbytes = 0;
704         stats->outbytes = 0;
705
706         stats->cps = 0;
707         stats->inpps = 0;
708         stats->outpps = 0;
709         stats->inbps = 0;
710         stats->outbps = 0;
711
712         ip_vs_zero_estimator(stats);
713
714         spin_unlock_bh(&stats->lock);
715 }
716
717 /*
718  *      Update a destination in the given service
719  */
720 static void
721 __ip_vs_update_dest(struct ip_vs_service *svc,
722                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
723 {
724         int conn_flags;
725
726         /* set the weight and the flags */
727         atomic_set(&dest->weight, udest->weight);
728         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
729
730         /* check if local node and update the flags */
731         if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
732                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
733                         | IP_VS_CONN_F_LOCALNODE;
734         }
735
736         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
737         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
738                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
739         } else {
740                 /*
741                  *    Put the real service in ip_vs_rtable if not present.
742                  *    For now only for NAT!
743                  */
744                 write_lock_bh(&__ip_vs_rs_lock);
745                 ip_vs_rs_hash(dest);
746                 write_unlock_bh(&__ip_vs_rs_lock);
747         }
748         atomic_set(&dest->conn_flags, conn_flags);
749
750         /* bind the service */
751         if (!dest->svc) {
752                 __ip_vs_bind_svc(dest, svc);
753         } else {
754                 if (dest->svc != svc) {
755                         __ip_vs_unbind_svc(dest);
756                         ip_vs_zero_stats(&dest->stats);
757                         __ip_vs_bind_svc(dest, svc);
758                 }
759         }
760
761         /* set the dest status flags */
762         dest->flags |= IP_VS_DEST_F_AVAILABLE;
763
764         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
765                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
766         dest->u_threshold = udest->u_threshold;
767         dest->l_threshold = udest->l_threshold;
768 }
769
770
771 /*
772  *      Create a destination for the given service
773  */
774 static int
775 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
776                struct ip_vs_dest **dest_p)
777 {
778         struct ip_vs_dest *dest;
779         unsigned atype;
780
781         EnterFunction(2);
782
783         atype = inet_addr_type(&init_net, udest->addr.ip);
784         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
785                 return -EINVAL;
786
787         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
788         if (dest == NULL) {
789                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
790                 return -ENOMEM;
791         }
792
793         dest->af = svc->af;
794         dest->protocol = svc->protocol;
795         dest->vaddr = svc->addr;
796         dest->vport = svc->port;
797         dest->vfwmark = svc->fwmark;
798         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
799         dest->port = udest->port;
800
801         atomic_set(&dest->activeconns, 0);
802         atomic_set(&dest->inactconns, 0);
803         atomic_set(&dest->persistconns, 0);
804         atomic_set(&dest->refcnt, 0);
805
806         INIT_LIST_HEAD(&dest->d_list);
807         spin_lock_init(&dest->dst_lock);
808         spin_lock_init(&dest->stats.lock);
809         __ip_vs_update_dest(svc, dest, udest);
810         ip_vs_new_estimator(&dest->stats);
811
812         *dest_p = dest;
813
814         LeaveFunction(2);
815         return 0;
816 }
817
818
819 /*
820  *      Add a destination into an existing service
821  */
822 static int
823 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
824 {
825         struct ip_vs_dest *dest;
826         union nf_inet_addr daddr;
827         __be16 dport = udest->port;
828         int ret;
829
830         EnterFunction(2);
831
832         if (udest->weight < 0) {
833                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
834                 return -ERANGE;
835         }
836
837         if (udest->l_threshold > udest->u_threshold) {
838                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
839                           "upper threshold\n");
840                 return -ERANGE;
841         }
842
843         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
844
845         /*
846          * Check if the dest already exists in the list
847          */
848         dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
849         if (dest != NULL) {
850                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
851                 return -EEXIST;
852         }
853
854         /*
855          * Check if the dest already exists in the trash and
856          * is from the same service
857          */
858         dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
859         if (dest != NULL) {
860                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
861                           "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
862                           NIPQUAD(daddr), ntohs(dport),
863                           atomic_read(&dest->refcnt),
864                           dest->vfwmark,
865                           NIPQUAD(dest->vaddr.ip),
866                           ntohs(dest->vport));
867                 __ip_vs_update_dest(svc, dest, udest);
868
869                 /*
870                  * Get the destination from the trash
871                  */
872                 list_del(&dest->n_list);
873
874                 ip_vs_new_estimator(&dest->stats);
875
876                 write_lock_bh(&__ip_vs_svc_lock);
877
878                 /*
879                  * Wait until all other svc users go away.
880                  */
881                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
882
883                 list_add(&dest->n_list, &svc->destinations);
884                 svc->num_dests++;
885
886                 /* call the update_service function of its scheduler */
887                 if (svc->scheduler->update_service)
888                         svc->scheduler->update_service(svc);
889
890                 write_unlock_bh(&__ip_vs_svc_lock);
891                 return 0;
892         }
893
894         /*
895          * Allocate and initialize the dest structure
896          */
897         ret = ip_vs_new_dest(svc, udest, &dest);
898         if (ret) {
899                 return ret;
900         }
901
902         /*
903          * Add the dest entry into the list
904          */
905         atomic_inc(&dest->refcnt);
906
907         write_lock_bh(&__ip_vs_svc_lock);
908
909         /*
910          * Wait until all other svc users go away.
911          */
912         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
913
914         list_add(&dest->n_list, &svc->destinations);
915         svc->num_dests++;
916
917         /* call the update_service function of its scheduler */
918         if (svc->scheduler->update_service)
919                 svc->scheduler->update_service(svc);
920
921         write_unlock_bh(&__ip_vs_svc_lock);
922
923         LeaveFunction(2);
924
925         return 0;
926 }
927
928
929 /*
930  *      Edit a destination in the given service
931  */
932 static int
933 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
934 {
935         struct ip_vs_dest *dest;
936         union nf_inet_addr daddr;
937         __be16 dport = udest->port;
938
939         EnterFunction(2);
940
941         if (udest->weight < 0) {
942                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
943                 return -ERANGE;
944         }
945
946         if (udest->l_threshold > udest->u_threshold) {
947                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
948                           "upper threshold\n");
949                 return -ERANGE;
950         }
951
952         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
953
954         /*
955          *  Lookup the destination list
956          */
957         dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
958         if (dest == NULL) {
959                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
960                 return -ENOENT;
961         }
962
963         __ip_vs_update_dest(svc, dest, udest);
964
965         write_lock_bh(&__ip_vs_svc_lock);
966
967         /* Wait until all other svc users go away */
968         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
969
970         /* call the update_service, because server weight may be changed */
971         if (svc->scheduler->update_service)
972                 svc->scheduler->update_service(svc);
973
974         write_unlock_bh(&__ip_vs_svc_lock);
975
976         LeaveFunction(2);
977
978         return 0;
979 }
980
981
982 /*
983  *      Delete a destination (must be already unlinked from the service)
984  */
985 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
986 {
987         ip_vs_kill_estimator(&dest->stats);
988
989         /*
990          *  Remove it from the d-linked list with the real services.
991          */
992         write_lock_bh(&__ip_vs_rs_lock);
993         ip_vs_rs_unhash(dest);
994         write_unlock_bh(&__ip_vs_rs_lock);
995
996         /*
997          *  Decrease the refcnt of the dest, and free the dest
998          *  if nobody refers to it (refcnt=0). Otherwise, throw
999          *  the destination into the trash.
1000          */
1001         if (atomic_dec_and_test(&dest->refcnt)) {
1002                 ip_vs_dst_reset(dest);
1003                 /* simply decrease svc->refcnt here, let the caller check
1004                    and release the service if nobody refers to it.
1005                    Only user context can release destination and service,
1006                    and only one user context can update virtual service at a
1007                    time, so the operation here is OK */
1008                 atomic_dec(&dest->svc->refcnt);
1009                 kfree(dest);
1010         } else {
1011                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1012                           "dest->refcnt=%d\n",
1013                           NIPQUAD(dest->addr.ip), ntohs(dest->port),
1014                           atomic_read(&dest->refcnt));
1015                 list_add(&dest->n_list, &ip_vs_dest_trash);
1016                 atomic_inc(&dest->refcnt);
1017         }
1018 }
1019
1020
1021 /*
1022  *      Unlink a destination from the given service
1023  */
1024 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1025                                 struct ip_vs_dest *dest,
1026                                 int svcupd)
1027 {
1028         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1029
1030         /*
1031          *  Remove it from the d-linked destination list.
1032          */
1033         list_del(&dest->n_list);
1034         svc->num_dests--;
1035
1036         /*
1037          *  Call the update_service function of its scheduler
1038          */
1039         if (svcupd && svc->scheduler->update_service)
1040                         svc->scheduler->update_service(svc);
1041 }
1042
1043
1044 /*
1045  *      Delete a destination server in the given service
1046  */
1047 static int
1048 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1049 {
1050         struct ip_vs_dest *dest;
1051         __be16 dport = udest->port;
1052
1053         EnterFunction(2);
1054
1055         dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
1056
1057         if (dest == NULL) {
1058                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1059                 return -ENOENT;
1060         }
1061
1062         write_lock_bh(&__ip_vs_svc_lock);
1063
1064         /*
1065          *      Wait until all other svc users go away.
1066          */
1067         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1068
1069         /*
1070          *      Unlink dest from the service
1071          */
1072         __ip_vs_unlink_dest(svc, dest, 1);
1073
1074         write_unlock_bh(&__ip_vs_svc_lock);
1075
1076         /*
1077          *      Delete the destination
1078          */
1079         __ip_vs_del_dest(dest);
1080
1081         LeaveFunction(2);
1082
1083         return 0;
1084 }
1085
1086
1087 /*
1088  *      Add a service into the service hash table
1089  */
1090 static int
1091 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1092                   struct ip_vs_service **svc_p)
1093 {
1094         int ret = 0;
1095         struct ip_vs_scheduler *sched = NULL;
1096         struct ip_vs_service *svc = NULL;
1097
1098         /* increase the module use count */
1099         ip_vs_use_count_inc();
1100
1101         /* Lookup the scheduler by 'u->sched_name' */
1102         sched = ip_vs_scheduler_get(u->sched_name);
1103         if (sched == NULL) {
1104                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1105                            u->sched_name);
1106                 ret = -ENOENT;
1107                 goto out_mod_dec;
1108         }
1109
1110         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1111         if (svc == NULL) {
1112                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1113                 ret = -ENOMEM;
1114                 goto out_err;
1115         }
1116
1117         /* I'm the first user of the service */
1118         atomic_set(&svc->usecnt, 1);
1119         atomic_set(&svc->refcnt, 0);
1120
1121         svc->af = u->af;
1122         svc->protocol = u->protocol;
1123         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1124         svc->port = u->port;
1125         svc->fwmark = u->fwmark;
1126         svc->flags = u->flags;
1127         svc->timeout = u->timeout * HZ;
1128         svc->netmask = u->netmask;
1129
1130         INIT_LIST_HEAD(&svc->destinations);
1131         rwlock_init(&svc->sched_lock);
1132         spin_lock_init(&svc->stats.lock);
1133
1134         /* Bind the scheduler */
1135         ret = ip_vs_bind_scheduler(svc, sched);
1136         if (ret)
1137                 goto out_err;
1138         sched = NULL;
1139
1140         /* Update the virtual service counters */
1141         if (svc->port == FTPPORT)
1142                 atomic_inc(&ip_vs_ftpsvc_counter);
1143         else if (svc->port == 0)
1144                 atomic_inc(&ip_vs_nullsvc_counter);
1145
1146         ip_vs_new_estimator(&svc->stats);
1147         ip_vs_num_services++;
1148
1149         /* Hash the service into the service table */
1150         write_lock_bh(&__ip_vs_svc_lock);
1151         ip_vs_svc_hash(svc);
1152         write_unlock_bh(&__ip_vs_svc_lock);
1153
1154         *svc_p = svc;
1155         return 0;
1156
1157   out_err:
1158         if (svc != NULL) {
1159                 if (svc->scheduler)
1160                         ip_vs_unbind_scheduler(svc);
1161                 if (svc->inc) {
1162                         local_bh_disable();
1163                         ip_vs_app_inc_put(svc->inc);
1164                         local_bh_enable();
1165                 }
1166                 kfree(svc);
1167         }
1168         ip_vs_scheduler_put(sched);
1169
1170   out_mod_dec:
1171         /* decrease the module use count */
1172         ip_vs_use_count_dec();
1173
1174         return ret;
1175 }
1176
1177
1178 /*
1179  *      Edit a service and bind it with a new scheduler
1180  */
1181 static int
1182 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1183 {
1184         struct ip_vs_scheduler *sched, *old_sched;
1185         int ret = 0;
1186
1187         /*
1188          * Lookup the scheduler, by 'u->sched_name'
1189          */
1190         sched = ip_vs_scheduler_get(u->sched_name);
1191         if (sched == NULL) {
1192                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1193                            u->sched_name);
1194                 return -ENOENT;
1195         }
1196         old_sched = sched;
1197
1198         write_lock_bh(&__ip_vs_svc_lock);
1199
1200         /*
1201          * Wait until all other svc users go away.
1202          */
1203         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1204
1205         /*
1206          * Set the flags and timeout value
1207          */
1208         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1209         svc->timeout = u->timeout * HZ;
1210         svc->netmask = u->netmask;
1211
1212         old_sched = svc->scheduler;
1213         if (sched != old_sched) {
1214                 /*
1215                  * Unbind the old scheduler
1216                  */
1217                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1218                         old_sched = sched;
1219                         goto out;
1220                 }
1221
1222                 /*
1223                  * Bind the new scheduler
1224                  */
1225                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1226                         /*
1227                          * If ip_vs_bind_scheduler fails, restore the old
1228                          * scheduler.
1229                          * The main reason of failure is out of memory.
1230                          *
1231                          * The question is if the old scheduler can be
1232                          * restored all the time. TODO: if it cannot be
1233                          * restored some time, we must delete the service,
1234                          * otherwise the system may crash.
1235                          */
1236                         ip_vs_bind_scheduler(svc, old_sched);
1237                         old_sched = sched;
1238                         goto out;
1239                 }
1240         }
1241
1242   out:
1243         write_unlock_bh(&__ip_vs_svc_lock);
1244
1245         if (old_sched)
1246                 ip_vs_scheduler_put(old_sched);
1247
1248         return ret;
1249 }
1250
1251
1252 /*
1253  *      Delete a service from the service list
1254  *      - The service must be unlinked, unlocked and not referenced!
1255  *      - We are called under _bh lock
1256  */
1257 static void __ip_vs_del_service(struct ip_vs_service *svc)
1258 {
1259         struct ip_vs_dest *dest, *nxt;
1260         struct ip_vs_scheduler *old_sched;
1261
1262         ip_vs_num_services--;
1263         ip_vs_kill_estimator(&svc->stats);
1264
1265         /* Unbind scheduler */
1266         old_sched = svc->scheduler;
1267         ip_vs_unbind_scheduler(svc);
1268         if (old_sched)
1269                 ip_vs_scheduler_put(old_sched);
1270
1271         /* Unbind app inc */
1272         if (svc->inc) {
1273                 ip_vs_app_inc_put(svc->inc);
1274                 svc->inc = NULL;
1275         }
1276
1277         /*
1278          *    Unlink the whole destination list
1279          */
1280         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1281                 __ip_vs_unlink_dest(svc, dest, 0);
1282                 __ip_vs_del_dest(dest);
1283         }
1284
1285         /*
1286          *    Update the virtual service counters
1287          */
1288         if (svc->port == FTPPORT)
1289                 atomic_dec(&ip_vs_ftpsvc_counter);
1290         else if (svc->port == 0)
1291                 atomic_dec(&ip_vs_nullsvc_counter);
1292
1293         /*
1294          *    Free the service if nobody refers to it
1295          */
1296         if (atomic_read(&svc->refcnt) == 0)
1297                 kfree(svc);
1298
1299         /* decrease the module use count */
1300         ip_vs_use_count_dec();
1301 }
1302
1303 /*
1304  *      Delete a service from the service list
1305  */
1306 static int ip_vs_del_service(struct ip_vs_service *svc)
1307 {
1308         if (svc == NULL)
1309                 return -EEXIST;
1310
1311         /*
1312          * Unhash it from the service table
1313          */
1314         write_lock_bh(&__ip_vs_svc_lock);
1315
1316         ip_vs_svc_unhash(svc);
1317
1318         /*
1319          * Wait until all the svc users go away.
1320          */
1321         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1322
1323         __ip_vs_del_service(svc);
1324
1325         write_unlock_bh(&__ip_vs_svc_lock);
1326
1327         return 0;
1328 }
1329
1330
1331 /*
1332  *      Flush all the virtual services
1333  */
1334 static int ip_vs_flush(void)
1335 {
1336         int idx;
1337         struct ip_vs_service *svc, *nxt;
1338
1339         /*
1340          * Flush the service table hashed by <protocol,addr,port>
1341          */
1342         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1343                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1344                         write_lock_bh(&__ip_vs_svc_lock);
1345                         ip_vs_svc_unhash(svc);
1346                         /*
1347                          * Wait until all the svc users go away.
1348                          */
1349                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1350                         __ip_vs_del_service(svc);
1351                         write_unlock_bh(&__ip_vs_svc_lock);
1352                 }
1353         }
1354
1355         /*
1356          * Flush the service table hashed by fwmark
1357          */
1358         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1359                 list_for_each_entry_safe(svc, nxt,
1360                                          &ip_vs_svc_fwm_table[idx], f_list) {
1361                         write_lock_bh(&__ip_vs_svc_lock);
1362                         ip_vs_svc_unhash(svc);
1363                         /*
1364                          * Wait until all the svc users go away.
1365                          */
1366                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1367                         __ip_vs_del_service(svc);
1368                         write_unlock_bh(&__ip_vs_svc_lock);
1369                 }
1370         }
1371
1372         return 0;
1373 }
1374
1375
1376 /*
1377  *      Zero counters in a service or all services
1378  */
1379 static int ip_vs_zero_service(struct ip_vs_service *svc)
1380 {
1381         struct ip_vs_dest *dest;
1382
1383         write_lock_bh(&__ip_vs_svc_lock);
1384         list_for_each_entry(dest, &svc->destinations, n_list) {
1385                 ip_vs_zero_stats(&dest->stats);
1386         }
1387         ip_vs_zero_stats(&svc->stats);
1388         write_unlock_bh(&__ip_vs_svc_lock);
1389         return 0;
1390 }
1391
1392 static int ip_vs_zero_all(void)
1393 {
1394         int idx;
1395         struct ip_vs_service *svc;
1396
1397         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1398                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1399                         ip_vs_zero_service(svc);
1400                 }
1401         }
1402
1403         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1404                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1405                         ip_vs_zero_service(svc);
1406                 }
1407         }
1408
1409         ip_vs_zero_stats(&ip_vs_stats);
1410         return 0;
1411 }
1412
1413
1414 static int
1415 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1416                      void __user *buffer, size_t *lenp, loff_t *ppos)
1417 {
1418         int *valp = table->data;
1419         int val = *valp;
1420         int rc;
1421
1422         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1423         if (write && (*valp != val)) {
1424                 if ((*valp < 0) || (*valp > 3)) {
1425                         /* Restore the correct value */
1426                         *valp = val;
1427                 } else {
1428                         update_defense_level();
1429                 }
1430         }
1431         return rc;
1432 }
1433
1434
1435 static int
1436 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1437                        void __user *buffer, size_t *lenp, loff_t *ppos)
1438 {
1439         int *valp = table->data;
1440         int val[2];
1441         int rc;
1442
1443         /* backup the value first */
1444         memcpy(val, valp, sizeof(val));
1445
1446         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1447         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1448                 /* Restore the correct value */
1449                 memcpy(valp, val, sizeof(val));
1450         }
1451         return rc;
1452 }
1453
1454
/*
 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *
 *      Every entry is mode 0644. The three defense-mode entries
 *      (drop_entry, drop_packet, secure_tcp) go through
 *      proc_do_defense_mode, sync_threshold goes through
 *      proc_do_sync_threshold, and the rest use plain proc_dointvec.
 */

static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
                .data           = &sysctl_ip_vs_amemthresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
#ifdef CONFIG_IP_VS_DEBUG
        /* Only present when IPVS debugging is configured in. */
        {
                .procname       = "debug_level",
                .data           = &sysctl_ip_vs_debug_level,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
#endif
        {
                .procname       = "am_droprate",
                .data           = &sysctl_ip_vs_am_droprate,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "drop_entry",
                .data           = &sysctl_ip_vs_drop_entry,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_do_defense_mode,
        },
        {
                .procname       = "drop_packet",
                .data           = &sysctl_ip_vs_drop_packet,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_do_defense_mode,
        },
        {
                .procname       = "secure_tcp",
                .data           = &sysctl_ip_vs_secure_tcp,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_do_defense_mode,
        },
        /* The per-state timeout entries below are compiled out. */
#if 0
        {
                .procname       = "timeout_established",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synsent",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synrecv",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_finwait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_timewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_close",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_closewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_lastack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_listen",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_udp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_icmp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
#endif
        {
                .procname       = "cache_bypass",
                .data           = &sysctl_ip_vs_cache_bypass,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "expire_nodest_conn",
                .data           = &sysctl_ip_vs_expire_nodest_conn,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {
                .procname       = "expire_quiescent_template",
                .data           = &sysctl_ip_vs_expire_quiescent_template,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        /* maxlen covers the whole variable, not a single int. */
        {
                .procname       = "sync_threshold",
                .data           = &sysctl_ip_vs_sync_threshold,
                .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
                .mode           = 0644,
                .proc_handler   = &proc_do_sync_threshold,
        },
        {
                .procname       = "nat_icmp_send",
                .data           = &sysctl_ip_vs_nat_icmp_send,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        /* Table terminator */
        { .ctl_name = 0 }
};
1627
/*
 *      sysctl path "net" / "ipv4" / "vs". Exported (GPL-only) so that
 *      code outside this file can reference the same path.
 */
const struct ctl_path net_vs_ctl_path[] = {
        { .procname = "net", .ctl_name = CTL_NET, },
        { .procname = "ipv4", .ctl_name = NET_IPV4, },
        { .procname = "vs", },
        { }     /* terminator */
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1635
/*
 * sysctl table header pointer; presumably set when the sysctl table is
 * registered -- the registration code is outside this part of the file.
 */
static struct ctl_table_header * sysctl_header;
1637
1638 #ifdef CONFIG_PROC_FS
1639
/*
 *      Iteration state kept in seq->private while the service tables
 *      are walked.
 */
struct ip_vs_iter {
        struct list_head *table;        /* ip_vs_svc_table or ip_vs_svc_fwm_table */
        int bucket;                     /* index of the current bucket in 'table' */
};
1644
1645 /*
1646  *      Write the contents of the VS rule table to a PROCfs file.
1647  *      (It is kept just for backward compatibility)
1648  */
1649 static inline const char *ip_vs_fwd_name(unsigned flags)
1650 {
1651         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1652         case IP_VS_CONN_F_LOCALNODE:
1653                 return "Local";
1654         case IP_VS_CONN_F_TUNNEL:
1655                 return "Tunnel";
1656         case IP_VS_CONN_F_DROUTE:
1657                 return "Route";
1658         default:
1659                 return "Masq";
1660         }
1661 }
1662
1663
1664 /* Get the Nth entry in the two lists */
1665 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1666 {
1667         struct ip_vs_iter *iter = seq->private;
1668         int idx;
1669         struct ip_vs_service *svc;
1670
1671         /* look in hash by protocol */
1672         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1673                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1674                         if (pos-- == 0){
1675                                 iter->table = ip_vs_svc_table;
1676                                 iter->bucket = idx;
1677                                 return svc;
1678                         }
1679                 }
1680         }
1681
1682         /* keep looking in fwmark */
1683         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1684                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1685                         if (pos-- == 0) {
1686                                 iter->table = ip_vs_svc_fwm_table;
1687                                 iter->bucket = idx;
1688                                 return svc;
1689                         }
1690                 }
1691         }
1692
1693         return NULL;
1694 }
1695
1696 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1697 {
1698
1699         read_lock_bh(&__ip_vs_svc_lock);
1700         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1701 }
1702
1703
1704 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1705 {
1706         struct list_head *e;
1707         struct ip_vs_iter *iter;
1708         struct ip_vs_service *svc;
1709
1710         ++*pos;
1711         if (v == SEQ_START_TOKEN)
1712                 return ip_vs_info_array(seq,0);
1713
1714         svc = v;
1715         iter = seq->private;
1716
1717         if (iter->table == ip_vs_svc_table) {
1718                 /* next service in table hashed by protocol */
1719                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1720                         return list_entry(e, struct ip_vs_service, s_list);
1721
1722
1723                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1724                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1725                                             s_list) {
1726                                 return svc;
1727                         }
1728                 }
1729
1730                 iter->table = ip_vs_svc_fwm_table;
1731                 iter->bucket = -1;
1732                 goto scan_fwmark;
1733         }
1734
1735         /* next service in hashed by fwmark */
1736         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1737                 return list_entry(e, struct ip_vs_service, f_list);
1738
1739  scan_fwmark:
1740         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1741                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1742                                     f_list)
1743                         return svc;
1744         }
1745
1746         return NULL;
1747 }
1748
/*
 * seq_file stop callback: drops the service read lock taken in
 * ip_vs_info_seq_start().
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock_bh(&__ip_vs_svc_lock);
}
1753
1754
1755 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1756 {
1757         if (v == SEQ_START_TOKEN) {
1758                 seq_printf(seq,
1759                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1760                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1761                 seq_puts(seq,
1762                          "Prot LocalAddress:Port Scheduler Flags\n");
1763                 seq_puts(seq,
1764                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1765         } else {
1766                 const struct ip_vs_service *svc = v;
1767                 const struct ip_vs_iter *iter = seq->private;
1768                 const struct ip_vs_dest *dest;
1769
1770                 if (iter->table == ip_vs_svc_table)
1771                         seq_printf(seq, "%s  %08X:%04X %s ",
1772                                    ip_vs_proto_name(svc->protocol),
1773                                    ntohl(svc->addr.ip),
1774                                    ntohs(svc->port),
1775                                    svc->scheduler->name);
1776                 else
1777                         seq_printf(seq, "FWM  %08X %s ",
1778                                    svc->fwmark, svc->scheduler->name);
1779
1780                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1781                         seq_printf(seq, "persistent %d %08X\n",
1782                                 svc->timeout,
1783                                 ntohl(svc->netmask));
1784                 else
1785                         seq_putc(seq, '\n');
1786
1787                 list_for_each_entry(dest, &svc->destinations, n_list) {
1788                         seq_printf(seq,
1789                                    "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1790                                    ntohl(dest->addr.ip), ntohs(dest->port),
1791                                    ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1792                                    atomic_read(&dest->weight),
1793                                    atomic_read(&dest->activeconns),
1794                                    atomic_read(&dest->inactconns));
1795                 }
1796         }
1797         return 0;
1798 }
1799
/*
 * seq_file callbacks for the service listing. start takes the service
 * read lock and stop releases it.
 */
static const struct seq_operations ip_vs_info_seq_ops = {
        .start = ip_vs_info_seq_start,
        .next  = ip_vs_info_seq_next,
        .stop  = ip_vs_info_seq_stop,
        .show  = ip_vs_info_seq_show,
};
1806
/*
 * Open handler for the service listing: opens a seq_file using
 * ip_vs_info_seq_ops with a private area sized for struct ip_vs_iter.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
}
1812
/*
 * File operations for the service listing; release uses
 * seq_release_private to match the seq_open_private() in the open handler.
 */
static const struct file_operations ip_vs_info_fops = {
        .owner   = THIS_MODULE,
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
1820
1821 #endif
1822
/* Global IPVS statistics; only the spinlock is initialised explicitly. */
struct ip_vs_stats ip_vs_stats = {
        .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1826
1827 #ifdef CONFIG_PROC_FS
1828 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1829 {
1830
1831 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1832         seq_puts(seq,
1833                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1834         seq_printf(seq,
1835                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1836
1837         spin_lock_bh(&ip_vs_stats.lock);
1838         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1839                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1840                    (unsigned long long) ip_vs_stats.inbytes,
1841                    (unsigned long long) ip_vs_stats.outbytes);
1842
1843 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1844         seq_puts(seq,
1845                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1846         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1847                         ip_vs_stats.cps,
1848                         ip_vs_stats.inpps,
1849                         ip_vs_stats.outpps,
1850                         ip_vs_stats.inbps,
1851                         ip_vs_stats.outbps);
1852         spin_unlock_bh(&ip_vs_stats.lock);
1853
1854         return 0;
1855 }
1856
1857 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1858 {
1859         return single_open(file, ip_vs_stats_show, NULL);
1860 }
1861
1862 static const struct file_operations ip_vs_stats_fops = {
1863         .owner = THIS_MODULE,
1864         .open = ip_vs_stats_seq_open,
1865         .read = seq_read,
1866         .llseek = seq_lseek,
1867         .release = single_release,
1868 };
1869
1870 #endif
1871
1872 /*
1873  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1874  */
1875 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1876 {
1877         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1878                   u->tcp_timeout,
1879                   u->tcp_fin_timeout,
1880                   u->udp_timeout);
1881
1882 #ifdef CONFIG_IP_VS_PROTO_TCP
1883         if (u->tcp_timeout) {
1884                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1885                         = u->tcp_timeout * HZ;
1886         }
1887
1888         if (u->tcp_fin_timeout) {
1889                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1890                         = u->tcp_fin_timeout * HZ;
1891         }
1892 #endif
1893
1894 #ifdef CONFIG_IP_VS_PROTO_UDP
1895         if (u->udp_timeout) {
1896                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1897                         = u->udp_timeout * HZ;
1898         }
1899 #endif
1900         return 0;
1901 }
1902
1903
1904 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
1905 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
1906 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
1907                                  sizeof(struct ip_vs_dest_user))
1908 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
1909 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
1910 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
1911
1912 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1913         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1914         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1915         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1916         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1917         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1918         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1919         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1920         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1921         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1922         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1923         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1924 };
1925
1926 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1927                                   struct ip_vs_service_user *usvc_compat)
1928 {
1929         usvc->af                = AF_INET;
1930         usvc->protocol          = usvc_compat->protocol;
1931         usvc->addr.ip           = usvc_compat->addr;
1932         usvc->port              = usvc_compat->port;
1933         usvc->fwmark            = usvc_compat->fwmark;
1934
1935         /* Deep copy of sched_name is not needed here */
1936         usvc->sched_name        = usvc_compat->sched_name;
1937
1938         usvc->flags             = usvc_compat->flags;
1939         usvc->timeout           = usvc_compat->timeout;
1940         usvc->netmask           = usvc_compat->netmask;
1941 }
1942
1943 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1944                                    struct ip_vs_dest_user *udest_compat)
1945 {
1946         udest->addr.ip          = udest_compat->addr;
1947         udest->port             = udest_compat->port;
1948         udest->conn_flags       = udest_compat->conn_flags;
1949         udest->weight           = udest_compat->weight;
1950         udest->u_threshold      = udest_compat->u_threshold;
1951         udest->l_threshold      = udest_compat->l_threshold;
1952 }
1953
1954 static int
1955 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1956 {
1957         int ret;
1958         unsigned char arg[MAX_ARG_LEN];
1959         struct ip_vs_service_user *usvc_compat;
1960         struct ip_vs_service_user_kern usvc;
1961         struct ip_vs_service *svc;
1962         struct ip_vs_dest_user *udest_compat;
1963         struct ip_vs_dest_user_kern udest;
1964
1965         if (!capable(CAP_NET_ADMIN))
1966                 return -EPERM;
1967
1968         if (len != set_arglen[SET_CMDID(cmd)]) {
1969                 IP_VS_ERR("set_ctl: len %u != %u\n",
1970                           len, set_arglen[SET_CMDID(cmd)]);
1971                 return -EINVAL;
1972         }
1973
1974         if (copy_from_user(arg, user, len) != 0)
1975                 return -EFAULT;
1976
1977         /* increase the module use count */
1978         ip_vs_use_count_inc();
1979
1980         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1981                 ret = -ERESTARTSYS;
1982                 goto out_dec;
1983         }
1984
1985         if (cmd == IP_VS_SO_SET_FLUSH) {
1986                 /* Flush the virtual service */
1987                 ret = ip_vs_flush();
1988                 goto out_unlock;
1989         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1990                 /* Set timeout values for (tcp tcpfin udp) */
1991                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1992                 goto out_unlock;
1993         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1994                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1995                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1996                 goto out_unlock;
1997         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1998                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1999                 ret = stop_sync_thread(dm->state);
2000                 goto out_unlock;
2001         }
2002
2003         usvc_compat = (struct ip_vs_service_user *)arg;
2004         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2005
2006         /* We only use the new structs internally, so copy userspace compat
2007          * structs to extended internal versions */
2008         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2009         ip_vs_copy_udest_compat(&udest, udest_compat);
2010
2011         if (cmd == IP_VS_SO_SET_ZERO) {
2012                 /* if no service address is set, zero counters in all */
2013                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2014                         ret = ip_vs_zero_all();
2015                         goto out_unlock;
2016                 }
2017         }
2018
2019         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2020         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2021                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2022                           usvc.protocol, NIPQUAD(usvc.addr.ip),
2023                           ntohs(usvc.port), usvc.sched_name);
2024                 ret = -EFAULT;
2025                 goto out_unlock;
2026         }
2027
2028         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2029         if (usvc.fwmark == 0)
2030                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2031                                           &usvc.addr, usvc.port);
2032         else
2033                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2034
2035         if (cmd != IP_VS_SO_SET_ADD
2036             && (svc == NULL || svc->protocol != usvc.protocol)) {
2037                 ret = -ESRCH;
2038                 goto out_unlock;
2039         }
2040
2041         switch (cmd) {
2042         case IP_VS_SO_SET_ADD:
2043                 if (svc != NULL)
2044                         ret = -EEXIST;
2045                 else
2046                         ret = ip_vs_add_service(&usvc, &svc);
2047                 break;
2048         case IP_VS_SO_SET_EDIT:
2049                 ret = ip_vs_edit_service(svc, &usvc);
2050                 break;
2051         case IP_VS_SO_SET_DEL:
2052                 ret = ip_vs_del_service(svc);
2053                 if (!ret)
2054                         goto out_unlock;
2055                 break;
2056         case IP_VS_SO_SET_ZERO:
2057                 ret = ip_vs_zero_service(svc);
2058                 break;
2059         case IP_VS_SO_SET_ADDDEST:
2060                 ret = ip_vs_add_dest(svc, &udest);
2061                 break;
2062         case IP_VS_SO_SET_EDITDEST:
2063                 ret = ip_vs_edit_dest(svc, &udest);
2064                 break;
2065         case IP_VS_SO_SET_DELDEST:
2066                 ret = ip_vs_del_dest(svc, &udest);
2067                 break;
2068         default:
2069                 ret = -EINVAL;
2070         }
2071
2072         if (svc)
2073                 ip_vs_service_put(svc);
2074
2075   out_unlock:
2076         mutex_unlock(&__ip_vs_mutex);
2077   out_dec:
2078         /* decrease the module use count */
2079         ip_vs_use_count_dec();
2080
2081         return ret;
2082 }
2083
2084
2085 static void
2086 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2087 {
2088         spin_lock_bh(&src->lock);
2089         memcpy(dst, src, (char*)&src->lock - (char*)src);
2090         spin_unlock_bh(&src->lock);
2091 }
2092
2093 static void
2094 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2095 {
2096         dst->protocol = src->protocol;
2097         dst->addr = src->addr.ip;
2098         dst->port = src->port;
2099         dst->fwmark = src->fwmark;
2100         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2101         dst->flags = src->flags;
2102         dst->timeout = src->timeout / HZ;
2103         dst->netmask = src->netmask;
2104         dst->num_dests = src->num_dests;
2105         ip_vs_copy_stats(&dst->stats, &src->stats);
2106 }
2107
2108 static inline int
2109 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2110                             struct ip_vs_get_services __user *uptr)
2111 {
2112         int idx, count=0;
2113         struct ip_vs_service *svc;
2114         struct ip_vs_service_entry entry;
2115         int ret = 0;
2116
2117         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2118                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2119                         if (count >= get->num_services)
2120                                 goto out;
2121                         memset(&entry, 0, sizeof(entry));
2122                         ip_vs_copy_service(&entry, svc);
2123                         if (copy_to_user(&uptr->entrytable[count],
2124                                          &entry, sizeof(entry))) {
2125                                 ret = -EFAULT;
2126                                 goto out;
2127                         }
2128                         count++;
2129                 }
2130         }
2131
2132         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2133                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2134                         if (count >= get->num_services)
2135                                 goto out;
2136                         memset(&entry, 0, sizeof(entry));
2137                         ip_vs_copy_service(&entry, svc);
2138                         if (copy_to_user(&uptr->entrytable[count],
2139                                          &entry, sizeof(entry))) {
2140                                 ret = -EFAULT;
2141                                 goto out;
2142                         }
2143                         count++;
2144                 }
2145         }
2146   out:
2147         return ret;
2148 }
2149
2150 static inline int
2151 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2152                          struct ip_vs_get_dests __user *uptr)
2153 {
2154         struct ip_vs_service *svc;
2155         union nf_inet_addr addr = { .ip = get->addr };
2156         int ret = 0;
2157
2158         if (get->fwmark)
2159                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2160         else
2161                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2162                                           get->port);
2163
2164         if (svc) {
2165                 int count = 0;
2166                 struct ip_vs_dest *dest;
2167                 struct ip_vs_dest_entry entry;
2168
2169                 list_for_each_entry(dest, &svc->destinations, n_list) {
2170                         if (count >= get->num_dests)
2171                                 break;
2172
2173                         entry.addr = dest->addr.ip;
2174                         entry.port = dest->port;
2175                         entry.conn_flags = atomic_read(&dest->conn_flags);
2176                         entry.weight = atomic_read(&dest->weight);
2177                         entry.u_threshold = dest->u_threshold;
2178                         entry.l_threshold = dest->l_threshold;
2179                         entry.activeconns = atomic_read(&dest->activeconns);
2180                         entry.inactconns = atomic_read(&dest->inactconns);
2181                         entry.persistconns = atomic_read(&dest->persistconns);
2182                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2183                         if (copy_to_user(&uptr->entrytable[count],
2184                                          &entry, sizeof(entry))) {
2185                                 ret = -EFAULT;
2186                                 break;
2187                         }
2188                         count++;
2189                 }
2190                 ip_vs_service_put(svc);
2191         } else
2192                 ret = -ESRCH;
2193         return ret;
2194 }
2195
2196 static inline void
2197 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2198 {
2199 #ifdef CONFIG_IP_VS_PROTO_TCP
2200         u->tcp_timeout =
2201                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2202         u->tcp_fin_timeout =
2203                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2204 #endif
2205 #ifdef CONFIG_IP_VS_PROTO_UDP
2206         u->udp_timeout =
2207                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2208 #endif
2209 }
2210
2211
2212 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2213 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2214 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2215 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2216 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2217 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2218 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2219
2220 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2221         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2222         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2223         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2224         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2225         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2226         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2227         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2228 };
2229
2230 static int
2231 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2232 {
2233         unsigned char arg[128];
2234         int ret = 0;
2235
2236         if (!capable(CAP_NET_ADMIN))
2237                 return -EPERM;
2238
2239         if (*len < get_arglen[GET_CMDID(cmd)]) {
2240                 IP_VS_ERR("get_ctl: len %u < %u\n",
2241                           *len, get_arglen[GET_CMDID(cmd)]);
2242                 return -EINVAL;
2243         }
2244
2245         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2246                 return -EFAULT;
2247
2248         if (mutex_lock_interruptible(&__ip_vs_mutex))
2249                 return -ERESTARTSYS;
2250
2251         switch (cmd) {
2252         case IP_VS_SO_GET_VERSION:
2253         {
2254                 char buf[64];
2255
2256                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2257                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2258                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2259                         ret = -EFAULT;
2260                         goto out;
2261                 }
2262                 *len = strlen(buf)+1;
2263         }
2264         break;
2265
2266         case IP_VS_SO_GET_INFO:
2267         {
2268                 struct ip_vs_getinfo info;
2269                 info.version = IP_VS_VERSION_CODE;
2270                 info.size = IP_VS_CONN_TAB_SIZE;
2271                 info.num_services = ip_vs_num_services;
2272                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2273                         ret = -EFAULT;
2274         }
2275         break;
2276
2277         case IP_VS_SO_GET_SERVICES:
2278         {
2279                 struct ip_vs_get_services *get;
2280                 int size;
2281
2282                 get = (struct ip_vs_get_services *)arg;
2283                 size = sizeof(*get) +
2284                         sizeof(struct ip_vs_service_entry) * get->num_services;
2285                 if (*len != size) {
2286                         IP_VS_ERR("length: %u != %u\n", *len, size);
2287                         ret = -EINVAL;
2288                         goto out;
2289                 }
2290                 ret = __ip_vs_get_service_entries(get, user);
2291         }
2292         break;
2293
2294         case IP_VS_SO_GET_SERVICE:
2295         {
2296                 struct ip_vs_service_entry *entry;
2297                 struct ip_vs_service *svc;
2298                 union nf_inet_addr addr;
2299
2300                 entry = (struct ip_vs_service_entry *)arg;
2301                 addr.ip = entry->addr;
2302                 if (entry->fwmark)
2303                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2304                 else
2305                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2306                                                   &addr, entry->port);
2307                 if (svc) {
2308                         ip_vs_copy_service(entry, svc);
2309                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2310                                 ret = -EFAULT;
2311                         ip_vs_service_put(svc);
2312                 } else
2313                         ret = -ESRCH;
2314         }
2315         break;
2316
2317         case IP_VS_SO_GET_DESTS:
2318         {
2319                 struct ip_vs_get_dests *get;
2320                 int size;
2321
2322                 get = (struct ip_vs_get_dests *)arg;
2323                 size = sizeof(*get) +
2324                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2325                 if (*len != size) {
2326                         IP_VS_ERR("length: %u != %u\n", *len, size);
2327                         ret = -EINVAL;
2328                         goto out;
2329                 }
2330                 ret = __ip_vs_get_dest_entries(get, user);
2331         }
2332         break;
2333
2334         case IP_VS_SO_GET_TIMEOUT:
2335         {
2336                 struct ip_vs_timeout_user t;
2337
2338                 __ip_vs_get_timeouts(&t);
2339                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2340                         ret = -EFAULT;
2341         }
2342         break;
2343
2344         case IP_VS_SO_GET_DAEMON:
2345         {
2346                 struct ip_vs_daemon_user d[2];
2347
2348                 memset(&d, 0, sizeof(d));
2349                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2350                         d[0].state = IP_VS_STATE_MASTER;
2351                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2352                         d[0].syncid = ip_vs_master_syncid;
2353                 }
2354                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2355                         d[1].state = IP_VS_STATE_BACKUP;
2356                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2357                         d[1].syncid = ip_vs_backup_syncid;
2358                 }
2359                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2360                         ret = -EFAULT;
2361         }
2362         break;
2363
2364         default:
2365                 ret = -EINVAL;
2366         }
2367
2368   out:
2369         mutex_unlock(&__ip_vs_mutex);
2370         return ret;
2371 }
2372
2373
2374 static struct nf_sockopt_ops ip_vs_sockopts = {
2375         .pf             = PF_INET,
2376         .set_optmin     = IP_VS_BASE_CTL,
2377         .set_optmax     = IP_VS_SO_SET_MAX+1,
2378         .set            = do_ip_vs_set_ctl,
2379         .get_optmin     = IP_VS_BASE_CTL,
2380         .get_optmax     = IP_VS_SO_GET_MAX+1,
2381         .get            = do_ip_vs_get_ctl,
2382         .owner          = THIS_MODULE,
2383 };
2384
2385 /*
2386  * Generic Netlink interface
2387  */
2388
2389 /* IPVS genetlink family */
2390 static struct genl_family ip_vs_genl_family = {
2391         .id             = GENL_ID_GENERATE,
2392         .hdrsize        = 0,
2393         .name           = IPVS_GENL_NAME,
2394         .version        = IPVS_GENL_VERSION,
2395         .maxattr        = IPVS_CMD_MAX,
2396 };
2397
2398 /* Policy used for first-level command attributes */
2399 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2400         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2401         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2402         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2403         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2404         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2405         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2406 };
2407
2408 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2409 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2410         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2411         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2412                                             .len = IP_VS_IFNAME_MAXLEN },
2413         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2414 };
2415
2416 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2417 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2418         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2419         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2420         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2421                                             .len = sizeof(union nf_inet_addr) },
2422         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2423         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2424         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2425                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2426         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2427                                             .len = sizeof(struct ip_vs_flags) },
2428         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2429         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2430         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2431 };
2432
2433 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2434 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2435         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2436                                             .len = sizeof(union nf_inet_addr) },
2437         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2438         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2439         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2440         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2441         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2442         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2443         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2444         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2445         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2446 };
2447
2448 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2449                                  struct ip_vs_stats *stats)
2450 {
2451         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2452         if (!nl_stats)
2453                 return -EMSGSIZE;
2454
2455         spin_lock_bh(&stats->lock);
2456
2457         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2458         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2459         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2460         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2461         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2462         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2463         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2464         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2465         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2466         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2467
2468         spin_unlock_bh(&stats->lock);
2469
2470         nla_nest_end(skb, nl_stats);
2471
2472         return 0;
2473
2474 nla_put_failure:
2475         spin_unlock_bh(&stats->lock);
2476         nla_nest_cancel(skb, nl_stats);
2477         return -EMSGSIZE;
2478 }
2479
2480 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2481                                    struct ip_vs_service *svc)
2482 {
2483         struct nlattr *nl_service;
2484         struct ip_vs_flags flags = { .flags = svc->flags,
2485                                      .mask = ~0 };
2486
2487         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2488         if (!nl_service)
2489                 return -EMSGSIZE;
2490
2491         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2492
2493         if (svc->fwmark) {
2494                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2495         } else {
2496                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2497                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2498                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2499         }
2500
2501         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2502         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2503         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2504         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2505
2506         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2507                 goto nla_put_failure;
2508
2509         nla_nest_end(skb, nl_service);
2510
2511         return 0;
2512
2513 nla_put_failure:
2514         nla_nest_cancel(skb, nl_service);
2515         return -EMSGSIZE;
2516 }
2517
2518 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2519                                    struct ip_vs_service *svc,
2520                                    struct netlink_callback *cb)
2521 {
2522         void *hdr;
2523
2524         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2525                           &ip_vs_genl_family, NLM_F_MULTI,
2526                           IPVS_CMD_NEW_SERVICE);
2527         if (!hdr)
2528                 return -EMSGSIZE;
2529
2530         if (ip_vs_genl_fill_service(skb, svc) < 0)
2531                 goto nla_put_failure;
2532
2533         return genlmsg_end(skb, hdr);
2534
2535 nla_put_failure:
2536         genlmsg_cancel(skb, hdr);
2537         return -EMSGSIZE;
2538 }
2539
2540 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2541                                     struct netlink_callback *cb)
2542 {
2543         int idx = 0, i;
2544         int start = cb->args[0];
2545         struct ip_vs_service *svc;
2546
2547         mutex_lock(&__ip_vs_mutex);
2548         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2549                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2550                         if (++idx <= start)
2551                                 continue;
2552                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2553                                 idx--;
2554                                 goto nla_put_failure;
2555                         }
2556                 }
2557         }
2558
2559         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2560                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2561                         if (++idx <= start)
2562                                 continue;
2563                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2564                                 idx--;
2565                                 goto nla_put_failure;
2566                         }
2567                 }
2568         }
2569
2570 nla_put_failure:
2571         mutex_unlock(&__ip_vs_mutex);
2572         cb->args[0] = idx;
2573
2574         return skb->len;
2575 }
2576
2577 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2578                                     struct nlattr *nla, int full_entry)
2579 {
2580         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2581         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2582
2583         /* Parse mandatory identifying service fields first */
2584         if (nla == NULL ||
2585             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2586                 return -EINVAL;
2587
2588         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2589         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2590         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2591         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2592         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2593
2594         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2595                 return -EINVAL;
2596
2597         usvc->af = nla_get_u16(nla_af);
2598         /* For now, only support IPv4 */
2599         if (nla_get_u16(nla_af) != AF_INET)
2600                 return -EAFNOSUPPORT;
2601
2602         if (nla_fwmark) {
2603                 usvc->protocol = IPPROTO_TCP;
2604                 usvc->fwmark = nla_get_u32(nla_fwmark);
2605         } else {
2606                 usvc->protocol = nla_get_u16(nla_protocol);
2607                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2608                 usvc->port = nla_get_u16(nla_port);
2609                 usvc->fwmark = 0;
2610         }
2611
2612         /* If a full entry was requested, check for the additional fields */
2613         if (full_entry) {
2614                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2615                               *nla_netmask;
2616                 struct ip_vs_flags flags;
2617                 struct ip_vs_service *svc;
2618
2619                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2620                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2621                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2622                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2623
2624                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2625                         return -EINVAL;
2626
2627                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2628
2629                 /* prefill flags from service if it already exists */
2630                 if (usvc->fwmark)
2631                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2632                 else
2633                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2634                                                   &usvc->addr, usvc->port);
2635                 if (svc) {
2636                         usvc->flags = svc->flags;
2637                         ip_vs_service_put(svc);
2638                 } else
2639                         usvc->flags = 0;
2640
2641                 /* set new flags from userland */
2642                 usvc->flags = (usvc->flags & ~flags.mask) |
2643                               (flags.flags & flags.mask);
2644                 usvc->sched_name = nla_data(nla_sched);
2645                 usvc->timeout = nla_get_u32(nla_timeout);
2646                 usvc->netmask = nla_get_u32(nla_netmask);
2647         }
2648
2649         return 0;
2650 }
2651
2652 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2653 {
2654         struct ip_vs_service_user_kern usvc;
2655         int ret;
2656
2657         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2658         if (ret)
2659                 return ERR_PTR(ret);
2660
2661         if (usvc.fwmark)
2662                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2663         else
2664                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2665                                            &usvc.addr, usvc.port);
2666 }
2667
2668 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2669 {
2670         struct nlattr *nl_dest;
2671
2672         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2673         if (!nl_dest)
2674                 return -EMSGSIZE;
2675
2676         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2677         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2678
2679         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2680                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2681         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2682         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2683         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2684         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2685                     atomic_read(&dest->activeconns));
2686         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2687                     atomic_read(&dest->inactconns));
2688         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2689                     atomic_read(&dest->persistconns));
2690
2691         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2692                 goto nla_put_failure;
2693
2694         nla_nest_end(skb, nl_dest);
2695
2696         return 0;
2697
2698 nla_put_failure:
2699         nla_nest_cancel(skb, nl_dest);
2700         return -EMSGSIZE;
2701 }
2702
2703 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2704                                 struct netlink_callback *cb)
2705 {
2706         void *hdr;
2707
2708         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2709                           &ip_vs_genl_family, NLM_F_MULTI,
2710                           IPVS_CMD_NEW_DEST);
2711         if (!hdr)
2712                 return -EMSGSIZE;
2713
2714         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2715                 goto nla_put_failure;
2716
2717         return genlmsg_end(skb, hdr);
2718
2719 nla_put_failure:
2720         genlmsg_cancel(skb, hdr);
2721         return -EMSGSIZE;
2722 }
2723
2724 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2725                                  struct netlink_callback *cb)
2726 {
2727         int idx = 0;
2728         int start = cb->args[0];
2729         struct ip_vs_service *svc;
2730         struct ip_vs_dest *dest;
2731         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2732
2733         mutex_lock(&__ip_vs_mutex);
2734
2735         /* Try to find the service for which to dump destinations */
2736         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2737                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2738                 goto out_err;
2739
2740         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2741         if (IS_ERR(svc) || svc == NULL)
2742                 goto out_err;
2743
2744         /* Dump the destinations */
2745         list_for_each_entry(dest, &svc->destinations, n_list) {
2746                 if (++idx <= start)
2747                         continue;
2748                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2749                         idx--;
2750                         goto nla_put_failure;
2751                 }
2752         }
2753
2754 nla_put_failure:
2755         cb->args[0] = idx;
2756         ip_vs_service_put(svc);
2757
2758 out_err:
2759         mutex_unlock(&__ip_vs_mutex);
2760
2761         return skb->len;
2762 }
2763
2764 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2765                                  struct nlattr *nla, int full_entry)
2766 {
2767         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2768         struct nlattr *nla_addr, *nla_port;
2769
2770         /* Parse mandatory identifying destination fields first */
2771         if (nla == NULL ||
2772             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2773                 return -EINVAL;
2774
2775         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2776         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2777
2778         if (!(nla_addr && nla_port))
2779                 return -EINVAL;
2780
2781         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2782         udest->port = nla_get_u16(nla_port);
2783
2784         /* If a full entry was requested, check for the additional fields */
2785         if (full_entry) {
2786                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2787                               *nla_l_thresh;
2788
2789                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2790                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2791                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2792                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2793
2794                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2795                         return -EINVAL;
2796
2797                 udest->conn_flags = nla_get_u32(nla_fwd)
2798                                     & IP_VS_CONN_F_FWD_MASK;
2799                 udest->weight = nla_get_u32(nla_weight);
2800                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2801                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2802         }
2803
2804         return 0;
2805 }
2806
2807 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2808                                   const char *mcast_ifn, __be32 syncid)
2809 {
2810         struct nlattr *nl_daemon;
2811
2812         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2813         if (!nl_daemon)
2814                 return -EMSGSIZE;
2815
2816         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2817         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2818         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2819
2820         nla_nest_end(skb, nl_daemon);
2821
2822         return 0;
2823
2824 nla_put_failure:
2825         nla_nest_cancel(skb, nl_daemon);
2826         return -EMSGSIZE;
2827 }
2828
2829 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2830                                   const char *mcast_ifn, __be32 syncid,
2831                                   struct netlink_callback *cb)
2832 {
2833         void *hdr;
2834         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2835                           &ip_vs_genl_family, NLM_F_MULTI,
2836                           IPVS_CMD_NEW_DAEMON);
2837         if (!hdr)
2838                 return -EMSGSIZE;
2839
2840         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2841                 goto nla_put_failure;
2842
2843         return genlmsg_end(skb, hdr);
2844
2845 nla_put_failure:
2846         genlmsg_cancel(skb, hdr);
2847         return -EMSGSIZE;
2848 }
2849
2850 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2851                                    struct netlink_callback *cb)
2852 {
2853         mutex_lock(&__ip_vs_mutex);
2854         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2855                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2856                                            ip_vs_master_mcast_ifn,
2857                                            ip_vs_master_syncid, cb) < 0)
2858                         goto nla_put_failure;
2859
2860                 cb->args[0] = 1;
2861         }
2862
2863         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2864                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2865                                            ip_vs_backup_mcast_ifn,
2866                                            ip_vs_backup_syncid, cb) < 0)
2867                         goto nla_put_failure;
2868
2869                 cb->args[1] = 1;
2870         }
2871
2872 nla_put_failure:
2873         mutex_unlock(&__ip_vs_mutex);
2874
2875         return skb->len;
2876 }
2877
2878 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2879 {
2880         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2881               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2882               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2883                 return -EINVAL;
2884
2885         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2886                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2887                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2888 }
2889
2890 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2891 {
2892         if (!attrs[IPVS_DAEMON_ATTR_STATE])
2893                 return -EINVAL;
2894
2895         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2896 }
2897
2898 static int ip_vs_genl_set_config(struct nlattr **attrs)
2899 {
2900         struct ip_vs_timeout_user t;
2901
2902         __ip_vs_get_timeouts(&t);
2903
2904         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2905                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2906
2907         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2908                 t.tcp_fin_timeout =
2909                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2910
2911         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2912                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2913
2914         return ip_vs_set_timeout(&t);
2915 }
2916
2917 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2918 {
2919         struct ip_vs_service *svc = NULL;
2920         struct ip_vs_service_user_kern usvc;
2921         struct ip_vs_dest_user_kern udest;
2922         int ret = 0, cmd;
2923         int need_full_svc = 0, need_full_dest = 0;
2924
2925         cmd = info->genlhdr->cmd;
2926
2927         mutex_lock(&__ip_vs_mutex);
2928
2929         if (cmd == IPVS_CMD_FLUSH) {
2930                 ret = ip_vs_flush();
2931                 goto out;
2932         } else if (cmd == IPVS_CMD_SET_CONFIG) {
2933                 ret = ip_vs_genl_set_config(info->attrs);
2934                 goto out;
2935         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2936                    cmd == IPVS_CMD_DEL_DAEMON) {
2937
2938                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2939
2940                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2941                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2942                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
2943                                      ip_vs_daemon_policy)) {
2944                         ret = -EINVAL;
2945                         goto out;
2946                 }
2947
2948                 if (cmd == IPVS_CMD_NEW_DAEMON)
2949                         ret = ip_vs_genl_new_daemon(daemon_attrs);
2950                 else
2951                         ret = ip_vs_genl_del_daemon(daemon_attrs);
2952                 goto out;
2953         } else if (cmd == IPVS_CMD_ZERO &&
2954                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2955                 ret = ip_vs_zero_all();
2956                 goto out;
2957         }
2958
2959         /* All following commands require a service argument, so check if we
2960          * received a valid one. We need a full service specification when
2961          * adding / editing a service. Only identifying members otherwise. */
2962         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2963                 need_full_svc = 1;
2964
2965         ret = ip_vs_genl_parse_service(&usvc,
2966                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
2967                                        need_full_svc);
2968         if (ret)
2969                 goto out;
2970
2971         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2972         if (usvc.fwmark == 0)
2973                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2974                                           &usvc.addr, usvc.port);
2975         else
2976                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2977
2978         /* Unless we're adding a new service, the service must already exist */
2979         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2980                 ret = -ESRCH;
2981                 goto out;
2982         }
2983
2984         /* Destination commands require a valid destination argument. For
2985          * adding / editing a destination, we need a full destination
2986          * specification. */
2987         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2988             cmd == IPVS_CMD_DEL_DEST) {
2989                 if (cmd != IPVS_CMD_DEL_DEST)
2990                         need_full_dest = 1;
2991
2992                 ret = ip_vs_genl_parse_dest(&udest,
2993                                             info->attrs[IPVS_CMD_ATTR_DEST],
2994                                             need_full_dest);
2995                 if (ret)
2996                         goto out;
2997         }
2998
2999         switch (cmd) {
3000         case IPVS_CMD_NEW_SERVICE:
3001                 if (svc == NULL)
3002                         ret = ip_vs_add_service(&usvc, &svc);
3003                 else
3004                         ret = -EEXIST;
3005                 break;
3006         case IPVS_CMD_SET_SERVICE:
3007                 ret = ip_vs_edit_service(svc, &usvc);
3008                 break;
3009         case IPVS_CMD_DEL_SERVICE:
3010                 ret = ip_vs_del_service(svc);
3011                 break;
3012         case IPVS_CMD_NEW_DEST:
3013                 ret = ip_vs_add_dest(svc, &udest);
3014                 break;
3015         case IPVS_CMD_SET_DEST:
3016                 ret = ip_vs_edit_dest(svc, &udest);
3017                 break;
3018         case IPVS_CMD_DEL_DEST:
3019                 ret = ip_vs_del_dest(svc, &udest);
3020                 break;
3021         case IPVS_CMD_ZERO:
3022                 ret = ip_vs_zero_service(svc);
3023                 break;
3024         default:
3025                 ret = -EINVAL;
3026         }
3027
3028 out:
3029         if (svc)
3030                 ip_vs_service_put(svc);
3031         mutex_unlock(&__ip_vs_mutex);
3032
3033         return ret;
3034 }
3035
3036 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3037 {
3038         struct sk_buff *msg;
3039         void *reply;
3040         int ret, cmd, reply_cmd;
3041
3042         cmd = info->genlhdr->cmd;
3043
3044         if (cmd == IPVS_CMD_GET_SERVICE)
3045                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3046         else if (cmd == IPVS_CMD_GET_INFO)
3047                 reply_cmd = IPVS_CMD_SET_INFO;
3048         else if (cmd == IPVS_CMD_GET_CONFIG)
3049                 reply_cmd = IPVS_CMD_SET_CONFIG;
3050         else {
3051                 IP_VS_ERR("unknown Generic Netlink command\n");
3052                 return -EINVAL;
3053         }
3054
3055         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3056         if (!msg)
3057                 return -ENOMEM;
3058
3059         mutex_lock(&__ip_vs_mutex);
3060
3061         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3062         if (reply == NULL)
3063                 goto nla_put_failure;
3064
3065         switch (cmd) {
3066         case IPVS_CMD_GET_SERVICE:
3067         {
3068                 struct ip_vs_service *svc;
3069
3070                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3071                 if (IS_ERR(svc)) {
3072                         ret = PTR_ERR(svc);
3073                         goto out_err;
3074                 } else if (svc) {
3075                         ret = ip_vs_genl_fill_service(msg, svc);
3076                         ip_vs_service_put(svc);
3077                         if (ret)
3078                                 goto nla_put_failure;
3079                 } else {
3080                         ret = -ESRCH;
3081                         goto out_err;
3082                 }
3083
3084                 break;
3085         }
3086
3087         case IPVS_CMD_GET_CONFIG:
3088         {
3089                 struct ip_vs_timeout_user t;
3090
3091                 __ip_vs_get_timeouts(&t);
3092 #ifdef CONFIG_IP_VS_PROTO_TCP
3093                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3094                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3095                             t.tcp_fin_timeout);
3096 #endif
3097 #ifdef CONFIG_IP_VS_PROTO_UDP
3098                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3099 #endif
3100
3101                 break;
3102         }
3103
3104         case IPVS_CMD_GET_INFO:
3105                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3106                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3107                             IP_VS_CONN_TAB_SIZE);
3108                 break;
3109         }
3110
3111         genlmsg_end(msg, reply);
3112         ret = genlmsg_unicast(msg, info->snd_pid);
3113         goto out;
3114
3115 nla_put_failure:
3116         IP_VS_ERR("not enough space in Netlink message\n");
3117         ret = -EMSGSIZE;
3118
3119 out_err:
3120         nlmsg_free(msg);
3121 out:
3122         mutex_unlock(&__ip_vs_mutex);
3123
3124         return ret;
3125 }
3126
3127
3128 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3129         {
3130                 .cmd    = IPVS_CMD_NEW_SERVICE,
3131                 .flags  = GENL_ADMIN_PERM,
3132                 .policy = ip_vs_cmd_policy,
3133                 .doit   = ip_vs_genl_set_cmd,
3134         },
3135         {
3136                 .cmd    = IPVS_CMD_SET_SERVICE,
3137                 .flags  = GENL_ADMIN_PERM,
3138                 .policy = ip_vs_cmd_policy,
3139                 .doit   = ip_vs_genl_set_cmd,
3140         },
3141         {
3142                 .cmd    = IPVS_CMD_DEL_SERVICE,
3143                 .flags  = GENL_ADMIN_PERM,
3144                 .policy = ip_vs_cmd_policy,
3145                 .doit   = ip_vs_genl_set_cmd,
3146         },
3147         {
3148                 .cmd    = IPVS_CMD_GET_SERVICE,
3149                 .flags  = GENL_ADMIN_PERM,
3150                 .doit   = ip_vs_genl_get_cmd,
3151                 .dumpit = ip_vs_genl_dump_services,
3152                 .policy = ip_vs_cmd_policy,
3153         },
3154         {
3155                 .cmd    = IPVS_CMD_NEW_DEST,
3156                 .flags  = GENL_ADMIN_PERM,
3157                 .policy = ip_vs_cmd_policy,
3158                 .doit   = ip_vs_genl_set_cmd,
3159         },
3160         {
3161                 .cmd    = IPVS_CMD_SET_DEST,
3162                 .flags  = GENL_ADMIN_PERM,
3163                 .policy = ip_vs_cmd_policy,
3164                 .doit   = ip_vs_genl_set_cmd,
3165         },
3166         {
3167                 .cmd    = IPVS_CMD_DEL_DEST,
3168                 .flags  = GENL_ADMIN_PERM,
3169                 .policy = ip_vs_cmd_policy,
3170                 .doit   = ip_vs_genl_set_cmd,
3171         },
3172         {
3173                 .cmd    = IPVS_CMD_GET_DEST,
3174                 .flags  = GENL_ADMIN_PERM,
3175                 .policy = ip_vs_cmd_policy,
3176                 .dumpit = ip_vs_genl_dump_dests,
3177         },
3178         {
3179                 .cmd    = IPVS_CMD_NEW_DAEMON,
3180                 .flags  = GENL_ADMIN_PERM,
3181                 .policy = ip_vs_cmd_policy,
3182                 .doit   = ip_vs_genl_set_cmd,
3183         },
3184         {
3185                 .cmd    = IPVS_CMD_DEL_DAEMON,
3186                 .flags  = GENL_ADMIN_PERM,
3187                 .policy = ip_vs_cmd_policy,
3188                 .doit   = ip_vs_genl_set_cmd,
3189         },
3190         {
3191                 .cmd    = IPVS_CMD_GET_DAEMON,
3192                 .flags  = GENL_ADMIN_PERM,
3193                 .dumpit = ip_vs_genl_dump_daemons,
3194         },
3195         {
3196                 .cmd    = IPVS_CMD_SET_CONFIG,
3197                 .flags  = GENL_ADMIN_PERM,
3198                 .policy = ip_vs_cmd_policy,
3199                 .doit   = ip_vs_genl_set_cmd,
3200         },
3201         {
3202                 .cmd    = IPVS_CMD_GET_CONFIG,
3203                 .flags  = GENL_ADMIN_PERM,
3204                 .doit   = ip_vs_genl_get_cmd,
3205         },
3206         {
3207                 .cmd    = IPVS_CMD_GET_INFO,
3208                 .flags  = GENL_ADMIN_PERM,
3209                 .doit   = ip_vs_genl_get_cmd,
3210         },
3211         {
3212                 .cmd    = IPVS_CMD_ZERO,
3213                 .flags  = GENL_ADMIN_PERM,
3214                 .policy = ip_vs_cmd_policy,
3215                 .doit   = ip_vs_genl_set_cmd,
3216         },
3217         {
3218                 .cmd    = IPVS_CMD_FLUSH,
3219                 .flags  = GENL_ADMIN_PERM,
3220                 .doit   = ip_vs_genl_set_cmd,
3221         },
3222 };
3223
3224 static int __init ip_vs_genl_register(void)
3225 {
3226         int ret, i;
3227
3228         ret = genl_register_family(&ip_vs_genl_family);
3229         if (ret)
3230                 return ret;
3231
3232         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3233                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3234                 if (ret)
3235                         goto err_out;
3236         }
3237         return 0;
3238
3239 err_out:
3240         genl_unregister_family(&ip_vs_genl_family);
3241         return ret;
3242 }
3243
3244 static void ip_vs_genl_unregister(void)
3245 {
3246         genl_unregister_family(&ip_vs_genl_family);
3247 }
3248
3249 /* End of Generic Netlink interface definitions */
3250
3251
3252 int __init ip_vs_control_init(void)
3253 {
3254         int ret;
3255         int idx;
3256
3257         EnterFunction(2);
3258
3259         ret = nf_register_sockopt(&ip_vs_sockopts);
3260         if (ret) {
3261                 IP_VS_ERR("cannot register sockopt.\n");
3262                 return ret;
3263         }
3264
3265         ret = ip_vs_genl_register();
3266         if (ret) {
3267                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3268                 nf_unregister_sockopt(&ip_vs_sockopts);
3269                 return ret;
3270         }
3271
3272         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3273         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3274
3275         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3276
3277         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3278         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3279                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3280                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3281         }
3282         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3283                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3284         }
3285
3286         ip_vs_new_estimator(&ip_vs_stats);
3287
3288         /* Hook the defense timer */
3289         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3290
3291         LeaveFunction(2);
3292         return 0;
3293 }
3294
3295
3296 void ip_vs_control_cleanup(void)
3297 {
3298         EnterFunction(2);
3299         ip_vs_trash_cleanup();
3300         cancel_rearming_delayed_work(&defense_work);
3301         cancel_work_sync(&defense_work.work);
3302         ip_vs_kill_estimator(&ip_vs_stats);
3303         unregister_sysctl_table(sysctl_header);
3304         proc_net_remove(&init_net, "ip_vs_stats");
3305         proc_net_remove(&init_net, "ip_vs");
3306         ip_vs_genl_unregister();
3307         nf_unregister_sockopt(&ip_vs_sockopts);
3308         LeaveFunction(2);
3309 }