Merge branch 'master' of /repos/git/net-next-2.6
[safe/jmp/linux-2.6] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34
35 #include <linux/netfilter.h>
36 #include <linux/netfilter_ipv4.h>
37 #include <linux/mutex.h>
38
39 #include <net/net_namespace.h>
40 #include <net/ip.h>
41 #ifdef CONFIG_IP_VS_IPV6
42 #include <net/ipv6.h>
43 #include <net/ip6_route.h>
44 #endif
45 #include <net/route.h>
46 #include <net/sock.h>
47 #include <net/genetlink.h>
48
49 #include <asm/uaccess.h>
50
51 #include <net/ip_vs.h>
52
53 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
54 static DEFINE_MUTEX(__ip_vs_mutex);
55
56 /* lock for service table */
57 static DEFINE_RWLOCK(__ip_vs_svc_lock);
58
59 /* lock for table with the real services */
60 static DEFINE_RWLOCK(__ip_vs_rs_lock);
61
62 /* lock for state and timeout tables */
63 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
64
65 /* lock for drop entry handling */
66 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
67
68 /* lock for drop packet handling */
69 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
70
71 /* 1/rate drop and drop-entry variables */
72 int ip_vs_drop_rate = 0;
73 int ip_vs_drop_counter = 0;
74 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
75
76 /* number of virtual services */
77 static int ip_vs_num_services = 0;
78
79 /* sysctl variables */
80 static int sysctl_ip_vs_drop_entry = 0;
81 static int sysctl_ip_vs_drop_packet = 0;
82 static int sysctl_ip_vs_secure_tcp = 0;
83 static int sysctl_ip_vs_amemthresh = 1024;
84 static int sysctl_ip_vs_am_droprate = 10;
85 int sysctl_ip_vs_cache_bypass = 0;
86 int sysctl_ip_vs_expire_nodest_conn = 0;
87 int sysctl_ip_vs_expire_quiescent_template = 0;
88 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
89 int sysctl_ip_vs_nat_icmp_send = 0;
90
91
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only built when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
100
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local to this host.
 *
 *	The address is looked up in the init_net routing table and is
 *	considered local when the resulting route leaves through a
 *	loopback device.  (Approach taken from rt6_fill_node() in
 *	net/ipv6/route.c, is there a better way?)
 *
 *	Returns 1 if the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	is_local = dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands back a referenced route entry; drop that
	 * reference here, otherwise every call leaks it.
	 */
	if (dst)
		dst_release(dst);

	return is_local;
}
#endif
121 /*
122  *      update_defense_level is called from keventd and from sysctl,
123  *      so it needs to protect itself from softirqs
124  */
125 static void update_defense_level(void)
126 {
127         struct sysinfo i;
128         static int old_secure_tcp = 0;
129         int availmem;
130         int nomem;
131         int to_change = -1;
132
133         /* we only count free and buffered memory (in pages) */
134         si_meminfo(&i);
135         availmem = i.freeram + i.bufferram;
136         /* however in linux 2.5 the i.bufferram is total page cache size,
137            we need adjust it */
138         /* si_swapinfo(&i); */
139         /* availmem = availmem - (i.totalswap - i.freeswap); */
140
141         nomem = (availmem < sysctl_ip_vs_amemthresh);
142
143         local_bh_disable();
144
145         /* drop_entry */
146         spin_lock(&__ip_vs_dropentry_lock);
147         switch (sysctl_ip_vs_drop_entry) {
148         case 0:
149                 atomic_set(&ip_vs_dropentry, 0);
150                 break;
151         case 1:
152                 if (nomem) {
153                         atomic_set(&ip_vs_dropentry, 1);
154                         sysctl_ip_vs_drop_entry = 2;
155                 } else {
156                         atomic_set(&ip_vs_dropentry, 0);
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         atomic_set(&ip_vs_dropentry, 1);
162                 } else {
163                         atomic_set(&ip_vs_dropentry, 0);
164                         sysctl_ip_vs_drop_entry = 1;
165                 };
166                 break;
167         case 3:
168                 atomic_set(&ip_vs_dropentry, 1);
169                 break;
170         }
171         spin_unlock(&__ip_vs_dropentry_lock);
172
173         /* drop_packet */
174         spin_lock(&__ip_vs_droppacket_lock);
175         switch (sysctl_ip_vs_drop_packet) {
176         case 0:
177                 ip_vs_drop_rate = 0;
178                 break;
179         case 1:
180                 if (nomem) {
181                         ip_vs_drop_rate = ip_vs_drop_counter
182                                 = sysctl_ip_vs_amemthresh /
183                                 (sysctl_ip_vs_amemthresh-availmem);
184                         sysctl_ip_vs_drop_packet = 2;
185                 } else {
186                         ip_vs_drop_rate = 0;
187                 }
188                 break;
189         case 2:
190                 if (nomem) {
191                         ip_vs_drop_rate = ip_vs_drop_counter
192                                 = sysctl_ip_vs_amemthresh /
193                                 (sysctl_ip_vs_amemthresh-availmem);
194                 } else {
195                         ip_vs_drop_rate = 0;
196                         sysctl_ip_vs_drop_packet = 1;
197                 }
198                 break;
199         case 3:
200                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
201                 break;
202         }
203         spin_unlock(&__ip_vs_droppacket_lock);
204
205         /* secure_tcp */
206         write_lock(&__ip_vs_securetcp_lock);
207         switch (sysctl_ip_vs_secure_tcp) {
208         case 0:
209                 if (old_secure_tcp >= 2)
210                         to_change = 0;
211                 break;
212         case 1:
213                 if (nomem) {
214                         if (old_secure_tcp < 2)
215                                 to_change = 1;
216                         sysctl_ip_vs_secure_tcp = 2;
217                 } else {
218                         if (old_secure_tcp >= 2)
219                                 to_change = 0;
220                 }
221                 break;
222         case 2:
223                 if (nomem) {
224                         if (old_secure_tcp < 2)
225                                 to_change = 1;
226                 } else {
227                         if (old_secure_tcp >= 2)
228                                 to_change = 0;
229                         sysctl_ip_vs_secure_tcp = 1;
230                 }
231                 break;
232         case 3:
233                 if (old_secure_tcp < 2)
234                         to_change = 1;
235                 break;
236         }
237         old_secure_tcp = sysctl_ip_vs_secure_tcp;
238         if (to_change >= 0)
239                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
240         write_unlock(&__ip_vs_securetcp_lock);
241
242         local_bh_enable();
243 }
244
245
246 /*
247  *      Timer for checking the defense
248  */
249 #define DEFENSE_TIMER_PERIOD    1*HZ
250 static void defense_work_handler(struct work_struct *work);
251 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
252
253 static void defense_work_handler(struct work_struct *work)
254 {
255         update_defense_level();
256         if (atomic_read(&ip_vs_dropentry))
257                 ip_vs_random_dropentry();
258
259         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
260 }
261
262 int
263 ip_vs_use_count_inc(void)
264 {
265         return try_module_get(THIS_MODULE);
266 }
267
268 void
269 ip_vs_use_count_dec(void)
270 {
271         module_put(THIS_MODULE);
272 }
273
274
275 /*
276  *      Hash table: for virtual service lookups
277  */
278 #define IP_VS_SVC_TAB_BITS 8
279 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
280 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
281
282 /* the service table hashed by <protocol, addr, port> */
283 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
284 /* the service table hashed by fwmark */
285 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
286
287 /*
288  *      Hash table: for real service lookups
289  */
290 #define IP_VS_RTAB_BITS 4
291 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
292 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
293
294 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
295
296 /*
297  *      Trash for destinations
298  */
299 static LIST_HEAD(ip_vs_dest_trash);
300
301 /*
302  *      FTP & NULL virtual service counters
303  */
304 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
305 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
306
307
308 /*
309  *      Returns hash value for virtual service
310  */
311 static __inline__ unsigned
312 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
313                   __be16 port)
314 {
315         register unsigned porth = ntohs(port);
316         __be32 addr_fold = addr->ip;
317
318 #ifdef CONFIG_IP_VS_IPV6
319         if (af == AF_INET6)
320                 addr_fold = addr->ip6[0]^addr->ip6[1]^
321                             addr->ip6[2]^addr->ip6[3];
322 #endif
323
324         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
325                 & IP_VS_SVC_TAB_MASK;
326 }
327
328 /*
329  *      Returns hash value of fwmark for virtual service lookup
330  */
331 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
332 {
333         return fwmark & IP_VS_SVC_TAB_MASK;
334 }
335
336 /*
337  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
338  *      or in the ip_vs_svc_fwm_table by fwmark.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_hash(struct ip_vs_service *svc)
342 {
343         unsigned hash;
344
345         if (svc->flags & IP_VS_SVC_F_HASHED) {
346                 pr_err("%s(): request for already hashed, called from %pF\n",
347                        __func__, __builtin_return_address(0));
348                 return 0;
349         }
350
351         if (svc->fwmark == 0) {
352                 /*
353                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
354                  */
355                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
356                                          svc->port);
357                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358         } else {
359                 /*
360                  *  Hash it by fwmark in ip_vs_svc_fwm_table
361                  */
362                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
363                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364         }
365
366         svc->flags |= IP_VS_SVC_F_HASHED;
367         /* increase its refcnt because it is referenced by the svc table */
368         atomic_inc(&svc->refcnt);
369         return 1;
370 }
371
372
373 /*
374  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
375  *      Should be called with locked tables.
376  */
377 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378 {
379         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
380                 pr_err("%s(): request for unhash flagged, called from %pF\n",
381                        __func__, __builtin_return_address(0));
382                 return 0;
383         }
384
385         if (svc->fwmark == 0) {
386                 /* Remove it from the ip_vs_svc_table table */
387                 list_del(&svc->s_list);
388         } else {
389                 /* Remove it from the ip_vs_svc_fwm_table table */
390                 list_del(&svc->f_list);
391         }
392
393         svc->flags &= ~IP_VS_SVC_F_HASHED;
394         atomic_dec(&svc->refcnt);
395         return 1;
396 }
397
398
399 /*
400  *      Get service by {proto,addr,port} in the service table.
401  */
402 static inline struct ip_vs_service *
403 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
404                     __be16 vport)
405 {
406         unsigned hash;
407         struct ip_vs_service *svc;
408
409         /* Check for "full" addressed entries */
410         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
411
412         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
413                 if ((svc->af == af)
414                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
415                     && (svc->port == vport)
416                     && (svc->protocol == protocol)) {
417                         /* HIT */
418                         atomic_inc(&svc->usecnt);
419                         return svc;
420                 }
421         }
422
423         return NULL;
424 }
425
426
427 /*
428  *      Get service by {fwmark} in the service table.
429  */
430 static inline struct ip_vs_service *
431 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
432 {
433         unsigned hash;
434         struct ip_vs_service *svc;
435
436         /* Check for fwmark addressed entries */
437         hash = ip_vs_svc_fwm_hashkey(fwmark);
438
439         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
440                 if (svc->fwmark == fwmark && svc->af == af) {
441                         /* HIT */
442                         atomic_inc(&svc->usecnt);
443                         return svc;
444                 }
445         }
446
447         return NULL;
448 }
449
450 struct ip_vs_service *
451 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
452                   const union nf_inet_addr *vaddr, __be16 vport)
453 {
454         struct ip_vs_service *svc;
455
456         read_lock(&__ip_vs_svc_lock);
457
458         /*
459          *      Check the table hashed by fwmark first
460          */
461         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
462                 goto out;
463
464         /*
465          *      Check the table hashed by <protocol,addr,port>
466          *      for "full" addressed entries
467          */
468         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
469
470         if (svc == NULL
471             && protocol == IPPROTO_TCP
472             && atomic_read(&ip_vs_ftpsvc_counter)
473             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
474                 /*
475                  * Check if ftp service entry exists, the packet
476                  * might belong to FTP data connections.
477                  */
478                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
479         }
480
481         if (svc == NULL
482             && atomic_read(&ip_vs_nullsvc_counter)) {
483                 /*
484                  * Check if the catch-all port (port zero) exists
485                  */
486                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
487         }
488
489   out:
490         read_unlock(&__ip_vs_svc_lock);
491
492         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
493                       fwmark, ip_vs_proto_name(protocol),
494                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
495                       svc ? "hit" : "not hit");
496
497         return svc;
498 }
499
500
501 static inline void
502 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
503 {
504         atomic_inc(&svc->refcnt);
505         dest->svc = svc;
506 }
507
508 static inline void
509 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
510 {
511         struct ip_vs_service *svc = dest->svc;
512
513         dest->svc = NULL;
514         if (atomic_dec_and_test(&svc->refcnt))
515                 kfree(svc);
516 }
517
518
519 /*
520  *      Returns hash value for real service
521  */
522 static inline unsigned ip_vs_rs_hashkey(int af,
523                                             const union nf_inet_addr *addr,
524                                             __be16 port)
525 {
526         register unsigned porth = ntohs(port);
527         __be32 addr_fold = addr->ip;
528
529 #ifdef CONFIG_IP_VS_IPV6
530         if (af == AF_INET6)
531                 addr_fold = addr->ip6[0]^addr->ip6[1]^
532                             addr->ip6[2]^addr->ip6[3];
533 #endif
534
535         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
536                 & IP_VS_RTAB_MASK;
537 }
538
539 /*
540  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
541  *      should be called with locked tables.
542  */
543 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
544 {
545         unsigned hash;
546
547         if (!list_empty(&dest->d_list)) {
548                 return 0;
549         }
550
551         /*
552          *      Hash by proto,addr,port,
553          *      which are the parameters of the real service.
554          */
555         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
556
557         list_add(&dest->d_list, &ip_vs_rtable[hash]);
558
559         return 1;
560 }
561
562 /*
563  *      UNhashes ip_vs_dest from ip_vs_rtable.
564  *      should be called with locked tables.
565  */
566 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
567 {
568         /*
569          * Remove it from the ip_vs_rtable table.
570          */
571         if (!list_empty(&dest->d_list)) {
572                 list_del(&dest->d_list);
573                 INIT_LIST_HEAD(&dest->d_list);
574         }
575
576         return 1;
577 }
578
579 /*
580  *      Lookup real service by <proto,addr,port> in the real service table.
581  */
582 struct ip_vs_dest *
583 ip_vs_lookup_real_service(int af, __u16 protocol,
584                           const union nf_inet_addr *daddr,
585                           __be16 dport)
586 {
587         unsigned hash;
588         struct ip_vs_dest *dest;
589
590         /*
591          *      Check for "full" addressed entries
592          *      Return the first found entry
593          */
594         hash = ip_vs_rs_hashkey(af, daddr, dport);
595
596         read_lock(&__ip_vs_rs_lock);
597         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
598                 if ((dest->af == af)
599                     && ip_vs_addr_equal(af, &dest->addr, daddr)
600                     && (dest->port == dport)
601                     && ((dest->protocol == protocol) ||
602                         dest->vfwmark)) {
603                         /* HIT */
604                         read_unlock(&__ip_vs_rs_lock);
605                         return dest;
606                 }
607         }
608         read_unlock(&__ip_vs_rs_lock);
609
610         return NULL;
611 }
612
613 /*
614  *      Lookup destination by {addr,port} in the given service
615  */
616 static struct ip_vs_dest *
617 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
618                   __be16 dport)
619 {
620         struct ip_vs_dest *dest;
621
622         /*
623          * Find the destination for the given service
624          */
625         list_for_each_entry(dest, &svc->destinations, n_list) {
626                 if ((dest->af == svc->af)
627                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
628                     && (dest->port == dport)) {
629                         /* HIT */
630                         return dest;
631                 }
632         }
633
634         return NULL;
635 }
636
637 /*
638  * Find destination by {daddr,dport,vaddr,protocol}
639  * Cretaed to be used in ip_vs_process_message() in
640  * the backup synchronization daemon. It finds the
641  * destination to be bound to the received connection
642  * on the backup.
643  *
644  * ip_vs_lookup_real_service() looked promissing, but
645  * seems not working as expected.
646  */
647 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
648                                    __be16 dport,
649                                    const union nf_inet_addr *vaddr,
650                                    __be16 vport, __u16 protocol)
651 {
652         struct ip_vs_dest *dest;
653         struct ip_vs_service *svc;
654
655         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
656         if (!svc)
657                 return NULL;
658         dest = ip_vs_lookup_dest(svc, daddr, dport);
659         if (dest)
660                 atomic_inc(&dest->refcnt);
661         ip_vs_service_put(svc);
662         return dest;
663 }
664
665 /*
666  *  Lookup dest by {svc,addr,port} in the destination trash.
667  *  The destination trash is used to hold the destinations that are removed
668  *  from the service table but are still referenced by some conn entries.
669  *  The reason to add the destination trash is when the dest is temporary
670  *  down (either by administrator or by monitor program), the dest can be
671  *  picked back from the trash, the remaining connections to the dest can
672  *  continue, and the counting information of the dest is also useful for
673  *  scheduling.
674  */
675 static struct ip_vs_dest *
676 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
677                      __be16 dport)
678 {
679         struct ip_vs_dest *dest, *nxt;
680
681         /*
682          * Find the destination in trash
683          */
684         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
685                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
686                               "dest->refcnt=%d\n",
687                               dest->vfwmark,
688                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
689                               ntohs(dest->port),
690                               atomic_read(&dest->refcnt));
691                 if (dest->af == svc->af &&
692                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
693                     dest->port == dport &&
694                     dest->vfwmark == svc->fwmark &&
695                     dest->protocol == svc->protocol &&
696                     (svc->fwmark ||
697                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
698                       dest->vport == svc->port))) {
699                         /* HIT */
700                         return dest;
701                 }
702
703                 /*
704                  * Try to purge the destination from trash if not referenced
705                  */
706                 if (atomic_read(&dest->refcnt) == 1) {
707                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
708                                       "from trash\n",
709                                       dest->vfwmark,
710                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
711                                       ntohs(dest->port));
712                         list_del(&dest->n_list);
713                         ip_vs_dst_reset(dest);
714                         __ip_vs_unbind_svc(dest);
715                         kfree(dest);
716                 }
717         }
718
719         return NULL;
720 }
721
722
723 /*
724  *  Clean up all the destinations in the trash
725  *  Called by the ip_vs_control_cleanup()
726  *
727  *  When the ip_vs_control_clearup is activated by ipvs module exit,
728  *  the service tables must have been flushed and all the connections
729  *  are expired, and the refcnt of each destination in the trash must
730  *  be 1, so we simply release them here.
731  */
732 static void ip_vs_trash_cleanup(void)
733 {
734         struct ip_vs_dest *dest, *nxt;
735
736         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
737                 list_del(&dest->n_list);
738                 ip_vs_dst_reset(dest);
739                 __ip_vs_unbind_svc(dest);
740                 kfree(dest);
741         }
742 }
743
744
745 static void
746 ip_vs_zero_stats(struct ip_vs_stats *stats)
747 {
748         spin_lock_bh(&stats->lock);
749
750         memset(&stats->ustats, 0, sizeof(stats->ustats));
751         ip_vs_zero_estimator(stats);
752
753         spin_unlock_bh(&stats->lock);
754 }
755
756 /*
757  *      Update a destination in the given service
758  */
759 static void
760 __ip_vs_update_dest(struct ip_vs_service *svc,
761                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
762 {
763         int conn_flags;
764
765         /* set the weight and the flags */
766         atomic_set(&dest->weight, udest->weight);
767         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
768
769         /* check if local node and update the flags */
770 #ifdef CONFIG_IP_VS_IPV6
771         if (svc->af == AF_INET6) {
772                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
773                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
774                                 | IP_VS_CONN_F_LOCALNODE;
775                 }
776         } else
777 #endif
778                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
779                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
780                                 | IP_VS_CONN_F_LOCALNODE;
781                 }
782
783         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
784         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
785                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
786         } else {
787                 /*
788                  *    Put the real service in ip_vs_rtable if not present.
789                  *    For now only for NAT!
790                  */
791                 write_lock_bh(&__ip_vs_rs_lock);
792                 ip_vs_rs_hash(dest);
793                 write_unlock_bh(&__ip_vs_rs_lock);
794         }
795         atomic_set(&dest->conn_flags, conn_flags);
796
797         /* bind the service */
798         if (!dest->svc) {
799                 __ip_vs_bind_svc(dest, svc);
800         } else {
801                 if (dest->svc != svc) {
802                         __ip_vs_unbind_svc(dest);
803                         ip_vs_zero_stats(&dest->stats);
804                         __ip_vs_bind_svc(dest, svc);
805                 }
806         }
807
808         /* set the dest status flags */
809         dest->flags |= IP_VS_DEST_F_AVAILABLE;
810
811         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
812                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
813         dest->u_threshold = udest->u_threshold;
814         dest->l_threshold = udest->l_threshold;
815 }
816
817
818 /*
819  *      Create a destination for the given service
820  */
821 static int
822 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
823                struct ip_vs_dest **dest_p)
824 {
825         struct ip_vs_dest *dest;
826         unsigned atype;
827
828         EnterFunction(2);
829
830 #ifdef CONFIG_IP_VS_IPV6
831         if (svc->af == AF_INET6) {
832                 atype = ipv6_addr_type(&udest->addr.in6);
833                 if ((!(atype & IPV6_ADDR_UNICAST) ||
834                         atype & IPV6_ADDR_LINKLOCAL) &&
835                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
836                         return -EINVAL;
837         } else
838 #endif
839         {
840                 atype = inet_addr_type(&init_net, udest->addr.ip);
841                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
842                         return -EINVAL;
843         }
844
845         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
846         if (dest == NULL) {
847                 pr_err("%s(): no memory.\n", __func__);
848                 return -ENOMEM;
849         }
850
851         dest->af = svc->af;
852         dest->protocol = svc->protocol;
853         dest->vaddr = svc->addr;
854         dest->vport = svc->port;
855         dest->vfwmark = svc->fwmark;
856         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
857         dest->port = udest->port;
858
859         atomic_set(&dest->activeconns, 0);
860         atomic_set(&dest->inactconns, 0);
861         atomic_set(&dest->persistconns, 0);
862         atomic_set(&dest->refcnt, 0);
863
864         INIT_LIST_HEAD(&dest->d_list);
865         spin_lock_init(&dest->dst_lock);
866         spin_lock_init(&dest->stats.lock);
867         __ip_vs_update_dest(svc, dest, udest);
868         ip_vs_new_estimator(&dest->stats);
869
870         *dest_p = dest;
871
872         LeaveFunction(2);
873         return 0;
874 }
875
876
877 /*
878  *      Add a destination into an existing service
879  */
880 static int
881 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
882 {
883         struct ip_vs_dest *dest;
884         union nf_inet_addr daddr;
885         __be16 dport = udest->port;
886         int ret;
887
888         EnterFunction(2);
889
890         if (udest->weight < 0) {
891                 pr_err("%s(): server weight less than zero\n", __func__);
892                 return -ERANGE;
893         }
894
895         if (udest->l_threshold > udest->u_threshold) {
896                 pr_err("%s(): lower threshold is higher than upper threshold\n",
897                         __func__);
898                 return -ERANGE;
899         }
900
901         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
902
903         /*
904          * Check if the dest already exists in the list
905          */
906         dest = ip_vs_lookup_dest(svc, &daddr, dport);
907
908         if (dest != NULL) {
909                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
910                 return -EEXIST;
911         }
912
913         /*
914          * Check if the dest already exists in the trash and
915          * is from the same service
916          */
917         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
918
919         if (dest != NULL) {
920                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
921                               "dest->refcnt=%d, service %u/%s:%u\n",
922                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
923                               atomic_read(&dest->refcnt),
924                               dest->vfwmark,
925                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
926                               ntohs(dest->vport));
927
928                 __ip_vs_update_dest(svc, dest, udest);
929
930                 /*
931                  * Get the destination from the trash
932                  */
933                 list_del(&dest->n_list);
934
935                 ip_vs_new_estimator(&dest->stats);
936
937                 write_lock_bh(&__ip_vs_svc_lock);
938
939                 /*
940                  * Wait until all other svc users go away.
941                  */
942                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
943
944                 list_add(&dest->n_list, &svc->destinations);
945                 svc->num_dests++;
946
947                 /* call the update_service function of its scheduler */
948                 if (svc->scheduler->update_service)
949                         svc->scheduler->update_service(svc);
950
951                 write_unlock_bh(&__ip_vs_svc_lock);
952                 return 0;
953         }
954
955         /*
956          * Allocate and initialize the dest structure
957          */
958         ret = ip_vs_new_dest(svc, udest, &dest);
959         if (ret) {
960                 return ret;
961         }
962
963         /*
964          * Add the dest entry into the list
965          */
966         atomic_inc(&dest->refcnt);
967
968         write_lock_bh(&__ip_vs_svc_lock);
969
970         /*
971          * Wait until all other svc users go away.
972          */
973         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
974
975         list_add(&dest->n_list, &svc->destinations);
976         svc->num_dests++;
977
978         /* call the update_service function of its scheduler */
979         if (svc->scheduler->update_service)
980                 svc->scheduler->update_service(svc);
981
982         write_unlock_bh(&__ip_vs_svc_lock);
983
984         LeaveFunction(2);
985
986         return 0;
987 }
988
989
990 /*
991  *      Edit a destination in the given service
992  */
993 static int
994 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
995 {
996         struct ip_vs_dest *dest;
997         union nf_inet_addr daddr;
998         __be16 dport = udest->port;
999
1000         EnterFunction(2);
1001
1002         if (udest->weight < 0) {
1003                 pr_err("%s(): server weight less than zero\n", __func__);
1004                 return -ERANGE;
1005         }
1006
1007         if (udest->l_threshold > udest->u_threshold) {
1008                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1009                         __func__);
1010                 return -ERANGE;
1011         }
1012
1013         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1014
1015         /*
1016          *  Lookup the destination list
1017          */
1018         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1019
1020         if (dest == NULL) {
1021                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1022                 return -ENOENT;
1023         }
1024
1025         __ip_vs_update_dest(svc, dest, udest);
1026
1027         write_lock_bh(&__ip_vs_svc_lock);
1028
1029         /* Wait until all other svc users go away */
1030         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1031
1032         /* call the update_service, because server weight may be changed */
1033         if (svc->scheduler->update_service)
1034                 svc->scheduler->update_service(svc);
1035
1036         write_unlock_bh(&__ip_vs_svc_lock);
1037
1038         LeaveFunction(2);
1039
1040         return 0;
1041 }
1042
1043
1044 /*
1045  *      Delete a destination (must be already unlinked from the service)
1046  */
1047 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1048 {
1049         ip_vs_kill_estimator(&dest->stats);
1050
1051         /*
1052          *  Remove it from the d-linked list with the real services.
1053          */
1054         write_lock_bh(&__ip_vs_rs_lock);
1055         ip_vs_rs_unhash(dest);
1056         write_unlock_bh(&__ip_vs_rs_lock);
1057
1058         /*
1059          *  Decrease the refcnt of the dest, and free the dest
1060          *  if nobody refers to it (refcnt=0). Otherwise, throw
1061          *  the destination into the trash.
1062          */
1063         if (atomic_dec_and_test(&dest->refcnt)) {
1064                 ip_vs_dst_reset(dest);
1065                 /* simply decrease svc->refcnt here, let the caller check
1066                    and release the service if nobody refers to it.
1067                    Only user context can release destination and service,
1068                    and only one user context can update virtual service at a
1069                    time, so the operation here is OK */
1070                 atomic_dec(&dest->svc->refcnt);
1071                 kfree(dest);
1072         } else {
1073                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1074                               "dest->refcnt=%d\n",
1075                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1076                               ntohs(dest->port),
1077                               atomic_read(&dest->refcnt));
1078                 list_add(&dest->n_list, &ip_vs_dest_trash);
1079                 atomic_inc(&dest->refcnt);
1080         }
1081 }
1082
1083
1084 /*
1085  *      Unlink a destination from the given service
1086  */
1087 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1088                                 struct ip_vs_dest *dest,
1089                                 int svcupd)
1090 {
1091         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1092
1093         /*
1094          *  Remove it from the d-linked destination list.
1095          */
1096         list_del(&dest->n_list);
1097         svc->num_dests--;
1098
1099         /*
1100          *  Call the update_service function of its scheduler
1101          */
1102         if (svcupd && svc->scheduler->update_service)
1103                         svc->scheduler->update_service(svc);
1104 }
1105
1106
1107 /*
1108  *      Delete a destination server in the given service
1109  */
1110 static int
1111 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1112 {
1113         struct ip_vs_dest *dest;
1114         __be16 dport = udest->port;
1115
1116         EnterFunction(2);
1117
1118         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1119
1120         if (dest == NULL) {
1121                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1122                 return -ENOENT;
1123         }
1124
1125         write_lock_bh(&__ip_vs_svc_lock);
1126
1127         /*
1128          *      Wait until all other svc users go away.
1129          */
1130         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1131
1132         /*
1133          *      Unlink dest from the service
1134          */
1135         __ip_vs_unlink_dest(svc, dest, 1);
1136
1137         write_unlock_bh(&__ip_vs_svc_lock);
1138
1139         /*
1140          *      Delete the destination
1141          */
1142         __ip_vs_del_dest(dest);
1143
1144         LeaveFunction(2);
1145
1146         return 0;
1147 }
1148
1149
1150 /*
1151  *      Add a service into the service hash table
1152  */
1153 static int
1154 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1155                   struct ip_vs_service **svc_p)
1156 {
1157         int ret = 0;
1158         struct ip_vs_scheduler *sched = NULL;
1159         struct ip_vs_service *svc = NULL;
1160
1161         /* increase the module use count */
1162         ip_vs_use_count_inc();
1163
1164         /* Lookup the scheduler by 'u->sched_name' */
1165         sched = ip_vs_scheduler_get(u->sched_name);
1166         if (sched == NULL) {
1167                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1168                 ret = -ENOENT;
1169                 goto out_mod_dec;
1170         }
1171
1172 #ifdef CONFIG_IP_VS_IPV6
1173         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1174                 ret = -EINVAL;
1175                 goto out_err;
1176         }
1177 #endif
1178
1179         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1180         if (svc == NULL) {
1181                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1182                 ret = -ENOMEM;
1183                 goto out_err;
1184         }
1185
1186         /* I'm the first user of the service */
1187         atomic_set(&svc->usecnt, 1);
1188         atomic_set(&svc->refcnt, 0);
1189
1190         svc->af = u->af;
1191         svc->protocol = u->protocol;
1192         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1193         svc->port = u->port;
1194         svc->fwmark = u->fwmark;
1195         svc->flags = u->flags;
1196         svc->timeout = u->timeout * HZ;
1197         svc->netmask = u->netmask;
1198
1199         INIT_LIST_HEAD(&svc->destinations);
1200         rwlock_init(&svc->sched_lock);
1201         spin_lock_init(&svc->stats.lock);
1202
1203         /* Bind the scheduler */
1204         ret = ip_vs_bind_scheduler(svc, sched);
1205         if (ret)
1206                 goto out_err;
1207         sched = NULL;
1208
1209         /* Update the virtual service counters */
1210         if (svc->port == FTPPORT)
1211                 atomic_inc(&ip_vs_ftpsvc_counter);
1212         else if (svc->port == 0)
1213                 atomic_inc(&ip_vs_nullsvc_counter);
1214
1215         ip_vs_new_estimator(&svc->stats);
1216
1217         /* Count only IPv4 services for old get/setsockopt interface */
1218         if (svc->af == AF_INET)
1219                 ip_vs_num_services++;
1220
1221         /* Hash the service into the service table */
1222         write_lock_bh(&__ip_vs_svc_lock);
1223         ip_vs_svc_hash(svc);
1224         write_unlock_bh(&__ip_vs_svc_lock);
1225
1226         *svc_p = svc;
1227         return 0;
1228
1229   out_err:
1230         if (svc != NULL) {
1231                 if (svc->scheduler)
1232                         ip_vs_unbind_scheduler(svc);
1233                 if (svc->inc) {
1234                         local_bh_disable();
1235                         ip_vs_app_inc_put(svc->inc);
1236                         local_bh_enable();
1237                 }
1238                 kfree(svc);
1239         }
1240         ip_vs_scheduler_put(sched);
1241
1242   out_mod_dec:
1243         /* decrease the module use count */
1244         ip_vs_use_count_dec();
1245
1246         return ret;
1247 }
1248
1249
1250 /*
1251  *      Edit a service and bind it with a new scheduler
1252  */
1253 static int
1254 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1255 {
1256         struct ip_vs_scheduler *sched, *old_sched;
1257         int ret = 0;
1258
1259         /*
1260          * Lookup the scheduler, by 'u->sched_name'
1261          */
1262         sched = ip_vs_scheduler_get(u->sched_name);
1263         if (sched == NULL) {
1264                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1265                 return -ENOENT;
1266         }
1267         old_sched = sched;
1268
1269 #ifdef CONFIG_IP_VS_IPV6
1270         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1271                 ret = -EINVAL;
1272                 goto out;
1273         }
1274 #endif
1275
1276         write_lock_bh(&__ip_vs_svc_lock);
1277
1278         /*
1279          * Wait until all other svc users go away.
1280          */
1281         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1282
1283         /*
1284          * Set the flags and timeout value
1285          */
1286         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1287         svc->timeout = u->timeout * HZ;
1288         svc->netmask = u->netmask;
1289
1290         old_sched = svc->scheduler;
1291         if (sched != old_sched) {
1292                 /*
1293                  * Unbind the old scheduler
1294                  */
1295                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1296                         old_sched = sched;
1297                         goto out_unlock;
1298                 }
1299
1300                 /*
1301                  * Bind the new scheduler
1302                  */
1303                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1304                         /*
1305                          * If ip_vs_bind_scheduler fails, restore the old
1306                          * scheduler.
1307                          * The main reason of failure is out of memory.
1308                          *
1309                          * The question is if the old scheduler can be
1310                          * restored all the time. TODO: if it cannot be
1311                          * restored some time, we must delete the service,
1312                          * otherwise the system may crash.
1313                          */
1314                         ip_vs_bind_scheduler(svc, old_sched);
1315                         old_sched = sched;
1316                         goto out_unlock;
1317                 }
1318         }
1319
1320   out_unlock:
1321         write_unlock_bh(&__ip_vs_svc_lock);
1322 #ifdef CONFIG_IP_VS_IPV6
1323   out:
1324 #endif
1325
1326         if (old_sched)
1327                 ip_vs_scheduler_put(old_sched);
1328
1329         return ret;
1330 }
1331
1332
1333 /*
1334  *      Delete a service from the service list
1335  *      - The service must be unlinked, unlocked and not referenced!
1336  *      - We are called under _bh lock
1337  */
1338 static void __ip_vs_del_service(struct ip_vs_service *svc)
1339 {
1340         struct ip_vs_dest *dest, *nxt;
1341         struct ip_vs_scheduler *old_sched;
1342
1343         /* Count only IPv4 services for old get/setsockopt interface */
1344         if (svc->af == AF_INET)
1345                 ip_vs_num_services--;
1346
1347         ip_vs_kill_estimator(&svc->stats);
1348
1349         /* Unbind scheduler */
1350         old_sched = svc->scheduler;
1351         ip_vs_unbind_scheduler(svc);
1352         if (old_sched)
1353                 ip_vs_scheduler_put(old_sched);
1354
1355         /* Unbind app inc */
1356         if (svc->inc) {
1357                 ip_vs_app_inc_put(svc->inc);
1358                 svc->inc = NULL;
1359         }
1360
1361         /*
1362          *    Unlink the whole destination list
1363          */
1364         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1365                 __ip_vs_unlink_dest(svc, dest, 0);
1366                 __ip_vs_del_dest(dest);
1367         }
1368
1369         /*
1370          *    Update the virtual service counters
1371          */
1372         if (svc->port == FTPPORT)
1373                 atomic_dec(&ip_vs_ftpsvc_counter);
1374         else if (svc->port == 0)
1375                 atomic_dec(&ip_vs_nullsvc_counter);
1376
1377         /*
1378          *    Free the service if nobody refers to it
1379          */
1380         if (atomic_read(&svc->refcnt) == 0)
1381                 kfree(svc);
1382
1383         /* decrease the module use count */
1384         ip_vs_use_count_dec();
1385 }
1386
1387 /*
1388  *      Delete a service from the service list
1389  */
1390 static int ip_vs_del_service(struct ip_vs_service *svc)
1391 {
1392         if (svc == NULL)
1393                 return -EEXIST;
1394
1395         /*
1396          * Unhash it from the service table
1397          */
1398         write_lock_bh(&__ip_vs_svc_lock);
1399
1400         ip_vs_svc_unhash(svc);
1401
1402         /*
1403          * Wait until all the svc users go away.
1404          */
1405         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1406
1407         __ip_vs_del_service(svc);
1408
1409         write_unlock_bh(&__ip_vs_svc_lock);
1410
1411         return 0;
1412 }
1413
1414
1415 /*
1416  *      Flush all the virtual services
1417  */
1418 static int ip_vs_flush(void)
1419 {
1420         int idx;
1421         struct ip_vs_service *svc, *nxt;
1422
1423         /*
1424          * Flush the service table hashed by <protocol,addr,port>
1425          */
1426         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1427                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1428                         write_lock_bh(&__ip_vs_svc_lock);
1429                         ip_vs_svc_unhash(svc);
1430                         /*
1431                          * Wait until all the svc users go away.
1432                          */
1433                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1434                         __ip_vs_del_service(svc);
1435                         write_unlock_bh(&__ip_vs_svc_lock);
1436                 }
1437         }
1438
1439         /*
1440          * Flush the service table hashed by fwmark
1441          */
1442         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1443                 list_for_each_entry_safe(svc, nxt,
1444                                          &ip_vs_svc_fwm_table[idx], f_list) {
1445                         write_lock_bh(&__ip_vs_svc_lock);
1446                         ip_vs_svc_unhash(svc);
1447                         /*
1448                          * Wait until all the svc users go away.
1449                          */
1450                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1451                         __ip_vs_del_service(svc);
1452                         write_unlock_bh(&__ip_vs_svc_lock);
1453                 }
1454         }
1455
1456         return 0;
1457 }
1458
1459
1460 /*
1461  *      Zero counters in a service or all services
1462  */
1463 static int ip_vs_zero_service(struct ip_vs_service *svc)
1464 {
1465         struct ip_vs_dest *dest;
1466
1467         write_lock_bh(&__ip_vs_svc_lock);
1468         list_for_each_entry(dest, &svc->destinations, n_list) {
1469                 ip_vs_zero_stats(&dest->stats);
1470         }
1471         ip_vs_zero_stats(&svc->stats);
1472         write_unlock_bh(&__ip_vs_svc_lock);
1473         return 0;
1474 }
1475
1476 static int ip_vs_zero_all(void)
1477 {
1478         int idx;
1479         struct ip_vs_service *svc;
1480
1481         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1482                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1483                         ip_vs_zero_service(svc);
1484                 }
1485         }
1486
1487         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1489                         ip_vs_zero_service(svc);
1490                 }
1491         }
1492
1493         ip_vs_zero_stats(&ip_vs_stats);
1494         return 0;
1495 }
1496
1497
1498 static int
1499 proc_do_defense_mode(ctl_table *table, int write,
1500                      void __user *buffer, size_t *lenp, loff_t *ppos)
1501 {
1502         int *valp = table->data;
1503         int val = *valp;
1504         int rc;
1505
1506         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1507         if (write && (*valp != val)) {
1508                 if ((*valp < 0) || (*valp > 3)) {
1509                         /* Restore the correct value */
1510                         *valp = val;
1511                 } else {
1512                         update_defense_level();
1513                 }
1514         }
1515         return rc;
1516 }
1517
1518
1519 static int
1520 proc_do_sync_threshold(ctl_table *table, int write,
1521                        void __user *buffer, size_t *lenp, loff_t *ppos)
1522 {
1523         int *valp = table->data;
1524         int val[2];
1525         int rc;
1526
1527         /* backup the value first */
1528         memcpy(val, valp, sizeof(val));
1529
1530         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1531         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1532                 /* Restore the correct value */
1533                 memcpy(valp, val, sizeof(val));
1534         }
1535         return rc;
1536 }
1537
1538
1539 /*
1540  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1541  */
1542
1543 static struct ctl_table vs_vars[] = {
1544         {
1545                 .procname       = "amemthresh",
1546                 .data           = &sysctl_ip_vs_amemthresh,
1547                 .maxlen         = sizeof(int),
1548                 .mode           = 0644,
1549                 .proc_handler   = proc_dointvec,
1550         },
1551 #ifdef CONFIG_IP_VS_DEBUG
1552         {
1553                 .procname       = "debug_level",
1554                 .data           = &sysctl_ip_vs_debug_level,
1555                 .maxlen         = sizeof(int),
1556                 .mode           = 0644,
1557                 .proc_handler   = proc_dointvec,
1558         },
1559 #endif
1560         {
1561                 .procname       = "am_droprate",
1562                 .data           = &sysctl_ip_vs_am_droprate,
1563                 .maxlen         = sizeof(int),
1564                 .mode           = 0644,
1565                 .proc_handler   = proc_dointvec,
1566         },
1567         {
1568                 .procname       = "drop_entry",
1569                 .data           = &sysctl_ip_vs_drop_entry,
1570                 .maxlen         = sizeof(int),
1571                 .mode           = 0644,
1572                 .proc_handler   = proc_do_defense_mode,
1573         },
1574         {
1575                 .procname       = "drop_packet",
1576                 .data           = &sysctl_ip_vs_drop_packet,
1577                 .maxlen         = sizeof(int),
1578                 .mode           = 0644,
1579                 .proc_handler   = proc_do_defense_mode,
1580         },
1581         {
1582                 .procname       = "secure_tcp",
1583                 .data           = &sysctl_ip_vs_secure_tcp,
1584                 .maxlen         = sizeof(int),
1585                 .mode           = 0644,
1586                 .proc_handler   = proc_do_defense_mode,
1587         },
1588 #if 0
1589         {
1590                 .procname       = "timeout_established",
1591                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1592                 .maxlen         = sizeof(int),
1593                 .mode           = 0644,
1594                 .proc_handler   = proc_dointvec_jiffies,
1595         },
1596         {
1597                 .procname       = "timeout_synsent",
1598                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1599                 .maxlen         = sizeof(int),
1600                 .mode           = 0644,
1601                 .proc_handler   = proc_dointvec_jiffies,
1602         },
1603         {
1604                 .procname       = "timeout_synrecv",
1605                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1606                 .maxlen         = sizeof(int),
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec_jiffies,
1609         },
1610         {
1611                 .procname       = "timeout_finwait",
1612                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = proc_dointvec_jiffies,
1616         },
1617         {
1618                 .procname       = "timeout_timewait",
1619                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1620                 .maxlen         = sizeof(int),
1621                 .mode           = 0644,
1622                 .proc_handler   = proc_dointvec_jiffies,
1623         },
1624         {
1625                 .procname       = "timeout_close",
1626                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0644,
1629                 .proc_handler   = proc_dointvec_jiffies,
1630         },
1631         {
1632                 .procname       = "timeout_closewait",
1633                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1634                 .maxlen         = sizeof(int),
1635                 .mode           = 0644,
1636                 .proc_handler   = proc_dointvec_jiffies,
1637         },
1638         {
1639                 .procname       = "timeout_lastack",
1640                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1641                 .maxlen         = sizeof(int),
1642                 .mode           = 0644,
1643                 .proc_handler   = proc_dointvec_jiffies,
1644         },
1645         {
1646                 .procname       = "timeout_listen",
1647                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec_jiffies,
1651         },
1652         {
1653                 .procname       = "timeout_synack",
1654                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1655                 .maxlen         = sizeof(int),
1656                 .mode           = 0644,
1657                 .proc_handler   = proc_dointvec_jiffies,
1658         },
1659         {
1660                 .procname       = "timeout_udp",
1661                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1662                 .maxlen         = sizeof(int),
1663                 .mode           = 0644,
1664                 .proc_handler   = proc_dointvec_jiffies,
1665         },
1666         {
1667                 .procname       = "timeout_icmp",
1668                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1669                 .maxlen         = sizeof(int),
1670                 .mode           = 0644,
1671                 .proc_handler   = proc_dointvec_jiffies,
1672         },
1673 #endif
1674         {
1675                 .procname       = "cache_bypass",
1676                 .data           = &sysctl_ip_vs_cache_bypass,
1677                 .maxlen         = sizeof(int),
1678                 .mode           = 0644,
1679                 .proc_handler   = proc_dointvec,
1680         },
1681         {
1682                 .procname       = "expire_nodest_conn",
1683                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1684                 .maxlen         = sizeof(int),
1685                 .mode           = 0644,
1686                 .proc_handler   = proc_dointvec,
1687         },
1688         {
1689                 .procname       = "expire_quiescent_template",
1690                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = proc_dointvec,
1694         },
1695         {
1696                 .procname       = "sync_threshold",
1697                 .data           = &sysctl_ip_vs_sync_threshold,
1698                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_do_sync_threshold,
1701         },
1702         {
1703                 .procname       = "nat_icmp_send",
1704                 .data           = &sysctl_ip_vs_nat_icmp_send,
1705                 .maxlen         = sizeof(int),
1706                 .mode           = 0644,
1707                 .proc_handler   = proc_dointvec,
1708         },
1709         { }
1710 };
1711
1712 const struct ctl_path net_vs_ctl_path[] = {
1713         { .procname = "net", },
1714         { .procname = "ipv4", },
1715         { .procname = "vs", },
1716         { }
1717 };
1718 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1719
1720 static struct ctl_table_header * sysctl_header;
1721
1722 #ifdef CONFIG_PROC_FS
1723
1724 struct ip_vs_iter {
1725         struct list_head *table;
1726         int bucket;
1727 };
1728
1729 /*
1730  *      Write the contents of the VS rule table to a PROCfs file.
1731  *      (It is kept just for backward compatibility)
1732  */
1733 static inline const char *ip_vs_fwd_name(unsigned flags)
1734 {
1735         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1736         case IP_VS_CONN_F_LOCALNODE:
1737                 return "Local";
1738         case IP_VS_CONN_F_TUNNEL:
1739                 return "Tunnel";
1740         case IP_VS_CONN_F_DROUTE:
1741                 return "Route";
1742         default:
1743                 return "Masq";
1744         }
1745 }
1746
1747
1748 /* Get the Nth entry in the two lists */
1749 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1750 {
1751         struct ip_vs_iter *iter = seq->private;
1752         int idx;
1753         struct ip_vs_service *svc;
1754
1755         /* look in hash by protocol */
1756         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1757                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1758                         if (pos-- == 0){
1759                                 iter->table = ip_vs_svc_table;
1760                                 iter->bucket = idx;
1761                                 return svc;
1762                         }
1763                 }
1764         }
1765
1766         /* keep looking in fwmark */
1767         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1768                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1769                         if (pos-- == 0) {
1770                                 iter->table = ip_vs_svc_fwm_table;
1771                                 iter->bucket = idx;
1772                                 return svc;
1773                         }
1774                 }
1775         }
1776
1777         return NULL;
1778 }
1779
1780 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1781 __acquires(__ip_vs_svc_lock)
1782 {
1783
1784         read_lock_bh(&__ip_vs_svc_lock);
1785         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1786 }
1787
1788
1789 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1790 {
1791         struct list_head *e;
1792         struct ip_vs_iter *iter;
1793         struct ip_vs_service *svc;
1794
1795         ++*pos;
1796         if (v == SEQ_START_TOKEN)
1797                 return ip_vs_info_array(seq,0);
1798
1799         svc = v;
1800         iter = seq->private;
1801
1802         if (iter->table == ip_vs_svc_table) {
1803                 /* next service in table hashed by protocol */
1804                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1805                         return list_entry(e, struct ip_vs_service, s_list);
1806
1807
1808                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1809                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1810                                             s_list) {
1811                                 return svc;
1812                         }
1813                 }
1814
1815                 iter->table = ip_vs_svc_fwm_table;
1816                 iter->bucket = -1;
1817                 goto scan_fwmark;
1818         }
1819
1820         /* next service in hashed by fwmark */
1821         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1822                 return list_entry(e, struct ip_vs_service, f_list);
1823
1824  scan_fwmark:
1825         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1827                                     f_list)
1828                         return svc;
1829         }
1830
1831         return NULL;
1832 }
1833
1834 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1835 __releases(__ip_vs_svc_lock)
1836 {
1837         read_unlock_bh(&__ip_vs_svc_lock);
1838 }
1839
1840
1841 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1842 {
1843         if (v == SEQ_START_TOKEN) {
1844                 seq_printf(seq,
1845                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1846                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1847                 seq_puts(seq,
1848                          "Prot LocalAddress:Port Scheduler Flags\n");
1849                 seq_puts(seq,
1850                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1851         } else {
1852                 const struct ip_vs_service *svc = v;
1853                 const struct ip_vs_iter *iter = seq->private;
1854                 const struct ip_vs_dest *dest;
1855
1856                 if (iter->table == ip_vs_svc_table) {
1857 #ifdef CONFIG_IP_VS_IPV6
1858                         if (svc->af == AF_INET6)
1859                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1860                                            ip_vs_proto_name(svc->protocol),
1861                                            &svc->addr.in6,
1862                                            ntohs(svc->port),
1863                                            svc->scheduler->name);
1864                         else
1865 #endif
1866                                 seq_printf(seq, "%s  %08X:%04X %s ",
1867                                            ip_vs_proto_name(svc->protocol),
1868                                            ntohl(svc->addr.ip),
1869                                            ntohs(svc->port),
1870                                            svc->scheduler->name);
1871                 } else {
1872                         seq_printf(seq, "FWM  %08X %s ",
1873                                    svc->fwmark, svc->scheduler->name);
1874                 }
1875
1876                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1877                         seq_printf(seq, "persistent %d %08X\n",
1878                                 svc->timeout,
1879                                 ntohl(svc->netmask));
1880                 else
1881                         seq_putc(seq, '\n');
1882
1883                 list_for_each_entry(dest, &svc->destinations, n_list) {
1884 #ifdef CONFIG_IP_VS_IPV6
1885                         if (dest->af == AF_INET6)
1886                                 seq_printf(seq,
1887                                            "  -> [%pI6]:%04X"
1888                                            "      %-7s %-6d %-10d %-10d\n",
1889                                            &dest->addr.in6,
1890                                            ntohs(dest->port),
1891                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1892                                            atomic_read(&dest->weight),
1893                                            atomic_read(&dest->activeconns),
1894                                            atomic_read(&dest->inactconns));
1895                         else
1896 #endif
1897                                 seq_printf(seq,
1898                                            "  -> %08X:%04X      "
1899                                            "%-7s %-6d %-10d %-10d\n",
1900                                            ntohl(dest->addr.ip),
1901                                            ntohs(dest->port),
1902                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1903                                            atomic_read(&dest->weight),
1904                                            atomic_read(&dest->activeconns),
1905                                            atomic_read(&dest->inactconns));
1906
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static const struct seq_operations ip_vs_info_seq_ops = {
1913         .start = ip_vs_info_seq_start,
1914         .next  = ip_vs_info_seq_next,
1915         .stop  = ip_vs_info_seq_stop,
1916         .show  = ip_vs_info_seq_show,
1917 };
1918
1919 static int ip_vs_info_open(struct inode *inode, struct file *file)
1920 {
1921         return seq_open_private(file, &ip_vs_info_seq_ops,
1922                         sizeof(struct ip_vs_iter));
1923 }
1924
1925 static const struct file_operations ip_vs_info_fops = {
1926         .owner   = THIS_MODULE,
1927         .open    = ip_vs_info_open,
1928         .read    = seq_read,
1929         .llseek  = seq_lseek,
1930         .release = seq_release_private,
1931 };
1932
1933 #endif
1934
1935 struct ip_vs_stats ip_vs_stats = {
1936         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1937 };
1938
1939 #ifdef CONFIG_PROC_FS
1940 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1941 {
1942
1943 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1944         seq_puts(seq,
1945                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1946         seq_printf(seq,
1947                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1948
1949         spin_lock_bh(&ip_vs_stats.lock);
1950         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1951                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1952                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1953                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1954
1955 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1956         seq_puts(seq,
1957                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1958         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1959                         ip_vs_stats.ustats.cps,
1960                         ip_vs_stats.ustats.inpps,
1961                         ip_vs_stats.ustats.outpps,
1962                         ip_vs_stats.ustats.inbps,
1963                         ip_vs_stats.ustats.outbps);
1964         spin_unlock_bh(&ip_vs_stats.lock);
1965
1966         return 0;
1967 }
1968
1969 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1970 {
1971         return single_open(file, ip_vs_stats_show, NULL);
1972 }
1973
1974 static const struct file_operations ip_vs_stats_fops = {
1975         .owner = THIS_MODULE,
1976         .open = ip_vs_stats_seq_open,
1977         .read = seq_read,
1978         .llseek = seq_lseek,
1979         .release = single_release,
1980 };
1981
1982 #endif
1983
1984 /*
1985  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1986  */
1987 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1988 {
1989         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1990                   u->tcp_timeout,
1991                   u->tcp_fin_timeout,
1992                   u->udp_timeout);
1993
1994 #ifdef CONFIG_IP_VS_PROTO_TCP
1995         if (u->tcp_timeout) {
1996                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1997                         = u->tcp_timeout * HZ;
1998         }
1999
2000         if (u->tcp_fin_timeout) {
2001                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2002                         = u->tcp_fin_timeout * HZ;
2003         }
2004 #endif
2005
2006 #ifdef CONFIG_IP_VS_PROTO_UDP
2007         if (u->udp_timeout) {
2008                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2009                         = u->udp_timeout * HZ;
2010         }
2011 #endif
2012         return 0;
2013 }
2014
2015
/* Index of a set-sockopt command inside set_arglen[]. */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Exact argument sizes expected from userspace for each command class. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack buffer that receives any set-sockopt argument. */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2023
2024 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2025         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2026         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2027         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2028         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2029         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2030         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2031         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2032         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2033         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2034         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2035         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2036 };
2037
2038 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2039                                   struct ip_vs_service_user *usvc_compat)
2040 {
2041         usvc->af                = AF_INET;
2042         usvc->protocol          = usvc_compat->protocol;
2043         usvc->addr.ip           = usvc_compat->addr;
2044         usvc->port              = usvc_compat->port;
2045         usvc->fwmark            = usvc_compat->fwmark;
2046
2047         /* Deep copy of sched_name is not needed here */
2048         usvc->sched_name        = usvc_compat->sched_name;
2049
2050         usvc->flags             = usvc_compat->flags;
2051         usvc->timeout           = usvc_compat->timeout;
2052         usvc->netmask           = usvc_compat->netmask;
2053 }
2054
2055 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2056                                    struct ip_vs_dest_user *udest_compat)
2057 {
2058         udest->addr.ip          = udest_compat->addr;
2059         udest->port             = udest_compat->port;
2060         udest->conn_flags       = udest_compat->conn_flags;
2061         udest->weight           = udest_compat->weight;
2062         udest->u_threshold      = udest_compat->u_threshold;
2063         udest->l_threshold      = udest_compat->l_threshold;
2064 }
2065
2066 static int
2067 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2068 {
2069         int ret;
2070         unsigned char arg[MAX_ARG_LEN];
2071         struct ip_vs_service_user *usvc_compat;
2072         struct ip_vs_service_user_kern usvc;
2073         struct ip_vs_service *svc;
2074         struct ip_vs_dest_user *udest_compat;
2075         struct ip_vs_dest_user_kern udest;
2076
2077         if (!capable(CAP_NET_ADMIN))
2078                 return -EPERM;
2079
2080         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2081                 return -EINVAL;
2082         if (len < 0 || len >  MAX_ARG_LEN)
2083                 return -EINVAL;
2084         if (len != set_arglen[SET_CMDID(cmd)]) {
2085                 pr_err("set_ctl: len %u != %u\n",
2086                        len, set_arglen[SET_CMDID(cmd)]);
2087                 return -EINVAL;
2088         }
2089
2090         if (copy_from_user(arg, user, len) != 0)
2091                 return -EFAULT;
2092
2093         /* increase the module use count */
2094         ip_vs_use_count_inc();
2095
2096         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2097                 ret = -ERESTARTSYS;
2098                 goto out_dec;
2099         }
2100
2101         if (cmd == IP_VS_SO_SET_FLUSH) {
2102                 /* Flush the virtual service */
2103                 ret = ip_vs_flush();
2104                 goto out_unlock;
2105         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2106                 /* Set timeout values for (tcp tcpfin udp) */
2107                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2108                 goto out_unlock;
2109         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2110                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2111                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2112                 goto out_unlock;
2113         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2114                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2115                 ret = stop_sync_thread(dm->state);
2116                 goto out_unlock;
2117         }
2118
2119         usvc_compat = (struct ip_vs_service_user *)arg;
2120         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2121
2122         /* We only use the new structs internally, so copy userspace compat
2123          * structs to extended internal versions */
2124         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2125         ip_vs_copy_udest_compat(&udest, udest_compat);
2126
2127         if (cmd == IP_VS_SO_SET_ZERO) {
2128                 /* if no service address is set, zero counters in all */
2129                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2130                         ret = ip_vs_zero_all();
2131                         goto out_unlock;
2132                 }
2133         }
2134
2135         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2136         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2137                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2138                        usvc.protocol, &usvc.addr.ip,
2139                        ntohs(usvc.port), usvc.sched_name);
2140                 ret = -EFAULT;
2141                 goto out_unlock;
2142         }
2143
2144         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2145         if (usvc.fwmark == 0)
2146                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2147                                           &usvc.addr, usvc.port);
2148         else
2149                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2150
2151         if (cmd != IP_VS_SO_SET_ADD
2152             && (svc == NULL || svc->protocol != usvc.protocol)) {
2153                 ret = -ESRCH;
2154                 goto out_unlock;
2155         }
2156
2157         switch (cmd) {
2158         case IP_VS_SO_SET_ADD:
2159                 if (svc != NULL)
2160                         ret = -EEXIST;
2161                 else
2162                         ret = ip_vs_add_service(&usvc, &svc);
2163                 break;
2164         case IP_VS_SO_SET_EDIT:
2165                 ret = ip_vs_edit_service(svc, &usvc);
2166                 break;
2167         case IP_VS_SO_SET_DEL:
2168                 ret = ip_vs_del_service(svc);
2169                 if (!ret)
2170                         goto out_unlock;
2171                 break;
2172         case IP_VS_SO_SET_ZERO:
2173                 ret = ip_vs_zero_service(svc);
2174                 break;
2175         case IP_VS_SO_SET_ADDDEST:
2176                 ret = ip_vs_add_dest(svc, &udest);
2177                 break;
2178         case IP_VS_SO_SET_EDITDEST:
2179                 ret = ip_vs_edit_dest(svc, &udest);
2180                 break;
2181         case IP_VS_SO_SET_DELDEST:
2182                 ret = ip_vs_del_dest(svc, &udest);
2183                 break;
2184         default:
2185                 ret = -EINVAL;
2186         }
2187
2188         if (svc)
2189                 ip_vs_service_put(svc);
2190
2191   out_unlock:
2192         mutex_unlock(&__ip_vs_mutex);
2193   out_dec:
2194         /* decrease the module use count */
2195         ip_vs_use_count_dec();
2196
2197         return ret;
2198 }
2199
2200
2201 static void
2202 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2203 {
2204         spin_lock_bh(&src->lock);
2205         memcpy(dst, &src->ustats, sizeof(*dst));
2206         spin_unlock_bh(&src->lock);
2207 }
2208
2209 static void
2210 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2211 {
2212         dst->protocol = src->protocol;
2213         dst->addr = src->addr.ip;
2214         dst->port = src->port;
2215         dst->fwmark = src->fwmark;
2216         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2217         dst->flags = src->flags;
2218         dst->timeout = src->timeout / HZ;
2219         dst->netmask = src->netmask;
2220         dst->num_dests = src->num_dests;
2221         ip_vs_copy_stats(&dst->stats, &src->stats);
2222 }
2223
2224 static inline int
2225 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2226                             struct ip_vs_get_services __user *uptr)
2227 {
2228         int idx, count=0;
2229         struct ip_vs_service *svc;
2230         struct ip_vs_service_entry entry;
2231         int ret = 0;
2232
2233         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2234                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2235                         /* Only expose IPv4 entries to old interface */
2236                         if (svc->af != AF_INET)
2237                                 continue;
2238
2239                         if (count >= get->num_services)
2240                                 goto out;
2241                         memset(&entry, 0, sizeof(entry));
2242                         ip_vs_copy_service(&entry, svc);
2243                         if (copy_to_user(&uptr->entrytable[count],
2244                                          &entry, sizeof(entry))) {
2245                                 ret = -EFAULT;
2246                                 goto out;
2247                         }
2248                         count++;
2249                 }
2250         }
2251
2252         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2253                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2254                         /* Only expose IPv4 entries to old interface */
2255                         if (svc->af != AF_INET)
2256                                 continue;
2257
2258                         if (count >= get->num_services)
2259                                 goto out;
2260                         memset(&entry, 0, sizeof(entry));
2261                         ip_vs_copy_service(&entry, svc);
2262                         if (copy_to_user(&uptr->entrytable[count],
2263                                          &entry, sizeof(entry))) {
2264                                 ret = -EFAULT;
2265                                 goto out;
2266                         }
2267                         count++;
2268                 }
2269         }
2270   out:
2271         return ret;
2272 }
2273
2274 static inline int
2275 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2276                          struct ip_vs_get_dests __user *uptr)
2277 {
2278         struct ip_vs_service *svc;
2279         union nf_inet_addr addr = { .ip = get->addr };
2280         int ret = 0;
2281
2282         if (get->fwmark)
2283                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2284         else
2285                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2286                                           get->port);
2287
2288         if (svc) {
2289                 int count = 0;
2290                 struct ip_vs_dest *dest;
2291                 struct ip_vs_dest_entry entry;
2292
2293                 list_for_each_entry(dest, &svc->destinations, n_list) {
2294                         if (count >= get->num_dests)
2295                                 break;
2296
2297                         entry.addr = dest->addr.ip;
2298                         entry.port = dest->port;
2299                         entry.conn_flags = atomic_read(&dest->conn_flags);
2300                         entry.weight = atomic_read(&dest->weight);
2301                         entry.u_threshold = dest->u_threshold;
2302                         entry.l_threshold = dest->l_threshold;
2303                         entry.activeconns = atomic_read(&dest->activeconns);
2304                         entry.inactconns = atomic_read(&dest->inactconns);
2305                         entry.persistconns = atomic_read(&dest->persistconns);
2306                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2307                         if (copy_to_user(&uptr->entrytable[count],
2308                                          &entry, sizeof(entry))) {
2309                                 ret = -EFAULT;
2310                                 break;
2311                         }
2312                         count++;
2313                 }
2314                 ip_vs_service_put(svc);
2315         } else
2316                 ret = -ESRCH;
2317         return ret;
2318 }
2319
2320 static inline void
2321 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2322 {
2323 #ifdef CONFIG_IP_VS_PROTO_TCP
2324         u->tcp_timeout =
2325                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2326         u->tcp_fin_timeout =
2327                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2328 #endif
2329 #ifdef CONFIG_IP_VS_PROTO_UDP
2330         u->udp_timeout =
2331                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2332 #endif
2333 }
2334
2335
/* Index of a get-sockopt command inside get_arglen[]. */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Minimum argument sizes expected from userspace for each command. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2343
2344 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2345         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2346         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2347         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2348         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2349         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2350         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2351         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2352 };
2353
2354 static int
2355 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2356 {
2357         unsigned char arg[128];
2358         int ret = 0;
2359         unsigned int copylen;
2360
2361         if (!capable(CAP_NET_ADMIN))
2362                 return -EPERM;
2363
2364         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2365                 return -EINVAL;
2366
2367         if (*len < get_arglen[GET_CMDID(cmd)]) {
2368                 pr_err("get_ctl: len %u < %u\n",
2369                        *len, get_arglen[GET_CMDID(cmd)]);
2370                 return -EINVAL;
2371         }
2372
2373         copylen = get_arglen[GET_CMDID(cmd)];
2374         if (copylen > 128)
2375                 return -EINVAL;
2376
2377         if (copy_from_user(arg, user, copylen) != 0)
2378                 return -EFAULT;
2379
2380         if (mutex_lock_interruptible(&__ip_vs_mutex))
2381                 return -ERESTARTSYS;
2382
2383         switch (cmd) {
2384         case IP_VS_SO_GET_VERSION:
2385         {
2386                 char buf[64];
2387
2388                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2389                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2390                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2391                         ret = -EFAULT;
2392                         goto out;
2393                 }
2394                 *len = strlen(buf)+1;
2395         }
2396         break;
2397
2398         case IP_VS_SO_GET_INFO:
2399         {
2400                 struct ip_vs_getinfo info;
2401                 info.version = IP_VS_VERSION_CODE;
2402                 info.size = ip_vs_conn_tab_size;
2403                 info.num_services = ip_vs_num_services;
2404                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2405                         ret = -EFAULT;
2406         }
2407         break;
2408
2409         case IP_VS_SO_GET_SERVICES:
2410         {
2411                 struct ip_vs_get_services *get;
2412                 int size;
2413
2414                 get = (struct ip_vs_get_services *)arg;
2415                 size = sizeof(*get) +
2416                         sizeof(struct ip_vs_service_entry) * get->num_services;
2417                 if (*len != size) {
2418                         pr_err("length: %u != %u\n", *len, size);
2419                         ret = -EINVAL;
2420                         goto out;
2421                 }
2422                 ret = __ip_vs_get_service_entries(get, user);
2423         }
2424         break;
2425
2426         case IP_VS_SO_GET_SERVICE:
2427         {
2428                 struct ip_vs_service_entry *entry;
2429                 struct ip_vs_service *svc;
2430                 union nf_inet_addr addr;
2431
2432                 entry = (struct ip_vs_service_entry *)arg;
2433                 addr.ip = entry->addr;
2434                 if (entry->fwmark)
2435                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2436                 else
2437                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2438                                                   &addr, entry->port);
2439                 if (svc) {
2440                         ip_vs_copy_service(entry, svc);
2441                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2442                                 ret = -EFAULT;
2443                         ip_vs_service_put(svc);
2444                 } else
2445                         ret = -ESRCH;
2446         }
2447         break;
2448
2449         case IP_VS_SO_GET_DESTS:
2450         {
2451                 struct ip_vs_get_dests *get;
2452                 int size;
2453
2454                 get = (struct ip_vs_get_dests *)arg;
2455                 size = sizeof(*get) +
2456                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2457                 if (*len != size) {
2458                         pr_err("length: %u != %u\n", *len, size);
2459                         ret = -EINVAL;
2460                         goto out;
2461                 }
2462                 ret = __ip_vs_get_dest_entries(get, user);
2463         }
2464         break;
2465
2466         case IP_VS_SO_GET_TIMEOUT:
2467         {
2468                 struct ip_vs_timeout_user t;
2469
2470                 __ip_vs_get_timeouts(&t);
2471                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2472                         ret = -EFAULT;
2473         }
2474         break;
2475
2476         case IP_VS_SO_GET_DAEMON:
2477         {
2478                 struct ip_vs_daemon_user d[2];
2479
2480                 memset(&d, 0, sizeof(d));
2481                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2482                         d[0].state = IP_VS_STATE_MASTER;
2483                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2484                         d[0].syncid = ip_vs_master_syncid;
2485                 }
2486                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2487                         d[1].state = IP_VS_STATE_BACKUP;
2488                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2489                         d[1].syncid = ip_vs_backup_syncid;
2490                 }
2491                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2492                         ret = -EFAULT;
2493         }
2494         break;
2495
2496         default:
2497                 ret = -EINVAL;
2498         }
2499
2500   out:
2501         mutex_unlock(&__ip_vs_mutex);
2502         return ret;
2503 }
2504
2505
2506 static struct nf_sockopt_ops ip_vs_sockopts = {
2507         .pf             = PF_INET,
2508         .set_optmin     = IP_VS_BASE_CTL,
2509         .set_optmax     = IP_VS_SO_SET_MAX+1,
2510         .set            = do_ip_vs_set_ctl,
2511         .get_optmin     = IP_VS_BASE_CTL,
2512         .get_optmax     = IP_VS_SO_GET_MAX+1,
2513         .get            = do_ip_vs_get_ctl,
2514         .owner          = THIS_MODULE,
2515 };
2516
2517 /*
2518  * Generic Netlink interface
2519  */
2520
2521 /* IPVS genetlink family */
2522 static struct genl_family ip_vs_genl_family = {
2523         .id             = GENL_ID_GENERATE,
2524         .hdrsize        = 0,
2525         .name           = IPVS_GENL_NAME,
2526         .version        = IPVS_GENL_VERSION,
2527         .maxattr        = IPVS_CMD_MAX,
2528 };
2529
2530 /* Policy used for first-level command attributes */
2531 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2532         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2533         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2534         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2535         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2536         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2537         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2538 };
2539
2540 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2541 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2542         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2543         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2544                                             .len = IP_VS_IFNAME_MAXLEN },
2545         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2546 };
2547
2548 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2549 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2550         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2551         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2552         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2553                                             .len = sizeof(union nf_inet_addr) },
2554         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2555         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2556         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2557                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2558         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2559                                             .len = sizeof(struct ip_vs_flags) },
2560         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2561         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2562         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2563 };
2564
2565 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2566 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2567         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2568                                             .len = sizeof(union nf_inet_addr) },
2569         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2570         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2571         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2572         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2573         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2574         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2575         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2576         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2577         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2578 };
2579
2580 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2581                                  struct ip_vs_stats *stats)
2582 {
2583         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2584         if (!nl_stats)
2585                 return -EMSGSIZE;
2586
2587         spin_lock_bh(&stats->lock);
2588
2589         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2590         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2591         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2592         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2593         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2594         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2595         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2596         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2597         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2598         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2599
2600         spin_unlock_bh(&stats->lock);
2601
2602         nla_nest_end(skb, nl_stats);
2603
2604         return 0;
2605
2606 nla_put_failure:
2607         spin_unlock_bh(&stats->lock);
2608         nla_nest_cancel(skb, nl_stats);
2609         return -EMSGSIZE;
2610 }
2611
2612 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2613                                    struct ip_vs_service *svc)
2614 {
2615         struct nlattr *nl_service;
2616         struct ip_vs_flags flags = { .flags = svc->flags,
2617                                      .mask = ~0 };
2618
2619         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2620         if (!nl_service)
2621                 return -EMSGSIZE;
2622
2623         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2624
2625         if (svc->fwmark) {
2626                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2627         } else {
2628                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2629                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2630                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2631         }
2632
2633         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2634         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2635         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2636         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2637
2638         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2639                 goto nla_put_failure;
2640
2641         nla_nest_end(skb, nl_service);
2642
2643         return 0;
2644
2645 nla_put_failure:
2646         nla_nest_cancel(skb, nl_service);
2647         return -EMSGSIZE;
2648 }
2649
2650 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2651                                    struct ip_vs_service *svc,
2652                                    struct netlink_callback *cb)
2653 {
2654         void *hdr;
2655
2656         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2657                           &ip_vs_genl_family, NLM_F_MULTI,
2658                           IPVS_CMD_NEW_SERVICE);
2659         if (!hdr)
2660                 return -EMSGSIZE;
2661
2662         if (ip_vs_genl_fill_service(skb, svc) < 0)
2663                 goto nla_put_failure;
2664
2665         return genlmsg_end(skb, hdr);
2666
2667 nla_put_failure:
2668         genlmsg_cancel(skb, hdr);
2669         return -EMSGSIZE;
2670 }
2671
2672 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2673                                     struct netlink_callback *cb)
2674 {
2675         int idx = 0, i;
2676         int start = cb->args[0];
2677         struct ip_vs_service *svc;
2678
2679         mutex_lock(&__ip_vs_mutex);
2680         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2681                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2682                         if (++idx <= start)
2683                                 continue;
2684                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2685                                 idx--;
2686                                 goto nla_put_failure;
2687                         }
2688                 }
2689         }
2690
2691         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2692                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2693                         if (++idx <= start)
2694                                 continue;
2695                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2696                                 idx--;
2697                                 goto nla_put_failure;
2698                         }
2699                 }
2700         }
2701
2702 nla_put_failure:
2703         mutex_unlock(&__ip_vs_mutex);
2704         cb->args[0] = idx;
2705
2706         return skb->len;
2707 }
2708
2709 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2710                                     struct nlattr *nla, int full_entry)
2711 {
2712         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2713         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2714
2715         /* Parse mandatory identifying service fields first */
2716         if (nla == NULL ||
2717             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2718                 return -EINVAL;
2719
2720         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2721         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2722         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2723         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2724         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2725
2726         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2727                 return -EINVAL;
2728
2729         memset(usvc, 0, sizeof(*usvc));
2730
2731         usvc->af = nla_get_u16(nla_af);
2732 #ifdef CONFIG_IP_VS_IPV6
2733         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2734 #else
2735         if (usvc->af != AF_INET)
2736 #endif
2737                 return -EAFNOSUPPORT;
2738
2739         if (nla_fwmark) {
2740                 usvc->protocol = IPPROTO_TCP;
2741                 usvc->fwmark = nla_get_u32(nla_fwmark);
2742         } else {
2743                 usvc->protocol = nla_get_u16(nla_protocol);
2744                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2745                 usvc->port = nla_get_u16(nla_port);
2746                 usvc->fwmark = 0;
2747         }
2748
2749         /* If a full entry was requested, check for the additional fields */
2750         if (full_entry) {
2751                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2752                               *nla_netmask;
2753                 struct ip_vs_flags flags;
2754                 struct ip_vs_service *svc;
2755
2756                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2757                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2758                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2759                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2760
2761                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2762                         return -EINVAL;
2763
2764                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2765
2766                 /* prefill flags from service if it already exists */
2767                 if (usvc->fwmark)
2768                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2769                 else
2770                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2771                                                   &usvc->addr, usvc->port);
2772                 if (svc) {
2773                         usvc->flags = svc->flags;
2774                         ip_vs_service_put(svc);
2775                 } else
2776                         usvc->flags = 0;
2777
2778                 /* set new flags from userland */
2779                 usvc->flags = (usvc->flags & ~flags.mask) |
2780                               (flags.flags & flags.mask);
2781                 usvc->sched_name = nla_data(nla_sched);
2782                 usvc->timeout = nla_get_u32(nla_timeout);
2783                 usvc->netmask = nla_get_u32(nla_netmask);
2784         }
2785
2786         return 0;
2787 }
2788
2789 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2790 {
2791         struct ip_vs_service_user_kern usvc;
2792         int ret;
2793
2794         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2795         if (ret)
2796                 return ERR_PTR(ret);
2797
2798         if (usvc.fwmark)
2799                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2800         else
2801                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2802                                            &usvc.addr, usvc.port);
2803 }
2804
2805 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2806 {
2807         struct nlattr *nl_dest;
2808
2809         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2810         if (!nl_dest)
2811                 return -EMSGSIZE;
2812
2813         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2814         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2815
2816         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2817                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2818         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2819         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2820         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2821         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2822                     atomic_read(&dest->activeconns));
2823         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2824                     atomic_read(&dest->inactconns));
2825         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2826                     atomic_read(&dest->persistconns));
2827
2828         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2829                 goto nla_put_failure;
2830
2831         nla_nest_end(skb, nl_dest);
2832
2833         return 0;
2834
2835 nla_put_failure:
2836         nla_nest_cancel(skb, nl_dest);
2837         return -EMSGSIZE;
2838 }
2839
2840 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2841                                 struct netlink_callback *cb)
2842 {
2843         void *hdr;
2844
2845         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2846                           &ip_vs_genl_family, NLM_F_MULTI,
2847                           IPVS_CMD_NEW_DEST);
2848         if (!hdr)
2849                 return -EMSGSIZE;
2850
2851         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2852                 goto nla_put_failure;
2853
2854         return genlmsg_end(skb, hdr);
2855
2856 nla_put_failure:
2857         genlmsg_cancel(skb, hdr);
2858         return -EMSGSIZE;
2859 }
2860
2861 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2862                                  struct netlink_callback *cb)
2863 {
2864         int idx = 0;
2865         int start = cb->args[0];
2866         struct ip_vs_service *svc;
2867         struct ip_vs_dest *dest;
2868         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2869
2870         mutex_lock(&__ip_vs_mutex);
2871
2872         /* Try to find the service for which to dump destinations */
2873         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2874                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2875                 goto out_err;
2876
2877         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2878         if (IS_ERR(svc) || svc == NULL)
2879                 goto out_err;
2880
2881         /* Dump the destinations */
2882         list_for_each_entry(dest, &svc->destinations, n_list) {
2883                 if (++idx <= start)
2884                         continue;
2885                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2886                         idx--;
2887                         goto nla_put_failure;
2888                 }
2889         }
2890
2891 nla_put_failure:
2892         cb->args[0] = idx;
2893         ip_vs_service_put(svc);
2894
2895 out_err:
2896         mutex_unlock(&__ip_vs_mutex);
2897
2898         return skb->len;
2899 }
2900
2901 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2902                                  struct nlattr *nla, int full_entry)
2903 {
2904         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2905         struct nlattr *nla_addr, *nla_port;
2906
2907         /* Parse mandatory identifying destination fields first */
2908         if (nla == NULL ||
2909             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2910                 return -EINVAL;
2911
2912         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2913         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2914
2915         if (!(nla_addr && nla_port))
2916                 return -EINVAL;
2917
2918         memset(udest, 0, sizeof(*udest));
2919
2920         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2921         udest->port = nla_get_u16(nla_port);
2922
2923         /* If a full entry was requested, check for the additional fields */
2924         if (full_entry) {
2925                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2926                               *nla_l_thresh;
2927
2928                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2929                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2930                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2931                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2932
2933                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2934                         return -EINVAL;
2935
2936                 udest->conn_flags = nla_get_u32(nla_fwd)
2937                                     & IP_VS_CONN_F_FWD_MASK;
2938                 udest->weight = nla_get_u32(nla_weight);
2939                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2940                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2941         }
2942
2943         return 0;
2944 }
2945
2946 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2947                                   const char *mcast_ifn, __be32 syncid)
2948 {
2949         struct nlattr *nl_daemon;
2950
2951         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2952         if (!nl_daemon)
2953                 return -EMSGSIZE;
2954
2955         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2956         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2957         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2958
2959         nla_nest_end(skb, nl_daemon);
2960
2961         return 0;
2962
2963 nla_put_failure:
2964         nla_nest_cancel(skb, nl_daemon);
2965         return -EMSGSIZE;
2966 }
2967
2968 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2969                                   const char *mcast_ifn, __be32 syncid,
2970                                   struct netlink_callback *cb)
2971 {
2972         void *hdr;
2973         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2974                           &ip_vs_genl_family, NLM_F_MULTI,
2975                           IPVS_CMD_NEW_DAEMON);
2976         if (!hdr)
2977                 return -EMSGSIZE;
2978
2979         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2980                 goto nla_put_failure;
2981
2982         return genlmsg_end(skb, hdr);
2983
2984 nla_put_failure:
2985         genlmsg_cancel(skb, hdr);
2986         return -EMSGSIZE;
2987 }
2988
2989 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2990                                    struct netlink_callback *cb)
2991 {
2992         mutex_lock(&__ip_vs_mutex);
2993         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2994                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2995                                            ip_vs_master_mcast_ifn,
2996                                            ip_vs_master_syncid, cb) < 0)
2997                         goto nla_put_failure;
2998
2999                 cb->args[0] = 1;
3000         }
3001
3002         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3003                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3004                                            ip_vs_backup_mcast_ifn,
3005                                            ip_vs_backup_syncid, cb) < 0)
3006                         goto nla_put_failure;
3007
3008                 cb->args[1] = 1;
3009         }
3010
3011 nla_put_failure:
3012         mutex_unlock(&__ip_vs_mutex);
3013
3014         return skb->len;
3015 }
3016
3017 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3018 {
3019         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3020               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3021               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3022                 return -EINVAL;
3023
3024         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3025                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3026                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3027 }
3028
3029 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3030 {
3031         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3032                 return -EINVAL;
3033
3034         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3035 }
3036
3037 static int ip_vs_genl_set_config(struct nlattr **attrs)
3038 {
3039         struct ip_vs_timeout_user t;
3040
3041         __ip_vs_get_timeouts(&t);
3042
3043         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3044                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3045
3046         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3047                 t.tcp_fin_timeout =
3048                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3049
3050         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3051                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3052
3053         return ip_vs_set_timeout(&t);
3054 }
3055
3056 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3057 {
3058         struct ip_vs_service *svc = NULL;
3059         struct ip_vs_service_user_kern usvc;
3060         struct ip_vs_dest_user_kern udest;
3061         int ret = 0, cmd;
3062         int need_full_svc = 0, need_full_dest = 0;
3063
3064         cmd = info->genlhdr->cmd;
3065
3066         mutex_lock(&__ip_vs_mutex);
3067
3068         if (cmd == IPVS_CMD_FLUSH) {
3069                 ret = ip_vs_flush();
3070                 goto out;
3071         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3072                 ret = ip_vs_genl_set_config(info->attrs);
3073                 goto out;
3074         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3075                    cmd == IPVS_CMD_DEL_DAEMON) {
3076
3077                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3078
3079                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3080                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3081                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3082                                      ip_vs_daemon_policy)) {
3083                         ret = -EINVAL;
3084                         goto out;
3085                 }
3086
3087                 if (cmd == IPVS_CMD_NEW_DAEMON)
3088                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3089                 else
3090                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3091                 goto out;
3092         } else if (cmd == IPVS_CMD_ZERO &&
3093                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3094                 ret = ip_vs_zero_all();
3095                 goto out;
3096         }
3097
3098         /* All following commands require a service argument, so check if we
3099          * received a valid one. We need a full service specification when
3100          * adding / editing a service. Only identifying members otherwise. */
3101         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3102                 need_full_svc = 1;
3103
3104         ret = ip_vs_genl_parse_service(&usvc,
3105                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3106                                        need_full_svc);
3107         if (ret)
3108                 goto out;
3109
3110         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3111         if (usvc.fwmark == 0)
3112                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3113                                           &usvc.addr, usvc.port);
3114         else
3115                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3116
3117         /* Unless we're adding a new service, the service must already exist */
3118         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3119                 ret = -ESRCH;
3120                 goto out;
3121         }
3122
3123         /* Destination commands require a valid destination argument. For
3124          * adding / editing a destination, we need a full destination
3125          * specification. */
3126         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3127             cmd == IPVS_CMD_DEL_DEST) {
3128                 if (cmd != IPVS_CMD_DEL_DEST)
3129                         need_full_dest = 1;
3130
3131                 ret = ip_vs_genl_parse_dest(&udest,
3132                                             info->attrs[IPVS_CMD_ATTR_DEST],
3133                                             need_full_dest);
3134                 if (ret)
3135                         goto out;
3136         }
3137
3138         switch (cmd) {
3139         case IPVS_CMD_NEW_SERVICE:
3140                 if (svc == NULL)
3141                         ret = ip_vs_add_service(&usvc, &svc);
3142                 else
3143                         ret = -EEXIST;
3144                 break;
3145         case IPVS_CMD_SET_SERVICE:
3146                 ret = ip_vs_edit_service(svc, &usvc);
3147                 break;
3148         case IPVS_CMD_DEL_SERVICE:
3149                 ret = ip_vs_del_service(svc);
3150                 break;
3151         case IPVS_CMD_NEW_DEST:
3152                 ret = ip_vs_add_dest(svc, &udest);
3153                 break;
3154         case IPVS_CMD_SET_DEST:
3155                 ret = ip_vs_edit_dest(svc, &udest);
3156                 break;
3157         case IPVS_CMD_DEL_DEST:
3158                 ret = ip_vs_del_dest(svc, &udest);
3159                 break;
3160         case IPVS_CMD_ZERO:
3161                 ret = ip_vs_zero_service(svc);
3162                 break;
3163         default:
3164                 ret = -EINVAL;
3165         }
3166
3167 out:
3168         if (svc)
3169                 ip_vs_service_put(svc);
3170         mutex_unlock(&__ip_vs_mutex);
3171
3172         return ret;
3173 }
3174
3175 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3176 {
3177         struct sk_buff *msg;
3178         void *reply;
3179         int ret, cmd, reply_cmd;
3180
3181         cmd = info->genlhdr->cmd;
3182
3183         if (cmd == IPVS_CMD_GET_SERVICE)
3184                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3185         else if (cmd == IPVS_CMD_GET_INFO)
3186                 reply_cmd = IPVS_CMD_SET_INFO;
3187         else if (cmd == IPVS_CMD_GET_CONFIG)
3188                 reply_cmd = IPVS_CMD_SET_CONFIG;
3189         else {
3190                 pr_err("unknown Generic Netlink command\n");
3191                 return -EINVAL;
3192         }
3193
3194         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3195         if (!msg)
3196                 return -ENOMEM;
3197
3198         mutex_lock(&__ip_vs_mutex);
3199
3200         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3201         if (reply == NULL)
3202                 goto nla_put_failure;
3203
3204         switch (cmd) {
3205         case IPVS_CMD_GET_SERVICE:
3206         {
3207                 struct ip_vs_service *svc;
3208
3209                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3210                 if (IS_ERR(svc)) {
3211                         ret = PTR_ERR(svc);
3212                         goto out_err;
3213                 } else if (svc) {
3214                         ret = ip_vs_genl_fill_service(msg, svc);
3215                         ip_vs_service_put(svc);
3216                         if (ret)
3217                                 goto nla_put_failure;
3218                 } else {
3219                         ret = -ESRCH;
3220                         goto out_err;
3221                 }
3222
3223                 break;
3224         }
3225
3226         case IPVS_CMD_GET_CONFIG:
3227         {
3228                 struct ip_vs_timeout_user t;
3229
3230                 __ip_vs_get_timeouts(&t);
3231 #ifdef CONFIG_IP_VS_PROTO_TCP
3232                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3233                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3234                             t.tcp_fin_timeout);
3235 #endif
3236 #ifdef CONFIG_IP_VS_PROTO_UDP
3237                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3238 #endif
3239
3240                 break;
3241         }
3242
3243         case IPVS_CMD_GET_INFO:
3244                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3245                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3246                             ip_vs_conn_tab_size);
3247                 break;
3248         }
3249
3250         genlmsg_end(msg, reply);
3251         ret = genlmsg_reply(msg, info);
3252         goto out;
3253
3254 nla_put_failure:
3255         pr_err("not enough space in Netlink message\n");
3256         ret = -EMSGSIZE;
3257
3258 out_err:
3259         nlmsg_free(msg);
3260 out:
3261         mutex_unlock(&__ip_vs_mutex);
3262
3263         return ret;
3264 }
3265
3266
3267 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3268         {
3269                 .cmd    = IPVS_CMD_NEW_SERVICE,
3270                 .flags  = GENL_ADMIN_PERM,
3271                 .policy = ip_vs_cmd_policy,
3272                 .doit   = ip_vs_genl_set_cmd,
3273         },
3274         {
3275                 .cmd    = IPVS_CMD_SET_SERVICE,
3276                 .flags  = GENL_ADMIN_PERM,
3277                 .policy = ip_vs_cmd_policy,
3278                 .doit   = ip_vs_genl_set_cmd,
3279         },
3280         {
3281                 .cmd    = IPVS_CMD_DEL_SERVICE,
3282                 .flags  = GENL_ADMIN_PERM,
3283                 .policy = ip_vs_cmd_policy,
3284                 .doit   = ip_vs_genl_set_cmd,
3285         },
3286         {
3287                 .cmd    = IPVS_CMD_GET_SERVICE,
3288                 .flags  = GENL_ADMIN_PERM,
3289                 .doit   = ip_vs_genl_get_cmd,
3290                 .dumpit = ip_vs_genl_dump_services,
3291                 .policy = ip_vs_cmd_policy,
3292         },
3293         {
3294                 .cmd    = IPVS_CMD_NEW_DEST,
3295                 .flags  = GENL_ADMIN_PERM,
3296                 .policy = ip_vs_cmd_policy,
3297                 .doit   = ip_vs_genl_set_cmd,
3298         },
3299         {
3300                 .cmd    = IPVS_CMD_SET_DEST,
3301                 .flags  = GENL_ADMIN_PERM,
3302                 .policy = ip_vs_cmd_policy,
3303                 .doit   = ip_vs_genl_set_cmd,
3304         },
3305         {
3306                 .cmd    = IPVS_CMD_DEL_DEST,
3307                 .flags  = GENL_ADMIN_PERM,
3308                 .policy = ip_vs_cmd_policy,
3309                 .doit   = ip_vs_genl_set_cmd,
3310         },
3311         {
3312                 .cmd    = IPVS_CMD_GET_DEST,
3313                 .flags  = GENL_ADMIN_PERM,
3314                 .policy = ip_vs_cmd_policy,
3315                 .dumpit = ip_vs_genl_dump_dests,
3316         },
3317         {
3318                 .cmd    = IPVS_CMD_NEW_DAEMON,
3319                 .flags  = GENL_ADMIN_PERM,
3320                 .policy = ip_vs_cmd_policy,
3321                 .doit   = ip_vs_genl_set_cmd,
3322         },
3323         {
3324                 .cmd    = IPVS_CMD_DEL_DAEMON,
3325                 .flags  = GENL_ADMIN_PERM,
3326                 .policy = ip_vs_cmd_policy,
3327                 .doit   = ip_vs_genl_set_cmd,
3328         },
3329         {
3330                 .cmd    = IPVS_CMD_GET_DAEMON,
3331                 .flags  = GENL_ADMIN_PERM,
3332                 .dumpit = ip_vs_genl_dump_daemons,
3333         },
3334         {
3335                 .cmd    = IPVS_CMD_SET_CONFIG,
3336                 .flags  = GENL_ADMIN_PERM,
3337                 .policy = ip_vs_cmd_policy,
3338                 .doit   = ip_vs_genl_set_cmd,
3339         },
3340         {
3341                 .cmd    = IPVS_CMD_GET_CONFIG,
3342                 .flags  = GENL_ADMIN_PERM,
3343                 .doit   = ip_vs_genl_get_cmd,
3344         },
3345         {
3346                 .cmd    = IPVS_CMD_GET_INFO,
3347                 .flags  = GENL_ADMIN_PERM,
3348                 .doit   = ip_vs_genl_get_cmd,
3349         },
3350         {
3351                 .cmd    = IPVS_CMD_ZERO,
3352                 .flags  = GENL_ADMIN_PERM,
3353                 .policy = ip_vs_cmd_policy,
3354                 .doit   = ip_vs_genl_set_cmd,
3355         },
3356         {
3357                 .cmd    = IPVS_CMD_FLUSH,
3358                 .flags  = GENL_ADMIN_PERM,
3359                 .doit   = ip_vs_genl_set_cmd,
3360         },
3361 };
3362
3363 static int __init ip_vs_genl_register(void)
3364 {
3365         return genl_register_family_with_ops(&ip_vs_genl_family,
3366                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3367 }
3368
3369 static void ip_vs_genl_unregister(void)
3370 {
3371         genl_unregister_family(&ip_vs_genl_family);
3372 }
3373
3374 /* End of Generic Netlink interface definitions */
3375
3376
3377 int __init ip_vs_control_init(void)
3378 {
3379         int ret;
3380         int idx;
3381
3382         EnterFunction(2);
3383
3384         ret = nf_register_sockopt(&ip_vs_sockopts);
3385         if (ret) {
3386                 pr_err("cannot register sockopt.\n");
3387                 return ret;
3388         }
3389
3390         ret = ip_vs_genl_register();
3391         if (ret) {
3392                 pr_err("cannot register Generic Netlink interface.\n");
3393                 nf_unregister_sockopt(&ip_vs_sockopts);
3394                 return ret;
3395         }
3396
3397         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3398         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3399
3400         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3401
3402         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3403         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3404                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3405                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3406         }
3407         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3408                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3409         }
3410
3411         ip_vs_new_estimator(&ip_vs_stats);
3412
3413         /* Hook the defense timer */
3414         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3415
3416         LeaveFunction(2);
3417         return 0;
3418 }
3419
3420
/*
 * Tear down the IPVS control plane.  After the trash cleanup, the steps
 * undo those of ip_vs_control_init() in reverse order of registration:
 * defense work, estimator, sysctl table, proc entries, Generic Netlink
 * family and finally the sockopt interface.
 */
void ip_vs_control_cleanup(void)
{
        EnterFunction(2);
        ip_vs_trash_cleanup();
        /* Stop the defense work that ip_vs_control_init() scheduled */
        cancel_rearming_delayed_work(&defense_work);
        cancel_work_sync(&defense_work.work);
        ip_vs_kill_estimator(&ip_vs_stats);
        unregister_sysctl_table(sysctl_header);
        proc_net_remove(&init_net, "ip_vs_stats");
        proc_net_remove(&init_net, "ip_vs");
        /* The user-facing control interfaces go last */
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);
}