5f5e5f4bad5e49f6805730e36e501d1bb6ed3631
[safe/jmp/linux-2.6] / net / netfilter / ipvs / ip_vs_lblcr.c
1 /*
2  * IPVS:        Locality-Based Least-Connection with Replication scheduler
3  *
4  * Authors:     Wensong Zhang <wensong@gnuchina.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Changes:
12  *     Julian Anastasov        :    Added the missing (dest->weight>0)
13  *                                  condition in the ip_vs_dest_set_max.
14  *
15  */
16
17 /*
18  * The lblc/r algorithm is as follows (pseudo code):
19  *
20  *       if serverSet[dest_ip] is null then
21  *               n, serverSet[dest_ip] <- {weighted least-conn node};
22  *       else
23  *               n <- {least-conn (alive) node in serverSet[dest_ip]};
24  *               if (n is null) OR
25  *                  (n.conns>n.weight AND
26  *                   there is a node m with m.conns<m.weight/2) then
27  *                   n <- {weighted least-conn node};
28  *                   add n to serverSet[dest_ip];
29  *               if |serverSet[dest_ip]| > 1 AND
30  *                   now - serverSet[dest_ip].lastMod > T then
31  *                   m <- {most conn node in serverSet[dest_ip]};
32  *                   remove m from serverSet[dest_ip];
33  *       if serverSet[dest_ip] changed then
34  *               serverSet[dest_ip].lastMod <- now;
35  *
36  *       return n;
37  *
38  */
39
40 #define KMSG_COMPONENT "IPVS"	/* component tag used to build the log prefix */
41 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt	/* expands to "IPVS: " followed by fmt */
42
43 #include <linux/ip.h>
44 #include <linux/module.h>
45 #include <linux/kernel.h>
46 #include <linux/skbuff.h>
47 #include <linux/jiffies.h>
48
49 /* for sysctl */
50 #include <linux/fs.h>
51 #include <linux/sysctl.h>
52 #include <net/net_namespace.h>
53
54 #include <net/ip_vs.h>
55
56
57 /*
58  *    Timing of the garbage collection of stale IPVS lblcr entries,
59  *    which runs when the table holds more than max_size entries.
60  */
61 #define CHECK_EXPIRE_INTERVAL   (60*HZ)	/* period of the expiry timer, in jiffies */
62 #define ENTRY_TIMEOUT           (6*60*HZ)	/* idle time after which an entry may be collected */
63
64 /*
65  *    Settings for the full expiration check.
66  *    When no partial expiration check (garbage collection) has run
67  *    for half an hour, do a full expiration check to collect stale
68  *    entries that haven't been touched for a day.
69  */
70 #define COUNT_FOR_FULL_EXPIRATION   30	/* timer runs (CHECK_EXPIRE_INTERVAL apart) per full check */
71 static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;	/* in jiffies; exposed as sysctl "lblcr_expiration" */
72
73
74 /*
75  *     Sizing of the IPVS lblcr entry hash table (2^TAB_BITS buckets)
76  */
77 #ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
78 #define CONFIG_IP_VS_LBLCR_TAB_BITS      10	/* fallback when not defined elsewhere */
79 #endif
80 #define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS	/* log2 of the bucket count */
81 #define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)	/* number of hash buckets */
82 #define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)	/* reduces a hash value to a bucket index */
83
84
85 /*
86  *      IPVS destination set structure and operations
87  */
/* One node of the singly linked list that backs a destination set. */
struct ip_vs_dest_list {
	/* following node, NULL at the end of the list */
	struct ip_vs_dest_list *next;
	/* destination server referenced by this node */
	struct ip_vs_dest *dest;
};
92
93 struct ip_vs_dest_set {
94         atomic_t                size;           /* set size */
95         unsigned long           lastmod;        /* last modified time */
96         struct ip_vs_dest_list  *list;          /* destination list */
97         rwlock_t                lock;           /* lock for this list */
98 };
99
100
101 static struct ip_vs_dest_list *
102 ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
103 {
104         struct ip_vs_dest_list *e;
105
106         for (e=set->list; e!=NULL; e=e->next) {
107                 if (e->dest == dest)
108                         /* already existed */
109                         return NULL;
110         }
111
112         e = kmalloc(sizeof(*e), GFP_ATOMIC);
113         if (e == NULL) {
114                 IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
115                 return NULL;
116         }
117
118         atomic_inc(&dest->refcnt);
119         e->dest = dest;
120
121         /* link it to the list */
122         e->next = set->list;
123         set->list = e;
124         atomic_inc(&set->size);
125
126         set->lastmod = jiffies;
127         return e;
128 }
129
130 static void
131 ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
132 {
133         struct ip_vs_dest_list *e, **ep;
134
135         for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
136                 if (e->dest == dest) {
137                         /* HIT */
138                         *ep = e->next;
139                         atomic_dec(&set->size);
140                         set->lastmod = jiffies;
141                         atomic_dec(&e->dest->refcnt);
142                         kfree(e);
143                         break;
144                 }
145                 ep = &e->next;
146         }
147 }
148
149 static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
150 {
151         struct ip_vs_dest_list *e, **ep;
152
153         write_lock(&set->lock);
154         for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
155                 *ep = e->next;
156                 /*
157                  * We don't kfree dest because it is refered either
158                  * by its service or by the trash dest list.
159                  */
160                 atomic_dec(&e->dest->refcnt);
161                 kfree(e);
162         }
163         write_unlock(&set->lock);
164 }
165
166 /* get weighted least-connection node in the destination set */
167 static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
168 {
169         register struct ip_vs_dest_list *e;
170         struct ip_vs_dest *dest, *least;
171         int loh, doh;
172
173         if (set == NULL)
174                 return NULL;
175
176         /* select the first destination server, whose weight > 0 */
177         for (e=set->list; e!=NULL; e=e->next) {
178                 least = e->dest;
179                 if (least->flags & IP_VS_DEST_F_OVERLOAD)
180                         continue;
181
182                 if ((atomic_read(&least->weight) > 0)
183                     && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
184                         loh = atomic_read(&least->activeconns) * 50
185                                 + atomic_read(&least->inactconns);
186                         goto nextstage;
187                 }
188         }
189         return NULL;
190
191         /* find the destination with the weighted least load */
192   nextstage:
193         for (e=e->next; e!=NULL; e=e->next) {
194                 dest = e->dest;
195                 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
196                         continue;
197
198                 doh = atomic_read(&dest->activeconns) * 50
199                         + atomic_read(&dest->inactconns);
200                 if ((loh * atomic_read(&dest->weight) >
201                      doh * atomic_read(&least->weight))
202                     && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
203                         least = dest;
204                         loh = doh;
205                 }
206         }
207
208         IP_VS_DBG_BUF(6, "ip_vs_dest_set_min: server %s:%d "
209                       "activeconns %d refcnt %d weight %d overhead %d\n",
210                       IP_VS_DBG_ADDR(least->af, &least->addr),
211                       ntohs(least->port),
212                       atomic_read(&least->activeconns),
213                       atomic_read(&least->refcnt),
214                       atomic_read(&least->weight), loh);
215         return least;
216 }
217
218
219 /* get weighted most-connection node in the destination set */
220 static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
221 {
222         register struct ip_vs_dest_list *e;
223         struct ip_vs_dest *dest, *most;
224         int moh, doh;
225
226         if (set == NULL)
227                 return NULL;
228
229         /* select the first destination server, whose weight > 0 */
230         for (e=set->list; e!=NULL; e=e->next) {
231                 most = e->dest;
232                 if (atomic_read(&most->weight) > 0) {
233                         moh = atomic_read(&most->activeconns) * 50
234                                 + atomic_read(&most->inactconns);
235                         goto nextstage;
236                 }
237         }
238         return NULL;
239
240         /* find the destination with the weighted most load */
241   nextstage:
242         for (e=e->next; e!=NULL; e=e->next) {
243                 dest = e->dest;
244                 doh = atomic_read(&dest->activeconns) * 50
245                         + atomic_read(&dest->inactconns);
246                 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
247                 if ((moh * atomic_read(&dest->weight) <
248                      doh * atomic_read(&most->weight))
249                     && (atomic_read(&dest->weight) > 0)) {
250                         most = dest;
251                         moh = doh;
252                 }
253         }
254
255         IP_VS_DBG_BUF(6, "ip_vs_dest_set_max: server %s:%d "
256                       "activeconns %d refcnt %d weight %d overhead %d\n",
257                       IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
258                       atomic_read(&most->activeconns),
259                       atomic_read(&most->refcnt),
260                       atomic_read(&most->weight), moh);
261         return most;
262 }
263
264
265 /*
266  *      IPVS lblcr entry represents an association between destination
267  *      IP address and its destination server set
268  */
269 struct ip_vs_lblcr_entry {
270         struct list_head        list;
271         int                     af;             /* address family */
272         union nf_inet_addr      addr;           /* destination IP address */
273         struct ip_vs_dest_set   set;            /* destination server set */
274         unsigned long           lastuse;        /* last used time */
275 };
276
277
278 /*
279  *      IPVS lblcr hash table
280  */
281 struct ip_vs_lblcr_table {
282         struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
283         atomic_t                entries;        /* number of entries */
284         int                     max_size;       /* maximum size of entries */
285         struct timer_list       periodic_timer; /* collect stale entries */
286         int                     rover;          /* rover for expire check */
287         int                     counter;        /* counter for no expire */
288 };
289
290
291 /*
292  *      IPVS LBLCR sysctl table
293  */
294
295 static ctl_table vs_vars_table[] = {
296         {
297                 .procname       = "lblcr_expiration",
298                 .data           = &sysctl_ip_vs_lblcr_expiration,
299                 .maxlen         = sizeof(int),
300                 .mode           = 0644,
301                 .proc_handler   = proc_dointvec_jiffies,
302         },
303         { .ctl_name = 0 }
304 };
305
/* handle returned when vs_vars_table is registered; needed to unregister it */
static struct ctl_table_header *sysctl_header;
307
308 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
309 {
310         list_del(&en->list);
311         ip_vs_dest_set_eraseall(&en->set);
312         kfree(en);
313 }
314
315
316 /*
317  *      Returns hash value for IPVS LBLCR entry
318  */
319 static inline unsigned
320 ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
321 {
322         __be32 addr_fold = addr->ip;
323
324 #ifdef CONFIG_IP_VS_IPV6
325         if (af == AF_INET6)
326                 addr_fold = addr->ip6[0]^addr->ip6[1]^
327                             addr->ip6[2]^addr->ip6[3];
328 #endif
329         return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
330 }
331
332
333 /*
334  *      Hash an entry in the ip_vs_lblcr_table.
335  *      returns bool success.
336  */
337 static void
338 ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
339 {
340         unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
341
342         list_add(&en->list, &tbl->bucket[hash]);
343         atomic_inc(&tbl->entries);
344 }
345
346
347 /*
348  *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
349  *  read lock.
350  */
351 static inline struct ip_vs_lblcr_entry *
352 ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
353                 const union nf_inet_addr *addr)
354 {
355         unsigned hash = ip_vs_lblcr_hashkey(af, addr);
356         struct ip_vs_lblcr_entry *en;
357
358         list_for_each_entry(en, &tbl->bucket[hash], list)
359                 if (ip_vs_addr_equal(af, &en->addr, addr))
360                         return en;
361
362         return NULL;
363 }
364
365
366 /*
367  * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
368  * IP address to a server. Called under write lock.
369  */
370 static inline struct ip_vs_lblcr_entry *
371 ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
372                 struct ip_vs_dest *dest)
373 {
374         struct ip_vs_lblcr_entry *en;
375
376         en = ip_vs_lblcr_get(dest->af, tbl, daddr);
377         if (!en) {
378                 en = kmalloc(sizeof(*en), GFP_ATOMIC);
379                 if (!en) {
380                         IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
381                         return NULL;
382                 }
383
384                 en->af = dest->af;
385                 ip_vs_addr_copy(dest->af, &en->addr, daddr);
386                 en->lastuse = jiffies;
387
388                 /* initilize its dest set */
389                 atomic_set(&(en->set.size), 0);
390                 en->set.list = NULL;
391                 rwlock_init(&en->set.lock);
392
393                 ip_vs_lblcr_hash(tbl, en);
394         }
395
396         write_lock(&en->set.lock);
397         ip_vs_dest_set_insert(&en->set, dest);
398         write_unlock(&en->set.lock);
399
400         return en;
401 }
402
403
404 /*
405  *      Flush all the entries of the specified table.
406  */
407 static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
408 {
409         int i;
410         struct ip_vs_lblcr_entry *en, *nxt;
411
412         /* No locking required, only called during cleanup. */
413         for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
414                 list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
415                         ip_vs_lblcr_free(en);
416                 }
417         }
418 }
419
420
421 static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
422 {
423         struct ip_vs_lblcr_table *tbl = svc->sched_data;
424         unsigned long now = jiffies;
425         int i, j;
426         struct ip_vs_lblcr_entry *en, *nxt;
427
428         for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
429                 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
430
431                 write_lock(&svc->sched_lock);
432                 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
433                         if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
434                                        now))
435                                 continue;
436
437                         ip_vs_lblcr_free(en);
438                         atomic_dec(&tbl->entries);
439                 }
440                 write_unlock(&svc->sched_lock);
441         }
442         tbl->rover = j;
443 }
444
445
446 /*
447  *      Periodical timer handler for IPVS lblcr table
448  *      It is used to collect stale entries when the number of entries
449  *      exceeds the maximum size of the table.
450  *
451  *      Fixme: we probably need more complicated algorithm to collect
452  *             entries that have not been used for a long time even
453  *             if the number of entries doesn't exceed the maximum size
454  *             of the table.
455  *      The full expiration check is for this purpose now.
456  */
457 static void ip_vs_lblcr_check_expire(unsigned long data)
458 {
459         struct ip_vs_service *svc = (struct ip_vs_service *) data;
460         struct ip_vs_lblcr_table *tbl = svc->sched_data;
461         unsigned long now = jiffies;
462         int goal;
463         int i, j;
464         struct ip_vs_lblcr_entry *en, *nxt;
465
466         if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
467                 /* do full expiration check */
468                 ip_vs_lblcr_full_check(svc);
469                 tbl->counter = 1;
470                 goto out;
471         }
472
473         if (atomic_read(&tbl->entries) <= tbl->max_size) {
474                 tbl->counter++;
475                 goto out;
476         }
477
478         goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
479         if (goal > tbl->max_size/2)
480                 goal = tbl->max_size/2;
481
482         for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
483                 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
484
485                 write_lock(&svc->sched_lock);
486                 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
487                         if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
488                                 continue;
489
490                         ip_vs_lblcr_free(en);
491                         atomic_dec(&tbl->entries);
492                         goal--;
493                 }
494                 write_unlock(&svc->sched_lock);
495                 if (goal <= 0)
496                         break;
497         }
498         tbl->rover = j;
499
500   out:
501         mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
502 }
503
504 static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
505 {
506         int i;
507         struct ip_vs_lblcr_table *tbl;
508
509         /*
510          *    Allocate the ip_vs_lblcr_table for this service
511          */
512         tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
513         if (tbl == NULL) {
514                 IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
515                 return -ENOMEM;
516         }
517         svc->sched_data = tbl;
518         IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
519                   "current service\n", sizeof(*tbl));
520
521         /*
522          *    Initialize the hash buckets
523          */
524         for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
525                 INIT_LIST_HEAD(&tbl->bucket[i]);
526         }
527         tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
528         tbl->rover = 0;
529         tbl->counter = 1;
530
531         /*
532          *    Hook periodic timer for garbage collection
533          */
534         setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
535                         (unsigned long)svc);
536         mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
537
538         return 0;
539 }
540
541
542 static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
543 {
544         struct ip_vs_lblcr_table *tbl = svc->sched_data;
545
546         /* remove periodic timer */
547         del_timer_sync(&tbl->periodic_timer);
548
549         /* got to clean up table entries here */
550         ip_vs_lblcr_flush(tbl);
551
552         /* release the table itself */
553         kfree(tbl);
554         IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
555                   sizeof(*tbl));
556
557         return 0;
558 }
559
560
561 static inline struct ip_vs_dest *
562 __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
563 {
564         struct ip_vs_dest *dest, *least;
565         int loh, doh;
566
567         /*
568          * We think the overhead of processing active connections is fifty
569          * times higher than that of inactive connections in average. (This
570          * fifty times might not be accurate, we will change it later.) We
571          * use the following formula to estimate the overhead:
572          *                dest->activeconns*50 + dest->inactconns
573          * and the load:
574          *                (dest overhead) / dest->weight
575          *
576          * Remember -- no floats in kernel mode!!!
577          * The comparison of h1*w2 > h2*w1 is equivalent to that of
578          *                h1/w1 > h2/w2
579          * if every weight is larger than zero.
580          *
581          * The server with weight=0 is quiesced and will not receive any
582          * new connection.
583          */
584         list_for_each_entry(dest, &svc->destinations, n_list) {
585                 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
586                         continue;
587
588                 if (atomic_read(&dest->weight) > 0) {
589                         least = dest;
590                         loh = atomic_read(&least->activeconns) * 50
591                                 + atomic_read(&least->inactconns);
592                         goto nextstage;
593                 }
594         }
595         return NULL;
596
597         /*
598          *    Find the destination with the least load.
599          */
600   nextstage:
601         list_for_each_entry_continue(dest, &svc->destinations, n_list) {
602                 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
603                         continue;
604
605                 doh = atomic_read(&dest->activeconns) * 50
606                         + atomic_read(&dest->inactconns);
607                 if (loh * atomic_read(&dest->weight) >
608                     doh * atomic_read(&least->weight)) {
609                         least = dest;
610                         loh = doh;
611                 }
612         }
613
614         IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
615                       "activeconns %d refcnt %d weight %d overhead %d\n",
616                       IP_VS_DBG_ADDR(least->af, &least->addr),
617                       ntohs(least->port),
618                       atomic_read(&least->activeconns),
619                       atomic_read(&least->refcnt),
620                       atomic_read(&least->weight), loh);
621
622         return least;
623 }
624
625
626 /*
627  *   If this destination server is overloaded and there is a less loaded
628  *   server, then return true.
629  */
630 static inline int
631 is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
632 {
633         if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
634                 struct ip_vs_dest *d;
635
636                 list_for_each_entry(d, &svc->destinations, n_list) {
637                         if (atomic_read(&d->activeconns)*2
638                             < atomic_read(&d->weight)) {
639                                 return 1;
640                         }
641                 }
642         }
643         return 0;
644 }
645
646
647 /*
648  *    Locality-Based (weighted) Least-Connection scheduling
649  */
650 static struct ip_vs_dest *
651 ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
652 {
653         struct ip_vs_lblcr_table *tbl = svc->sched_data;
654         struct ip_vs_iphdr iph;
655         struct ip_vs_dest *dest = NULL;
656         struct ip_vs_lblcr_entry *en;
657
658         ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
659
660         IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
661
662         /* First look in our cache */
663         read_lock(&svc->sched_lock);
664         en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
665         if (en) {
666                 /* We only hold a read lock, but this is atomic */
667                 en->lastuse = jiffies;
668
669                 /* Get the least loaded destination */
670                 read_lock(&en->set.lock);
671                 dest = ip_vs_dest_set_min(&en->set);
672                 read_unlock(&en->set.lock);
673
674                 /* More than one destination + enough time passed by, cleanup */
675                 if (atomic_read(&en->set.size) > 1 &&
676                                 time_after(jiffies, en->set.lastmod +
677                                 sysctl_ip_vs_lblcr_expiration)) {
678                         struct ip_vs_dest *m;
679
680                         write_lock(&en->set.lock);
681                         m = ip_vs_dest_set_max(&en->set);
682                         if (m)
683                                 ip_vs_dest_set_erase(&en->set, m);
684                         write_unlock(&en->set.lock);
685                 }
686
687                 /* If the destination is not overloaded, use it */
688                 if (dest && !is_overloaded(dest, svc)) {
689                         read_unlock(&svc->sched_lock);
690                         goto out;
691                 }
692
693                 /* The cache entry is invalid, time to schedule */
694                 dest = __ip_vs_lblcr_schedule(svc);
695                 if (!dest) {
696                         IP_VS_ERR_RL("LBLCR: no destination available\n");
697                         read_unlock(&svc->sched_lock);
698                         return NULL;
699                 }
700
701                 /* Update our cache entry */
702                 write_lock(&en->set.lock);
703                 ip_vs_dest_set_insert(&en->set, dest);
704                 write_unlock(&en->set.lock);
705         }
706         read_unlock(&svc->sched_lock);
707
708         if (dest)
709                 goto out;
710
711         /* No cache entry, time to schedule */
712         dest = __ip_vs_lblcr_schedule(svc);
713         if (!dest) {
714                 IP_VS_DBG(1, "no destination available\n");
715                 return NULL;
716         }
717
718         /* If we fail to create a cache entry, we'll just use the valid dest */
719         write_lock(&svc->sched_lock);
720         ip_vs_lblcr_new(tbl, &iph.daddr, dest);
721         write_unlock(&svc->sched_lock);
722
723 out:
724         IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
725                       IP_VS_DBG_ADDR(svc->af, &iph.daddr),
726                       IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
727
728         return dest;
729 }
730
731
732 /*
733  *      IPVS LBLCR Scheduler structure
734  */
735 static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
736 {
737         .name =                 "lblcr",
738         .refcnt =               ATOMIC_INIT(0),
739         .module =               THIS_MODULE,
740         .n_list =               LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
741         .init_service =         ip_vs_lblcr_init_svc,
742         .done_service =         ip_vs_lblcr_done_svc,
743         .schedule =             ip_vs_lblcr_schedule,
744 };
745
746
747 static int __init ip_vs_lblcr_init(void)
748 {
749         int ret;
750
751         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
752         ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
753         if (ret)
754                 unregister_sysctl_table(sysctl_header);
755         return ret;
756 }
757
758
759 static void __exit ip_vs_lblcr_cleanup(void)
760 {
761         unregister_sysctl_table(sysctl_header);
762         unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
763 }
764
765
766 module_init(ip_vs_lblcr_init);	/* ip_vs_lblcr_init is the module's init routine */
767 module_exit(ip_vs_lblcr_cleanup);	/* ip_vs_lblcr_cleanup is the module's exit routine */
768 MODULE_LICENSE("GPL");	/* license tag, matching the GPL notice in the file header */