1 /*
2  * net/sunrpc/cache.c
3  *
4  * Generic code for various authentication-related caches
5  * used by sunrpc clients and servers.
6  *
7  * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
8  *
9  * Released under terms in GPL version 2.  See COPYING.
10  *
11  */
12
13 #include <linux/types.h>
14 #include <linux/fs.h>
15 #include <linux/file.h>
16 #include <linux/slab.h>
17 #include <linux/signal.h>
18 #include <linux/sched.h>
19 #include <linux/kmod.h>
20 #include <linux/list.h>
21 #include <linux/module.h>
22 #include <linux/ctype.h>
23 #include <asm/uaccess.h>
24 #include <linux/poll.h>
25 #include <linux/seq_file.h>
26 #include <linux/proc_fs.h>
27 #include <linux/net.h>
28 #include <linux/workqueue.h>
29 #include <linux/mutex.h>
30 #include <asm/ioctls.h>
31 #include <linux/sunrpc/types.h>
32 #include <linux/sunrpc/cache.h>
33 #include <linux/sunrpc/stats.h>
34
35 #define  RPCDBG_FACILITY RPCDBG_CACHE
36
37 static void cache_defer_req(struct cache_req *req, struct cache_head *item);
38 static void cache_revisit_request(struct cache_head *item);
39
40 void cache_init(struct cache_head *h)
41 {
42         time_t now = get_seconds();
43         h->next = NULL;
44         h->flags = 0;
45         atomic_set(&h->refcnt, 1);
46         h->expiry_time = now + CACHE_NEW_EXPIRY;
47         h->last_refresh = now;
48 }
49
50 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
51                                        struct cache_head *key, int hash)
52 {
53         struct cache_head **head,  **hp;
54         struct cache_head *new = NULL;
55
56         head = &detail->hash_table[hash];
57
58         read_lock(&detail->hash_lock);
59
60         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
61                 struct cache_head *tmp = *hp;
62                 if (detail->match(tmp, key)) {
63                         cache_get(tmp);
64                         read_unlock(&detail->hash_lock);
65                         return tmp;
66                 }
67         }
68         read_unlock(&detail->hash_lock);
69         /* Didn't find anything, insert an empty entry */
70
71         new = detail->alloc();
72         if (!new)
73                 return NULL;
74         cache_init(new);
75
76         write_lock(&detail->hash_lock);
77
78         /* check if entry appeared while we slept */
79         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
80                 struct cache_head *tmp = *hp;
81                 if (detail->match(tmp, key)) {
82                         cache_get(tmp);
83                         write_unlock(&detail->hash_lock);
84                         detail->cache_put(new, detail);
85                         return tmp;
86                 }
87         }
88         detail->init(new, key);
89         new->next = *head;
90         *head = new;
91         detail->entries++;
92         cache_get(new);
93         write_unlock(&detail->hash_lock);
94
95         return new;
96 }
97 EXPORT_SYMBOL(sunrpc_cache_lookup);
98
99 struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
100                                        struct cache_head *new, struct cache_head *old, int hash)
101 {
102         /* The 'old' entry is to be replaced by 'new'.
103          * If 'old' is not VALID, we update it directly,
104          * otherwise we need to replace it
105          */
106         struct cache_head **head;
107         struct cache_head *tmp;
108
109         if (!test_bit(CACHE_VALID, &old->flags)) {
110                 write_lock(&detail->hash_lock);
111                 if (!test_bit(CACHE_VALID, &old->flags)) {
112                         if (test_bit(CACHE_NEGATIVE, &new->flags))
113                                 set_bit(CACHE_NEGATIVE, &old->flags);
114                         else
115                                 detail->update(old, new);
116                         /* FIXME cache_fresh should come first */
117                         write_unlock(&detail->hash_lock);
118                         cache_fresh(detail, old, new->expiry_time);
119                         return old;
120                 }
121                 write_unlock(&detail->hash_lock);
122         }
123         /* We need to insert a new entry */
124         tmp = detail->alloc();
125         if (!tmp) {
126                 detail->cache_put(old, detail);
127                 return NULL;
128         }
129         cache_init(tmp);
130         detail->init(tmp, old);
131         head = &detail->hash_table[hash];
132
133         write_lock(&detail->hash_lock);
134         if (test_bit(CACHE_NEGATIVE, &new->flags))
135                 set_bit(CACHE_NEGATIVE, &tmp->flags);
136         else
137                 detail->update(tmp, new);
138         tmp->next = *head;
139         *head = tmp;
140         cache_get(tmp);
141         write_unlock(&detail->hash_lock);
142         cache_fresh(detail, tmp, new->expiry_time);
143         cache_fresh(detail, old, 0);
144         detail->cache_put(old, detail);
145         return tmp;
146 }
147 EXPORT_SYMBOL(sunrpc_cache_update);
148
149 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
150 /*
151  * This is the generic cache management routine for all
152  * the authentication caches.
153  * It checks the currency of a cache item and will (later)
154  * initiate an upcall to fill it if needed.
155  *
156  *
157  * Returns 0 if the cache_head can be used; otherwise it is cache_put()
158  * and the return value is -EAGAIN if an upcall is pending,
159  * or -ENOENT if the cache entry was negative.
160  */
161 int cache_check(struct cache_detail *detail,
162                     struct cache_head *h, struct cache_req *rqstp)
163 {
164         int rv;
165         long refresh_age, age;
166
167         /* First decide return status as best we can */
168         if (!test_bit(CACHE_VALID, &h->flags) ||
169             h->expiry_time < get_seconds())
170                 rv = -EAGAIN;
171         else if (detail->flush_time > h->last_refresh)
172                 rv = -EAGAIN;
173         else {
174                 /* entry is valid */
175                 if (test_bit(CACHE_NEGATIVE, &h->flags))
176                         rv = -ENOENT;
177                 else rv = 0;
178         }
179
180         /* now see if we want to start an upcall */
181         refresh_age = (h->expiry_time - h->last_refresh);
182         age = get_seconds() - h->last_refresh;
183
184         if (rqstp == NULL) {
185                 if (rv == -EAGAIN)
186                         rv = -ENOENT;
187         } else if (rv == -EAGAIN || age > refresh_age/2) {
188                 dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age);
189                 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
190                         switch (cache_make_upcall(detail, h)) {
191                         case -EINVAL:
192                                 clear_bit(CACHE_PENDING, &h->flags);
193                                 if (rv == -EAGAIN) {
194                                         set_bit(CACHE_NEGATIVE, &h->flags);
195                                         cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY);
196                                         rv = -ENOENT;
197                                 }
198                                 break;
199
200                         case -EAGAIN:
201                                 clear_bit(CACHE_PENDING, &h->flags);
202                                 cache_revisit_request(h);
203                                 break;
204                         }
205                 }
206         }
207
208         if (rv == -EAGAIN)
209                 cache_defer_req(rqstp, h);
210
211         if (rv)
212                 detail->cache_put(h, detail);
213         return rv;
214 }
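/*
 * Illustrative caller pattern (a sketch, not code from this file): a
 * server-side lookup typically pairs sunrpc_cache_lookup() with
 * cache_check(), passing the request's cache_req handle so the request
 * can be deferred while an upcall is outstanding, e.g.
 *
 *	h = sunrpc_cache_lookup(cd, &key.h, hash);
 *	if (h == NULL)
 *		return -ENOMEM;
 *	switch (cache_check(cd, h, &rqstp->rq_chandle)) {
 *	case 0:		use the entry, then cache_put(h, cd)
 *	case -EAGAIN:	request was deferred; drop it for now
 *	case -ENOENT:	negative entry; report "not found"
 *	}
 *
 * (key.h and rqstp are hypothetical names here.)  Note that cache_check()
 * already drops the reference on any non-zero return, so only the success
 * path needs its own cache_put().
 */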
215
216 static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
217
218 void cache_fresh(struct cache_detail *detail,
219                  struct cache_head *head, time_t expiry)
220 {
221
222         head->expiry_time = expiry;
223         head->last_refresh = get_seconds();
224         if (!test_and_set_bit(CACHE_VALID, &head->flags))
225                 cache_revisit_request(head);
226         if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
227                 cache_revisit_request(head);
228                 queue_loose(detail, head);
229         }
230 }
231
232 /*
233  * caches need to be periodically cleaned.
234  * For this we maintain a list of cache_detail and
235  * a current pointer into that list and into the table
236  * for that entry.
237  *
238  * Each time cache_clean is called it finds the next non-empty entry
239  * in the current table and walks the list in that entry
240  * looking for entries that can be removed.
241  *
242  * An entry gets removed if:
243  * - The expiry is before current time
244  * - The last_refresh time is before the flush_time for that cache
245  *
246  * later we might drop old entries with non-NEVER expiry if that table
247  * is getting 'full' for some definition of 'full'
248  *
249  * The question of "how often to scan a table" is an interesting one
250  * and is answered in part by the use of the "nextcheck" field in the
251  * cache_detail.
252  * When a scan of a table begins, the nextcheck field is set to a time
253  * that is well into the future.
254  * While scanning, if an expiry time is found that is earlier than the
255  * current nextcheck time, nextcheck is set to that expiry time.
256  * If the flush_time is ever set to a time earlier than the nextcheck
257  * time, the nextcheck time is then set to that flush_time.
258  *
259  * A table is then only scanned if the current time is at least
260  * the nextcheck time.
261  * 
262  */
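/*
 * A worked example of the nextcheck logic above: when a scan of a table
 * starts at time T, nextcheck is first set to T+1800 (see cache_clean()
 * below).  If the scan then meets entries expiring at T+60 and T+600,
 * nextcheck is pulled back to T+61, so the table will not be rescanned
 * until roughly a minute later unless a flush forces an earlier check.
 */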
263
264 static LIST_HEAD(cache_list);
265 static DEFINE_SPINLOCK(cache_list_lock);
266 static struct cache_detail *current_detail;
267 static int current_index;
268
269 static struct file_operations cache_file_operations;
270 static struct file_operations content_file_operations;
271 static struct file_operations cache_flush_operations;
272
273 static void do_cache_clean(void *data);
274 static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
275
276 void cache_register(struct cache_detail *cd)
277 {
278         cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
279         if (cd->proc_ent) {
280                 struct proc_dir_entry *p;
281                 cd->proc_ent->owner = cd->owner;
282                 cd->channel_ent = cd->content_ent = NULL;
283                 
284                 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
285                                       cd->proc_ent);
286                 cd->flush_ent =  p;
287                 if (p) {
288                         p->proc_fops = &cache_flush_operations;
289                         p->owner = cd->owner;
290                         p->data = cd;
291                 }
292  
293                 if (cd->cache_request || cd->cache_parse) {
294                         p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
295                                               cd->proc_ent);
296                         cd->channel_ent = p;
297                         if (p) {
298                                 p->proc_fops = &cache_file_operations;
299                                 p->owner = cd->owner;
300                                 p->data = cd;
301                         }
302                 }
303                 if (cd->cache_show) {
304                         p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
305                                               cd->proc_ent);
306                         cd->content_ent = p;
307                         if (p) {
308                                 p->proc_fops = &content_file_operations;
309                                 p->owner = cd->owner;
310                                 p->data = cd;
311                         }
312                 }
313         }
314         rwlock_init(&cd->hash_lock);
315         INIT_LIST_HEAD(&cd->queue);
316         spin_lock(&cache_list_lock);
317         cd->nextcheck = 0;
318         cd->entries = 0;
319         atomic_set(&cd->readers, 0);
320         cd->last_close = 0;
321         cd->last_warn = -1;
322         list_add(&cd->others, &cache_list);
323         spin_unlock(&cache_list_lock);
324
325         /* start the cleaning process */
326         schedule_work(&cache_cleaner);
327 }
328
329 int cache_unregister(struct cache_detail *cd)
330 {
331         cache_purge(cd);
332         spin_lock(&cache_list_lock);
333         write_lock(&cd->hash_lock);
334         if (cd->entries || atomic_read(&cd->inuse)) {
335                 write_unlock(&cd->hash_lock);
336                 spin_unlock(&cache_list_lock);
337                 return -EBUSY;
338         }
339         if (current_detail == cd)
340                 current_detail = NULL;
341         list_del_init(&cd->others);
342         write_unlock(&cd->hash_lock);
343         spin_unlock(&cache_list_lock);
344         if (cd->proc_ent) {
345                 if (cd->flush_ent)
346                         remove_proc_entry("flush", cd->proc_ent);
347                 if (cd->channel_ent)
348                         remove_proc_entry("channel", cd->proc_ent);
349                 if (cd->content_ent)
350                         remove_proc_entry("content", cd->proc_ent);
351
352                 cd->proc_ent = NULL;
353                 remove_proc_entry(cd->name, proc_net_rpc);
354         }
355         if (list_empty(&cache_list)) {
356                 /* the module must be being unloaded, so it's safe to kill the worker */
357                 cancel_delayed_work(&cache_cleaner);
358                 flush_scheduled_work();
359         }
360         return 0;
361 }
362
363 /* cache_clean tries to find something to clean
364  * and cleans it.
365  * It returns 1 if it cleaned something,
366  *            0 if it didn't find anything this time
367  *           -1 if it fell off the end of the list.
368  */
369 static int cache_clean(void)
370 {
371         int rv = 0;
372         struct list_head *next;
373
374         spin_lock(&cache_list_lock);
375
376         /* find a suitable table if we don't already have one */
377         while (current_detail == NULL ||
378             current_index >= current_detail->hash_size) {
379                 if (current_detail)
380                         next = current_detail->others.next;
381                 else
382                         next = cache_list.next;
383                 if (next == &cache_list) {
384                         current_detail = NULL;
385                         spin_unlock(&cache_list_lock);
386                         return -1;
387                 }
388                 current_detail = list_entry(next, struct cache_detail, others);
389                 if (current_detail->nextcheck > get_seconds())
390                         current_index = current_detail->hash_size;
391                 else {
392                         current_index = 0;
393                         current_detail->nextcheck = get_seconds()+30*60;
394                 }
395         }
396
397         /* find a non-empty bucket in the table */
398         while (current_detail &&
399                current_index < current_detail->hash_size &&
400                current_detail->hash_table[current_index] == NULL)
401                 current_index++;
402
403         /* find a cleanable entry in the bucket and clean it, or set to next bucket */
404         
405         if (current_detail && current_index < current_detail->hash_size) {
406                 struct cache_head *ch, **cp;
407                 struct cache_detail *d;
408                 
409                 write_lock(&current_detail->hash_lock);
410
411                 /* Ok, now to clean this strand */
412                         
413                 cp = & current_detail->hash_table[current_index];
414                 ch = *cp;
415                 for (; ch; cp= & ch->next, ch= *cp) {
416                         if (current_detail->nextcheck > ch->expiry_time)
417                                 current_detail->nextcheck = ch->expiry_time+1;
418                         if (ch->expiry_time >= get_seconds()
419                             && ch->last_refresh >= current_detail->flush_time
420                                 )
421                                 continue;
422                         if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
423                                 queue_loose(current_detail, ch);
424
425                         if (atomic_read(&ch->refcnt) == 1)
426                                 break;
427                 }
428                 if (ch) {
429                         *cp = ch->next;
430                         ch->next = NULL;
431                         current_detail->entries--;
432                         rv = 1;
433                 }
434                 write_unlock(&current_detail->hash_lock);
435                 d = current_detail;
436                 if (!ch)
437                         current_index ++;
438                 spin_unlock(&cache_list_lock);
439                 if (ch)
440                         d->cache_put(ch, d);
441         } else
442                 spin_unlock(&cache_list_lock);
443
444         return rv;
445 }
446
447 /*
448  * We want to regularly clean the cache, so we need to schedule some work ...
449  */
450 static void do_cache_clean(void *data)
451 {
452         int delay = 5;
453         if (cache_clean() == -1)
454                 delay = 30*HZ;
455
456         if (list_empty(&cache_list))
457                 delay = 0;
458
459         if (delay)
460                 schedule_delayed_work(&cache_cleaner, delay);
461 }
462
463
464 /* 
465  * Clean all caches promptly.  This just calls cache_clean
466  * repeatedly until we are sure that every cache has had a chance to 
467  * be fully cleaned
468  */
469 void cache_flush(void)
470 {
471         while (cache_clean() != -1)
472                 cond_resched();
473         while (cache_clean() != -1)
474                 cond_resched();
475 }
476
477 void cache_purge(struct cache_detail *detail)
478 {
479         detail->flush_time = LONG_MAX;
480         detail->nextcheck = get_seconds();
481         cache_flush();
482         detail->flush_time = 1;
483 }
484
485
486
487 /*
488  * Deferral and Revisiting of Requests.
489  *
490  * If a cache lookup finds a pending entry, we
491  * need to defer the request and revisit it later.
492  * All deferred requests are stored in a hash table,
493  * indexed by "struct cache_head *".
494  * As it may be wasteful to store a whole request
495  * structure, we allow the request to provide a 
496  * deferred form, which must contain a
497  * 'struct cache_deferred_req'.
498  * This cache_deferred_req contains a method that allows
499  * it to be revisited when the cache information becomes available.
500  */
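/*
 * For example, the RPC server layer embeds a struct cache_deferred_req in
 * its own deferred-request structure: its ->defer method saves just enough
 * of the request to replay it later, and its ->revisit method requeues that
 * saved request once the cache item becomes usable (or discards it when the
 * 'too_many' argument is set).  This is only an illustrative summary; see
 * the svc code for the real implementation.
 */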
501
502 #define DFR_HASHSIZE    (PAGE_SIZE/sizeof(struct list_head))
503 #define DFR_HASH(item)  ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE)
504
505 #define DFR_MAX 300     /* ??? */
506
507 static DEFINE_SPINLOCK(cache_defer_lock);
508 static LIST_HEAD(cache_defer_list);
509 static struct list_head cache_defer_hash[DFR_HASHSIZE];
510 static int cache_defer_cnt;
511
512 static void cache_defer_req(struct cache_req *req, struct cache_head *item)
513 {
514         struct cache_deferred_req *dreq;
515         int hash = DFR_HASH(item);
516
517         dreq = req->defer(req);
518         if (dreq == NULL)
519                 return;
520
521         dreq->item = item;
522         dreq->recv_time = get_seconds();
523
524         spin_lock(&cache_defer_lock);
525
526         list_add(&dreq->recent, &cache_defer_list);
527
528         if (cache_defer_hash[hash].next == NULL)
529                 INIT_LIST_HEAD(&cache_defer_hash[hash]);
530         list_add(&dreq->hash, &cache_defer_hash[hash]);
531
532         /* it is in, now maybe clean up */
533         dreq = NULL;
534         if (++cache_defer_cnt > DFR_MAX) {
535                 /* too many deferred requests: randomly drop
536                  * either the first or the last one
537                  */
538                 if (net_random()&1) 
539                         dreq = list_entry(cache_defer_list.next,
540                                           struct cache_deferred_req,
541                                           recent);
542                 else
543                         dreq = list_entry(cache_defer_list.prev,
544                                           struct cache_deferred_req,
545                                           recent);
546                 list_del(&dreq->recent);
547                 list_del(&dreq->hash);
548                 cache_defer_cnt--;
549         }
550         spin_unlock(&cache_defer_lock);
551
552         if (dreq) {
553                 /* there was one too many */
554                 dreq->revisit(dreq, 1);
555         }
556         if (!test_bit(CACHE_PENDING, &item->flags)) {
557                 /* must have just been validated... */
558                 cache_revisit_request(item);
559         }
560 }
561
562 static void cache_revisit_request(struct cache_head *item)
563 {
564         struct cache_deferred_req *dreq;
565         struct list_head pending;
566
567         struct list_head *lp;
568         int hash = DFR_HASH(item);
569
570         INIT_LIST_HEAD(&pending);
571         spin_lock(&cache_defer_lock);
572         
573         lp = cache_defer_hash[hash].next;
574         if (lp) {
575                 while (lp != &cache_defer_hash[hash]) {
576                         dreq = list_entry(lp, struct cache_deferred_req, hash);
577                         lp = lp->next;
578                         if (dreq->item == item) {
579                                 list_del(&dreq->hash);
580                                 list_move(&dreq->recent, &pending);
581                                 cache_defer_cnt--;
582                         }
583                 }
584         }
585         spin_unlock(&cache_defer_lock);
586
587         while (!list_empty(&pending)) {
588                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
589                 list_del_init(&dreq->recent);
590                 dreq->revisit(dreq, 0);
591         }
592 }
593
594 void cache_clean_deferred(void *owner)
595 {
596         struct cache_deferred_req *dreq, *tmp;
597         struct list_head pending;
598
599
600         INIT_LIST_HEAD(&pending);
601         spin_lock(&cache_defer_lock);
602         
603         list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
604                 if (dreq->owner == owner) {
605                         list_del(&dreq->hash);
606                         list_move(&dreq->recent, &pending);
607                         cache_defer_cnt--;
608                 }
609         }
610         spin_unlock(&cache_defer_lock);
611
612         while (!list_empty(&pending)) {
613                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
614                 list_del_init(&dreq->recent);
615                 dreq->revisit(dreq, 1);
616         }
617 }
618
619 /*
620  * communicate with user-space
621  *
622  * We have a magic /proc file - /proc/sunrpc/cache
623  * On read, you get a full request, or nothing if none is queued
624  * On write, an update request is processed
625  * Poll reports readable whenever a request is queued, and always allows write
626  *
627  * Implemented by a linked list of requests.  Each open file has
628  * a ->private that also exists in this list.  New requests are added
629  * to the end and may wake up any preceding readers.
630  * New readers are added to the head.  If, on read, an item is found with
631  * CACHE_PENDING clear, we free it from the list.
632  *
633  */
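/*
 * From user space, a cache daemon (rpc.mountd, rpc.idmapd, ...) typically
 * poll()s the channel file, read()s one complete request record (as
 * formatted by ->cache_request), resolves it, and write()s back a single
 * newline-terminated reply which is handed to ->cache_parse.  This is a
 * description of the expected usage, not something enforced by the code
 * below.
 */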
634
635 static DEFINE_SPINLOCK(queue_lock);
636 static DEFINE_MUTEX(queue_io_mutex);
637
638 struct cache_queue {
639         struct list_head        list;
640         int                     reader; /* if 0, then request */
641 };
642 struct cache_request {
643         struct cache_queue      q;
644         struct cache_head       *item;
645         char                    * buf;
646         int                     len;
647         int                     readers;
648 };
649 struct cache_reader {
650         struct cache_queue      q;
651         int                     offset; /* if non-0, we have a refcnt on next request */
652 };
653
654 static ssize_t
655 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
656 {
657         struct cache_reader *rp = filp->private_data;
658         struct cache_request *rq;
659         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
660         int err;
661
662         if (count == 0)
663                 return 0;
664
665         mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
666                               * readers on this file */
667  again:
668         spin_lock(&queue_lock);
669         /* need to find next request */
670         while (rp->q.list.next != &cd->queue &&
671                list_entry(rp->q.list.next, struct cache_queue, list)
672                ->reader) {
673                 struct list_head *next = rp->q.list.next;
674                 list_move(&rp->q.list, next);
675         }
676         if (rp->q.list.next == &cd->queue) {
677                 spin_unlock(&queue_lock);
678                 mutex_unlock(&queue_io_mutex);
679                 BUG_ON(rp->offset);
680                 return 0;
681         }
682         rq = container_of(rp->q.list.next, struct cache_request, q.list);
683         BUG_ON(rq->q.reader);
684         if (rp->offset == 0)
685                 rq->readers++;
686         spin_unlock(&queue_lock);
687
688         if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
689                 err = -EAGAIN;
690                 spin_lock(&queue_lock);
691                 list_move(&rp->q.list, &rq->q.list);
692                 spin_unlock(&queue_lock);
693         } else {
694                 if (rp->offset + count > rq->len)
695                         count = rq->len - rp->offset;
696                 err = -EFAULT;
697                 if (copy_to_user(buf, rq->buf + rp->offset, count))
698                         goto out;
699                 rp->offset += count;
700                 if (rp->offset >= rq->len) {
701                         rp->offset = 0;
702                         spin_lock(&queue_lock);
703                         list_move(&rp->q.list, &rq->q.list);
704                         spin_unlock(&queue_lock);
705                 }
706                 err = 0;
707         }
708  out:
709         if (rp->offset == 0) {
710                 /* need to release rq */
711                 spin_lock(&queue_lock);
712                 rq->readers--;
713                 if (rq->readers == 0 &&
714                     !test_bit(CACHE_PENDING, &rq->item->flags)) {
715                         list_del(&rq->q.list);
716                         spin_unlock(&queue_lock);
717                         cd->cache_put(rq->item, cd);
718                         kfree(rq->buf);
719                         kfree(rq);
720                 } else
721                         spin_unlock(&queue_lock);
722         }
723         if (err == -EAGAIN)
724                 goto again;
725         mutex_unlock(&queue_io_mutex);
726         return err ? err :  count;
727 }
728
729 static char write_buf[8192]; /* protected by queue_io_mutex */
730
731 static ssize_t
732 cache_write(struct file *filp, const char __user *buf, size_t count,
733             loff_t *ppos)
734 {
735         int err;
736         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
737
738         if (count == 0)
739                 return 0;
740         if (count >= sizeof(write_buf))
741                 return -EINVAL;
742
743         mutex_lock(&queue_io_mutex);
744
745         if (copy_from_user(write_buf, buf, count)) {
746                 mutex_unlock(&queue_io_mutex);
747                 return -EFAULT;
748         }
749         write_buf[count] = '\0';
750         if (cd->cache_parse)
751                 err = cd->cache_parse(cd, write_buf, count);
752         else
753                 err = -EINVAL;
754
755         mutex_unlock(&queue_io_mutex);
756         return err ? err : count;
757 }
758
759 static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
760
761 static unsigned int
762 cache_poll(struct file *filp, poll_table *wait)
763 {
764         unsigned int mask;
765         struct cache_reader *rp = filp->private_data;
766         struct cache_queue *cq;
767         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
768
769         poll_wait(filp, &queue_wait, wait);
770
771         /* always allow write */
772         mask = POLLOUT | POLLWRNORM;
773
774         if (!rp)
775                 return mask;
776
777         spin_lock(&queue_lock);
778
779         for (cq= &rp->q; &cq->list != &cd->queue;
780              cq = list_entry(cq->list.next, struct cache_queue, list))
781                 if (!cq->reader) {
782                         mask |= POLLIN | POLLRDNORM;
783                         break;
784                 }
785         spin_unlock(&queue_lock);
786         return mask;
787 }
788
789 static int
790 cache_ioctl(struct inode *ino, struct file *filp,
791             unsigned int cmd, unsigned long arg)
792 {
793         int len = 0;
794         struct cache_reader *rp = filp->private_data;
795         struct cache_queue *cq;
796         struct cache_detail *cd = PDE(ino)->data;
797
798         if (cmd != FIONREAD || !rp)
799                 return -EINVAL;
800
801         spin_lock(&queue_lock);
802
803         /* only find the length remaining in current request,
804          * or the length of the next request
805          */
806         for (cq= &rp->q; &cq->list != &cd->queue;
807              cq = list_entry(cq->list.next, struct cache_queue, list))
808                 if (!cq->reader) {
809                         struct cache_request *cr =
810                                 container_of(cq, struct cache_request, q);
811                         len = cr->len - rp->offset;
812                         break;
813                 }
814         spin_unlock(&queue_lock);
815
816         return put_user(len, (int __user *)arg);
817 }
818
819 static int
820 cache_open(struct inode *inode, struct file *filp)
821 {
822         struct cache_reader *rp = NULL;
823
824         nonseekable_open(inode, filp);
825         if (filp->f_mode & FMODE_READ) {
826                 struct cache_detail *cd = PDE(inode)->data;
827
828                 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
829                 if (!rp)
830                         return -ENOMEM;
831                 rp->offset = 0;
832                 rp->q.reader = 1;
833                 atomic_inc(&cd->readers);
834                 spin_lock(&queue_lock);
835                 list_add(&rp->q.list, &cd->queue);
836                 spin_unlock(&queue_lock);
837         }
838         filp->private_data = rp;
839         return 0;
840 }
841
842 static int
843 cache_release(struct inode *inode, struct file *filp)
844 {
845         struct cache_reader *rp = filp->private_data;
846         struct cache_detail *cd = PDE(inode)->data;
847
848         if (rp) {
849                 spin_lock(&queue_lock);
850                 if (rp->offset) {
851                         struct cache_queue *cq;
852                         for (cq= &rp->q; &cq->list != &cd->queue;
853                              cq = list_entry(cq->list.next, struct cache_queue, list))
854                                 if (!cq->reader) {
855                                         container_of(cq, struct cache_request, q)
856                                                 ->readers--;
857                                         break;
858                                 }
859                         rp->offset = 0;
860                 }
861                 list_del(&rp->q.list);
862                 spin_unlock(&queue_lock);
863
864                 filp->private_data = NULL;
865                 kfree(rp);
866
867                 cd->last_close = get_seconds();
868                 atomic_dec(&cd->readers);
869         }
870         return 0;
871 }
872
873
874
875 static struct file_operations cache_file_operations = {
876         .owner          = THIS_MODULE,
877         .llseek         = no_llseek,
878         .read           = cache_read,
879         .write          = cache_write,
880         .poll           = cache_poll,
881         .ioctl          = cache_ioctl, /* for FIONREAD */
882         .open           = cache_open,
883         .release        = cache_release,
884 };
885
886
887 static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
888 {
889         struct cache_queue *cq;
890         spin_lock(&queue_lock);
891         list_for_each_entry(cq, &detail->queue, list)
892                 if (!cq->reader) {
893                         struct cache_request *cr = container_of(cq, struct cache_request, q);
894                         if (cr->item != ch)
895                                 continue;
896                         if (cr->readers != 0)
897                                 continue;
898                         list_del(&cr->q.list);
899                         spin_unlock(&queue_lock);
900                         detail->cache_put(cr->item, detail);
901                         kfree(cr->buf);
902                         kfree(cr);
903                         return;
904                 }
905         spin_unlock(&queue_lock);
906 }
907
908 /*
909  * Support routines for text-based upcalls.
910  * Fields are separated by spaces.
911  * Fields are either mangled to quote space, tab, newline and backslash
912  * with a "\nnn" octal escape, or hexified with a leading \x.
913  * A record is terminated with a newline.
914  *
915  */
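/*
 * For example (derived from qword_add/qword_addhex below):
 *
 *	qword_add(&bp, &len, "a b")            emits "a\040b "
 *	qword_addhex(&bp, &len, "\001\177", 2)  emits "\x017f "
 *
 * i.e. the space inside the string becomes the octal escape \040, a binary
 * field becomes \x followed by two hex digits per byte, and every field is
 * followed by a single separating space.
 */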
916
917 void qword_add(char **bpp, int *lp, char *str)
918 {
919         char *bp = *bpp;
920         int len = *lp;
921         char c;
922
923         if (len < 0) return;
924
925         while ((c=*str++) && len)
926                 switch(c) {
927                 case ' ':
928                 case '\t':
929                 case '\n':
930                 case '\\':
931                         if (len >= 4) {
932                                 *bp++ = '\\';
933                                 *bp++ = '0' + ((c & 0300)>>6);
934                                 *bp++ = '0' + ((c & 0070)>>3);
935                                 *bp++ = '0' + ((c & 0007)>>0);
936                         }
937                         len -= 4;
938                         break;
939                 default:
940                         *bp++ = c;
941                         len--;
942                 }
943         if (c || len <1) len = -1;
944         else {
945                 *bp++ = ' ';
946                 len--;
947         }
948         *bpp = bp;
949         *lp = len;
950 }
951
952 void qword_addhex(char **bpp, int *lp, char *buf, int blen)
953 {
954         char *bp = *bpp;
955         int len = *lp;
956
957         if (len < 0) return;
958
959         if (len > 2) {
960                 *bp++ = '\\';
961                 *bp++ = 'x';
962                 len -= 2;
963                 while (blen && len >= 2) {
964                         unsigned char c = *buf++;
965                         *bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
966                         *bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
967                         len -= 2;
968                         blen--;
969                 }
970         }
971         if (blen || len<1) len = -1;
972         else {
973                 *bp++ = ' ';
974                 len--;
975         }
976         *bpp = bp;
977         *lp = len;
978 }
979
980 static void warn_no_listener(struct cache_detail *detail)
981 {
982         if (detail->last_warn != detail->last_close) {
983                 detail->last_warn = detail->last_close;
984                 if (detail->warn_no_listener)
985                         detail->warn_no_listener(detail);
986         }
987 }
988
989 /*
990  * register an upcall request to user-space.
991  * Each request is at most one page long.
992  */
993 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
994 {
995
996         char *buf;
997         struct cache_request *crq;
998         char *bp;
999         int len;
1000
1001         if (detail->cache_request == NULL)
1002                 return -EINVAL;
1003
1004         if (atomic_read(&detail->readers) == 0 &&
1005             detail->last_close < get_seconds() - 30) {
1006                         warn_no_listener(detail);
1007                         return -EINVAL;
1008         }
1009
1010         buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1011         if (!buf)
1012                 return -EAGAIN;
1013
1014         crq = kmalloc(sizeof (*crq), GFP_KERNEL);
1015         if (!crq) {
1016                 kfree(buf);
1017                 return -EAGAIN;
1018         }
1019
1020         bp = buf; len = PAGE_SIZE;
1021
1022         detail->cache_request(detail, h, &bp, &len);
1023
1024         if (len < 0) {
1025                 kfree(buf);
1026                 kfree(crq);
1027                 return -EAGAIN;
1028         }
1029         crq->q.reader = 0;
1030         crq->item = cache_get(h);
1031         crq->buf = buf;
1032         crq->len = PAGE_SIZE - len;
1033         crq->readers = 0;
1034         spin_lock(&queue_lock);
1035         list_add_tail(&crq->q.list, &detail->queue);
1036         spin_unlock(&queue_lock);
1037         wake_up(&queue_wait);
1038         return 0;
1039 }
1040
1041 /*
1042  * parse a message from user-space and pass it
1043  * to an appropriate cache
1044  * Messages are, like requests, separated into fields by
1045  * spaces and dequoted as \xHEXSTRING or embedded \nnn octal.
1046  *
1047  * Message is 
1048  *   reply cachename expiry key ... content....
1049  *
1050  * key and content are both parsed by cache 
1051  */
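/*
 * As a purely hypothetical example, a cache whose key is a single name and
 * whose content is one number might accept a reply line such as:
 *
 *	some\040name 1500000000 42\n
 *
 * where the first field is the (escaped) key, the second the expiry time in
 * seconds, and the third the content; the exact layout is entirely up to
 * the individual cache's ->cache_parse routine.
 */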
1052
1053 #define isodigit(c) (isdigit(c) && c <= '7')
1054 int qword_get(char **bpp, char *dest, int bufsize)
1055 {
1056         /* return bytes copied, or -1 on error */
1057         char *bp = *bpp;
1058         int len = 0;
1059
1060         while (*bp == ' ') bp++;
1061
1062         if (bp[0] == '\\' && bp[1] == 'x') {
1063                 /* HEX STRING */
1064                 bp += 2;
1065                 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
1066                         int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
1067                         bp++;
1068                         byte <<= 4;
1069                         byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
1070                         *dest++ = byte;
1071                         bp++;
1072                         len++;
1073                 }
1074         } else {
1075                 /* text with \nnn octal quoting */
1076                 while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
1077                         if (*bp == '\\' &&
1078                             isodigit(bp[1]) && (bp[1] <= '3') &&
1079                             isodigit(bp[2]) &&
1080                             isodigit(bp[3])) {
1081                                 int byte = (*++bp -'0');
1082                                 bp++;
1083                                 byte = (byte << 3) | (*bp++ - '0');
1084                                 byte = (byte << 3) | (*bp++ - '0');
1085                                 *dest++ = byte;
1086                                 len++;
1087                         } else {
1088                                 *dest++ = *bp++;
1089                                 len++;
1090                         }
1091                 }
1092         }
1093
1094         if (*bp != ' ' && *bp != '\n' && *bp != '\0')
1095                 return -1;
1096         while (*bp == ' ') bp++;
1097         *bpp = bp;
1098         *dest = '\0';
1099         return len;
1100 }
1101
1102
1103 /*
1104  * support /proc/sunrpc/cache/$CACHENAME/content
1105  * as a seqfile.
1106  * We call ->cache_show passing NULL for the item to
1107  * get a header, then pass each real item in the cache
1108  */
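/*
 * The seq_file position is packed as follows (see c_start/c_next below):
 * position 0 is reserved for the header; for any other position, after
 * subtracting 1, the upper 32 bits select the hash bucket and the lower
 * 32 bits count entries along that bucket's chain.
 */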
1109
1110 struct handle {
1111         struct cache_detail *cd;
1112 };
1113
1114 static void *c_start(struct seq_file *m, loff_t *pos)
1115 {
1116         loff_t n = *pos;
1117         unsigned hash, entry;
1118         struct cache_head *ch;
1119         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1120         
1121
1122         read_lock(&cd->hash_lock);
1123         if (!n--)
1124                 return SEQ_START_TOKEN;
1125         hash = n >> 32;
1126         entry = n & ((1LL<<32) - 1);
1127
1128         for (ch=cd->hash_table[hash]; ch; ch=ch->next)
1129                 if (!entry--)
1130                         return ch;
1131         n &= ~((1LL<<32) - 1);
1132         do {
1133                 hash++;
1134                 n += 1LL<<32;
1135         } while(hash < cd->hash_size && 
1136                 cd->hash_table[hash]==NULL);
1137         if (hash >= cd->hash_size)
1138                 return NULL;
1139         *pos = n+1;
1140         return cd->hash_table[hash];
1141 }
1142
1143 static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1144 {
1145         struct cache_head *ch = p;
1146         int hash = (*pos >> 32);
1147         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1148
1149         if (p == SEQ_START_TOKEN)
1150                 hash = 0;
1151         else if (ch->next == NULL) {
1152                 hash++;
1153                 *pos += 1LL<<32;
1154         } else {
1155                 ++*pos;
1156                 return ch->next;
1157         }
1158         *pos &= ~((1LL<<32) - 1);
1159         while (hash < cd->hash_size &&
1160                cd->hash_table[hash] == NULL) {
1161                 hash++;
1162                 *pos += 1LL<<32;
1163         }
1164         if (hash >= cd->hash_size)
1165                 return NULL;
1166         ++*pos;
1167         return cd->hash_table[hash];
1168 }
1169
1170 static void c_stop(struct seq_file *m, void *p)
1171 {
1172         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1173         read_unlock(&cd->hash_lock);
1174 }
1175
1176 static int c_show(struct seq_file *m, void *p)
1177 {
1178         struct cache_head *cp = p;
1179         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1180
1181         if (p == SEQ_START_TOKEN)
1182                 return cd->cache_show(m, cd, NULL);
1183
1184         ifdebug(CACHE)
1185                 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1186                            cp->expiry_time, atomic_read(&cp->refcnt), cp->flags);
1187         cache_get(cp);
1188         if (cache_check(cd, cp, NULL))
1189                 /* cache_check does a cache_put on failure */
1190                 seq_printf(m, "# ");
1191         else
1192                 cache_put(cp, cd);
1193
1194         return cd->cache_show(m, cd, cp);
1195 }
1196
1197 static struct seq_operations cache_content_op = {
1198         .start  = c_start,
1199         .next   = c_next,
1200         .stop   = c_stop,
1201         .show   = c_show,
1202 };
1203
1204 static int content_open(struct inode *inode, struct file *file)
1205 {
1206         int res;
1207         struct handle *han;
1208         struct cache_detail *cd = PDE(inode)->data;
1209
1210         han = kmalloc(sizeof(*han), GFP_KERNEL);
1211         if (han == NULL)
1212                 return -ENOMEM;
1213
1214         han->cd = cd;
1215
1216         res = seq_open(file, &cache_content_op);
1217         if (res)
1218                 kfree(han);
1219         else
1220                 ((struct seq_file *)file->private_data)->private = han;
1221
1222         return res;
1223 }
1224 static int content_release(struct inode *inode, struct file *file)
1225 {
1226         struct seq_file *m = (struct seq_file *)file->private_data;
1227         struct handle *han = m->private;
1228         kfree(han);
1229         m->private = NULL;
1230         return seq_release(inode, file);
1231 }
1232
1233 static struct file_operations content_file_operations = {
1234         .open           = content_open,
1235         .read           = seq_read,
1236         .llseek         = seq_lseek,
1237         .release        = content_release,
1238 };
1239
1240 static ssize_t read_flush(struct file *file, char __user *buf,
1241                             size_t count, loff_t *ppos)
1242 {
1243         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1244         char tbuf[20];
1245         unsigned long p = *ppos;
1246         int len;
1247
1248         sprintf(tbuf, "%lu\n", cd->flush_time);
1249         len = strlen(tbuf);
1250         if (p >= len)
1251                 return 0;
1252         len -= p;
1253         if (len > count) len = count;
1254         if (copy_to_user(buf, (void*)(tbuf+p), len))
1255                 len = -EFAULT;
1256         else
1257                 *ppos += len;
1258         return len;
1259 }
1260
1261 static ssize_t write_flush(struct file * file, const char __user * buf,
1262                              size_t count, loff_t *ppos)
1263 {
1264         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1265         char tbuf[20];
1266         char *ep;
1267         long flushtime;
1268         if (*ppos || count > sizeof(tbuf)-1)
1269                 return -EINVAL;
1270         if (copy_from_user(tbuf, buf, count))
1271                 return -EFAULT;
1272         tbuf[count] = 0;
1273         flushtime = simple_strtoul(tbuf, &ep, 0);
1274         if (*ep && *ep != '\n')
1275                 return -EINVAL;
1276
1277         cd->flush_time = flushtime;
1278         cd->nextcheck = get_seconds();
1279         cache_flush();
1280
1281         *ppos += count;
1282         return count;
1283 }
1284
1285 static struct file_operations cache_flush_operations = {
1286         .open           = nonseekable_open,
1287         .read           = read_flush,
1288         .write          = write_flush,
1289 };