[NET]: Make core networking code use seq_open_private
[safe/jmp/linux-2.6] / net / ipv6 / ip6_flowlabel.c
1 /*
2  *      ip6_flowlabel.c         IPv6 flowlabel manager.
3  *
4  *      This program is free software; you can redistribute it and/or
5  *      modify it under the terms of the GNU General Public License
6  *      as published by the Free Software Foundation; either version
7  *      2 of the License, or (at your option) any later version.
8  *
9  *      Authors:        Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 #include <linux/capability.h>
13 #include <linux/errno.h>
14 #include <linux/types.h>
15 #include <linux/socket.h>
16 #include <linux/net.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_arp.h>
19 #include <linux/in6.h>
20 #include <linux/route.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23
24 #include <net/net_namespace.h>
25 #include <net/sock.h>
26
27 #include <net/ipv6.h>
28 #include <net/ndisc.h>
29 #include <net/protocol.h>
30 #include <net/ip6_route.h>
31 #include <net/addrconf.h>
32 #include <net/rawv6.h>
33 #include <net/icmp.h>
34 #include <net/transp_v6.h>
35
36 #include <asm/uaccess.h>
37
38 #define FL_MIN_LINGER   6       /* Minimal linger, in seconds. It is set to the 6 sec
39                                    specified in the old IPv6 RFC, which was a reasonable value.
40                                  */
41 #define FL_MAX_LINGER   60      /* Maximal linger timeout, in seconds */
42
43 /* FL hash table: buckets are singly linked chains selected by FL_HASH(label) */
44
45 #define FL_MAX_PER_SOCK 32      /* per-socket label count tested by mem_check() */
46 #define FL_MAX_SIZE     4096    /* table-wide label budget tested by mem_check() */
47 #define FL_HASH_MASK    255
48 #define FL_HASH(l)      (ntohl(l)&FL_HASH_MASK)
49
50 static atomic_t fl_size = ATOMIC_INIT(0);       /* number of labels currently in fl_ht[] */
51 static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
52
53 static void ip6_fl_gc(unsigned long dummy);
54 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
55
56 /* FL hash table lock: it only protects the table against the GC */
57
58 static DEFINE_RWLOCK(ip6_fl_lock);
59
60 /* Big socket lock: guards the per-socket flow label lists */
61
62 static DEFINE_RWLOCK(ip6_sk_fl_lock);
63
64
65 static __inline__ struct ip6_flowlabel * __fl_lookup(__be32 label)
66 {
67         struct ip6_flowlabel *fl;
68
69         for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
70                 if (fl->label == label)
71                         return fl;
72         }
73         return NULL;
74 }
75
76 static struct ip6_flowlabel * fl_lookup(__be32 label)
77 {
78         struct ip6_flowlabel *fl;
79
80         read_lock_bh(&ip6_fl_lock);
81         fl = __fl_lookup(label);
82         if (fl)
83                 atomic_inc(&fl->users);
84         read_unlock_bh(&ip6_fl_lock);
85         return fl;
86 }
87
88
89 static void fl_free(struct ip6_flowlabel *fl)
90 {
91         if (fl)
92                 kfree(fl->opt);
93         kfree(fl);
94 }
95
96 static void fl_release(struct ip6_flowlabel *fl)
97 {
98         write_lock_bh(&ip6_fl_lock);
99
100         fl->lastuse = jiffies;
101         if (atomic_dec_and_test(&fl->users)) {
102                 unsigned long ttd = fl->lastuse + fl->linger;
103                 if (time_after(ttd, fl->expires))
104                         fl->expires = ttd;
105                 ttd = fl->expires;
106                 if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
107                         struct ipv6_txoptions *opt = fl->opt;
108                         fl->opt = NULL;
109                         kfree(opt);
110                 }
111                 if (!timer_pending(&ip6_fl_gc_timer) ||
112                     time_after(ip6_fl_gc_timer.expires, ttd))
113                         mod_timer(&ip6_fl_gc_timer, ttd);
114         }
115
116         write_unlock_bh(&ip6_fl_lock);
117 }
118
119 static void ip6_fl_gc(unsigned long dummy)
120 {
121         int i;
122         unsigned long now = jiffies;
123         unsigned long sched = 0;
124
125         write_lock(&ip6_fl_lock);
126
127         for (i=0; i<=FL_HASH_MASK; i++) {
128                 struct ip6_flowlabel *fl, **flp;
129                 flp = &fl_ht[i];
130                 while ((fl=*flp) != NULL) {
131                         if (atomic_read(&fl->users) == 0) {
132                                 unsigned long ttd = fl->lastuse + fl->linger;
133                                 if (time_after(ttd, fl->expires))
134                                         fl->expires = ttd;
135                                 ttd = fl->expires;
136                                 if (time_after_eq(now, ttd)) {
137                                         *flp = fl->next;
138                                         fl_free(fl);
139                                         atomic_dec(&fl_size);
140                                         continue;
141                                 }
142                                 if (!sched || time_before(ttd, sched))
143                                         sched = ttd;
144                         }
145                         flp = &fl->next;
146                 }
147         }
148         if (!sched && atomic_read(&fl_size))
149                 sched = now + FL_MAX_LINGER;
150         if (sched) {
151                 ip6_fl_gc_timer.expires = sched;
152                 add_timer(&ip6_fl_gc_timer);
153         }
154         write_unlock(&ip6_fl_lock);
155 }
156
157 static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
158 {
159         fl->label = label & IPV6_FLOWLABEL_MASK;
160
161         write_lock_bh(&ip6_fl_lock);
162         if (label == 0) {
163                 for (;;) {
164                         fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
165                         if (fl->label) {
166                                 struct ip6_flowlabel *lfl;
167                                 lfl = __fl_lookup(fl->label);
168                                 if (lfl == NULL)
169                                         break;
170                         }
171                 }
172         }
173
174         fl->lastuse = jiffies;
175         fl->next = fl_ht[FL_HASH(fl->label)];
176         fl_ht[FL_HASH(fl->label)] = fl;
177         atomic_inc(&fl_size);
178         write_unlock_bh(&ip6_fl_lock);
179         return 0;
180 }
181
182
183
184 /* Socket flowlabel lists */
185
186 struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
187 {
188         struct ipv6_fl_socklist *sfl;
189         struct ipv6_pinfo *np = inet6_sk(sk);
190
191         label &= IPV6_FLOWLABEL_MASK;
192
193         for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
194                 struct ip6_flowlabel *fl = sfl->fl;
195                 if (fl->label == label) {
196                         fl->lastuse = jiffies;
197                         atomic_inc(&fl->users);
198                         return fl;
199                 }
200         }
201         return NULL;
202 }
203
204 EXPORT_SYMBOL_GPL(fl6_sock_lookup);
205
206 void fl6_free_socklist(struct sock *sk)
207 {
208         struct ipv6_pinfo *np = inet6_sk(sk);
209         struct ipv6_fl_socklist *sfl;
210
211         while ((sfl = np->ipv6_fl_list) != NULL) {
212                 np->ipv6_fl_list = sfl->next;
213                 fl_release(sfl->fl);
214                 kfree(sfl);
215         }
216 }
217
218 /* Service routines */
219
220
221 /*
222    It is the only difficult place. flowlabel enforces equal headers
223    before and including routing header, however user may supply options
224    following rthdr.
225  */
226
227 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
228                                          struct ip6_flowlabel * fl,
229                                          struct ipv6_txoptions * fopt)
230 {
231         struct ipv6_txoptions * fl_opt = fl->opt;
232
233         if (fopt == NULL || fopt->opt_flen == 0)
234                 return fl_opt;
235
236         if (fl_opt != NULL) {
237                 opt_space->hopopt = fl_opt->hopopt;
238                 opt_space->dst0opt = fl_opt->dst0opt;
239                 opt_space->srcrt = fl_opt->srcrt;
240                 opt_space->opt_nflen = fl_opt->opt_nflen;
241         } else {
242                 if (fopt->opt_nflen == 0)
243                         return fopt;
244                 opt_space->hopopt = NULL;
245                 opt_space->dst0opt = NULL;
246                 opt_space->srcrt = NULL;
247                 opt_space->opt_nflen = 0;
248         }
249         opt_space->dst1opt = fopt->dst1opt;
250         opt_space->opt_flen = fopt->opt_flen;
251         return opt_space;
252 }
253
254 static unsigned long check_linger(unsigned long ttl)
255 {
256         if (ttl < FL_MIN_LINGER)
257                 return FL_MIN_LINGER*HZ;
258         if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
259                 return 0;
260         return ttl*HZ;
261 }
262
263 static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
264 {
265         linger = check_linger(linger);
266         if (!linger)
267                 return -EPERM;
268         expires = check_linger(expires);
269         if (!expires)
270                 return -EPERM;
271         fl->lastuse = jiffies;
272         if (time_before(fl->linger, linger))
273                 fl->linger = linger;
274         if (time_before(expires, fl->linger))
275                 expires = fl->linger;
276         if (time_before(fl->expires, fl->lastuse + expires))
277                 fl->expires = fl->lastuse + expires;
278         return 0;
279 }
280
281 static struct ip6_flowlabel *
282 fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
283 {
284         struct ip6_flowlabel *fl;
285         int olen;
286         int addr_type;
287         int err;
288
289         err = -ENOMEM;
290         fl = kzalloc(sizeof(*fl), GFP_KERNEL);
291         if (fl == NULL)
292                 goto done;
293
294         olen = optlen - CMSG_ALIGN(sizeof(*freq));
295         if (olen > 0) {
296                 struct msghdr msg;
297                 struct flowi flowi;
298                 int junk;
299
300                 err = -ENOMEM;
301                 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
302                 if (fl->opt == NULL)
303                         goto done;
304
305                 memset(fl->opt, 0, sizeof(*fl->opt));
306                 fl->opt->tot_len = sizeof(*fl->opt) + olen;
307                 err = -EFAULT;
308                 if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
309                         goto done;
310
311                 msg.msg_controllen = olen;
312                 msg.msg_control = (void*)(fl->opt+1);
313                 flowi.oif = 0;
314
315                 err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
316                 if (err)
317                         goto done;
318                 err = -EINVAL;
319                 if (fl->opt->opt_flen)
320                         goto done;
321                 if (fl->opt->opt_nflen == 0) {
322                         kfree(fl->opt);
323                         fl->opt = NULL;
324                 }
325         }
326
327         fl->expires = jiffies;
328         err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
329         if (err)
330                 goto done;
331         fl->share = freq->flr_share;
332         addr_type = ipv6_addr_type(&freq->flr_dst);
333         if ((addr_type&IPV6_ADDR_MAPPED)
334             || addr_type == IPV6_ADDR_ANY) {
335                 err = -EINVAL;
336                 goto done;
337         }
338         ipv6_addr_copy(&fl->dst, &freq->flr_dst);
339         atomic_set(&fl->users, 1);
340         switch (fl->share) {
341         case IPV6_FL_S_EXCL:
342         case IPV6_FL_S_ANY:
343                 break;
344         case IPV6_FL_S_PROCESS:
345                 fl->owner = current->pid;
346                 break;
347         case IPV6_FL_S_USER:
348                 fl->owner = current->euid;
349                 break;
350         default:
351                 err = -EINVAL;
352                 goto done;
353         }
354         return fl;
355
356 done:
357         fl_free(fl);
358         *err_p = err;
359         return NULL;
360 }
361
362 static int mem_check(struct sock *sk)
363 {
364         struct ipv6_pinfo *np = inet6_sk(sk);
365         struct ipv6_fl_socklist *sfl;
366         int room = FL_MAX_SIZE - atomic_read(&fl_size);
367         int count = 0;
368
369         if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
370                 return 0;
371
372         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
373                 count++;
374
375         if (room <= 0 ||
376             ((count >= FL_MAX_PER_SOCK ||
377              (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
378              && !capable(CAP_NET_ADMIN)))
379                 return -ENOBUFS;
380
381         return 0;
382 }
383
384 static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
385 {
386         if (h1 == h2)
387                 return 0;
388         if (h1 == NULL || h2 == NULL)
389                 return 1;
390         if (h1->hdrlen != h2->hdrlen)
391                 return 1;
392         return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
393 }
394
395 static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
396 {
397         if (o1 == o2)
398                 return 0;
399         if (o1 == NULL || o2 == NULL)
400                 return 1;
401         if (o1->opt_nflen != o2->opt_nflen)
402                 return 1;
403         if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
404                 return 1;
405         if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
406                 return 1;
407         if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
408                 return 1;
409         return 0;
410 }
411
412 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
413 {
414         int err;
415         struct ipv6_pinfo *np = inet6_sk(sk);
416         struct in6_flowlabel_req freq;
417         struct ipv6_fl_socklist *sfl1=NULL;
418         struct ipv6_fl_socklist *sfl, **sflp;
419         struct ip6_flowlabel *fl;
420
421         if (optlen < sizeof(freq))
422                 return -EINVAL;
423
424         if (copy_from_user(&freq, optval, sizeof(freq)))
425                 return -EFAULT;
426
427         switch (freq.flr_action) {
428         case IPV6_FL_A_PUT:
429                 write_lock_bh(&ip6_sk_fl_lock);
430                 for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
431                         if (sfl->fl->label == freq.flr_label) {
432                                 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
433                                         np->flow_label &= ~IPV6_FLOWLABEL_MASK;
434                                 *sflp = sfl->next;
435                                 write_unlock_bh(&ip6_sk_fl_lock);
436                                 fl_release(sfl->fl);
437                                 kfree(sfl);
438                                 return 0;
439                         }
440                 }
441                 write_unlock_bh(&ip6_sk_fl_lock);
442                 return -ESRCH;
443
444         case IPV6_FL_A_RENEW:
445                 read_lock_bh(&ip6_sk_fl_lock);
446                 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
447                         if (sfl->fl->label == freq.flr_label) {
448                                 err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
449                                 read_unlock_bh(&ip6_sk_fl_lock);
450                                 return err;
451                         }
452                 }
453                 read_unlock_bh(&ip6_sk_fl_lock);
454
455                 if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
456                         fl = fl_lookup(freq.flr_label);
457                         if (fl) {
458                                 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
459                                 fl_release(fl);
460                                 return err;
461                         }
462                 }
463                 return -ESRCH;
464
465         case IPV6_FL_A_GET:
466                 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
467                         return -EINVAL;
468
469                 fl = fl_create(&freq, optval, optlen, &err);
470                 if (fl == NULL)
471                         return err;
472                 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
473
474                 if (freq.flr_label) {
475                         struct ip6_flowlabel *fl1 = NULL;
476
477                         err = -EEXIST;
478                         read_lock_bh(&ip6_sk_fl_lock);
479                         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
480                                 if (sfl->fl->label == freq.flr_label) {
481                                         if (freq.flr_flags&IPV6_FL_F_EXCL) {
482                                                 read_unlock_bh(&ip6_sk_fl_lock);
483                                                 goto done;
484                                         }
485                                         fl1 = sfl->fl;
486                                         atomic_inc(&fl1->users);
487                                         break;
488                                 }
489                         }
490                         read_unlock_bh(&ip6_sk_fl_lock);
491
492                         if (fl1 == NULL)
493                                 fl1 = fl_lookup(freq.flr_label);
494                         if (fl1) {
495                                 err = -EEXIST;
496                                 if (freq.flr_flags&IPV6_FL_F_EXCL)
497                                         goto release;
498                                 err = -EPERM;
499                                 if (fl1->share == IPV6_FL_S_EXCL ||
500                                     fl1->share != fl->share ||
501                                     fl1->owner != fl->owner)
502                                         goto release;
503
504                                 err = -EINVAL;
505                                 if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
506                                     ipv6_opt_cmp(fl1->opt, fl->opt))
507                                         goto release;
508
509                                 err = -ENOMEM;
510                                 if (sfl1 == NULL)
511                                         goto release;
512                                 if (fl->linger > fl1->linger)
513                                         fl1->linger = fl->linger;
514                                 if ((long)(fl->expires - fl1->expires) > 0)
515                                         fl1->expires = fl->expires;
516                                 write_lock_bh(&ip6_sk_fl_lock);
517                                 sfl1->fl = fl1;
518                                 sfl1->next = np->ipv6_fl_list;
519                                 np->ipv6_fl_list = sfl1;
520                                 write_unlock_bh(&ip6_sk_fl_lock);
521                                 fl_free(fl);
522                                 return 0;
523
524 release:
525                                 fl_release(fl1);
526                                 goto done;
527                         }
528                 }
529                 err = -ENOENT;
530                 if (!(freq.flr_flags&IPV6_FL_F_CREATE))
531                         goto done;
532
533                 err = -ENOMEM;
534                 if (sfl1 == NULL || (err = mem_check(sk)) != 0)
535                         goto done;
536
537                 err = fl_intern(fl, freq.flr_label);
538                 if (err)
539                         goto done;
540
541                 if (!freq.flr_label) {
542                         if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
543                                          &fl->label, sizeof(fl->label))) {
544                                 /* Intentionally ignore fault. */
545                         }
546                 }
547
548                 sfl1->fl = fl;
549                 sfl1->next = np->ipv6_fl_list;
550                 np->ipv6_fl_list = sfl1;
551                 return 0;
552
553         default:
554                 return -EINVAL;
555         }
556
557 done:
558         fl_free(fl);
559         kfree(sfl1);
560         return err;
561 }
562
563 #ifdef CONFIG_PROC_FS
564
565 struct ip6fl_iter_state {       /* iteration cursor kept in seq->private */
566         int bucket;             /* index into fl_ht[] of the chain being walked */
567 };
568
569 #define ip6fl_seq_private(seq)  ((struct ip6fl_iter_state *)(seq)->private)     /* typed view of seq->private */
570
571 static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
572 {
573         struct ip6_flowlabel *fl = NULL;
574         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
575
576         for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
577                 if (fl_ht[state->bucket]) {
578                         fl = fl_ht[state->bucket];
579                         break;
580                 }
581         }
582         return fl;
583 }
584
585 static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
586 {
587         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
588
589         fl = fl->next;
590         while (!fl) {
591                 if (++state->bucket <= FL_HASH_MASK)
592                         fl = fl_ht[state->bucket];
593                 else
594                         break;
595         }
596         return fl;
597 }
598
599 static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
600 {
601         struct ip6_flowlabel *fl = ip6fl_get_first(seq);
602         if (fl)
603                 while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
604                         --pos;
605         return pos ? NULL : fl;
606 }
607
608 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
609 {
610         read_lock_bh(&ip6_fl_lock);
611         return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
612 }
613
614 static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
615 {
616         struct ip6_flowlabel *fl;
617
618         if (v == SEQ_START_TOKEN)
619                 fl = ip6fl_get_first(seq);
620         else
621                 fl = ip6fl_get_next(seq, v);
622         ++*pos;
623         return fl;
624 }
625
626 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
627 {
628         read_unlock_bh(&ip6_fl_lock);
629 }
630
631 static int ip6fl_seq_show(struct seq_file *seq, void *v)
632 {
633         if (v == SEQ_START_TOKEN)
634                 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
635                            "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
636         else {
637                 struct ip6_flowlabel *fl = v;
638                 seq_printf(seq,
639                            "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n",
640                            (unsigned)ntohl(fl->label),
641                            fl->share,
642                            (unsigned)fl->owner,
643                            atomic_read(&fl->users),
644                            fl->linger/HZ,
645                            (long)(fl->expires - jiffies)/HZ,
646                            NIP6(fl->dst),
647                            fl->opt ? fl->opt->opt_nflen : 0);
648         }
649         return 0;
650 }
651
652 static const struct seq_operations ip6fl_seq_ops = {    /* seq_file callbacks for the flow label table dump */
653         .start  =       ip6fl_seq_start,        /* takes ip6_fl_lock for reading */
654         .next   =       ip6fl_seq_next,
655         .stop   =       ip6fl_seq_stop,         /* releases ip6_fl_lock */
656         .show   =       ip6fl_seq_show,
657 };
658
659 static int ip6fl_seq_open(struct inode *inode, struct file *file)
660 {
661         return seq_open_private(file, &ip6fl_seq_ops,
662                         sizeof(struct ip6fl_iter_state));
663 }
664
665 static const struct file_operations ip6fl_seq_fops = {  /* file operations registered for the "ip6_flowlabel" proc entry */
666         .owner          =       THIS_MODULE,
667         .open           =       ip6fl_seq_open,
668         .read           =       seq_read,
669         .llseek         =       seq_lseek,
670         .release        =       seq_release_private,    /* pairs with seq_open_private() in ip6fl_seq_open() */
671 };
672 #endif
673
674
/*
 * Initialise the flow label manager.  With CONFIG_PROC_FS this registers
 * the world-readable "ip6_flowlabel" proc entry in init_net; without it
 * there is nothing to do.
 */
void ip6_flowlabel_init(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO,
			     &ip6fl_seq_fops);
#endif
}
681
682 void ip6_flowlabel_cleanup(void)
683 {
684         del_timer(&ip6_fl_gc_timer);
685 #ifdef CONFIG_PROC_FS
686         proc_net_remove(&init_net, "ip6_flowlabel");
687 #endif
688 }