ed7603fe5fe3c64081aeef537e1850cc88ef2bdb
[safe/jmp/linux-2.6] / net / ipv6 / netfilter / nf_conntrack_reasm.c
1 /*
2  * IPv6 fragment reassembly for connection tracking
3  *
4  * Copyright (C)2004 USAGI/WIDE Project
5  *
6  * Author:
7  *      Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
8  *
9  * Based on: net/ipv6/reassembly.c
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License
13  * as published by the Free Software Foundation; either version
14  * 2 of the License, or (at your option) any later version.
15  */
16
17 #include <linux/config.h>
18 #include <linux/errno.h>
19 #include <linux/types.h>
20 #include <linux/string.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/jiffies.h>
24 #include <linux/net.h>
25 #include <linux/list.h>
26 #include <linux/netdevice.h>
27 #include <linux/in6.h>
28 #include <linux/ipv6.h>
29 #include <linux/icmpv6.h>
30 #include <linux/random.h>
31 #include <linux/jhash.h>
32
33 #include <net/sock.h>
34 #include <net/snmp.h>
35
36 #include <net/ipv6.h>
37 #include <net/protocol.h>
38 #include <net/transp_v6.h>
39 #include <net/rawv6.h>
40 #include <net/ndisc.h>
41 #include <net/addrconf.h>
42 #include <linux/sysctl.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45 #include <linux/kernel.h>
46 #include <linux/module.h>
47
48 #if 0
49 #define DEBUGP printk
50 #else
51 #define DEBUGP(format, args...)
52 #endif
53
54 #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
55 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
56 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
57
58 unsigned int nf_ct_frag6_high_thresh = 256*1024;
59 unsigned int nf_ct_frag6_low_thresh = 192*1024;
60 unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
61
62 struct nf_ct_frag6_skb_cb
63 {
64         struct inet6_skb_parm   h;
65         int                     offset;
66         struct sk_buff          *orig;
67 };
68
69 #define NFCT_FRAG6_CB(skb)      ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
70
71 struct nf_ct_frag6_queue
72 {
73         struct nf_ct_frag6_queue        *next;
74         struct list_head lru_list;              /* lru list member      */
75
76         __u32                   id;             /* fragment id          */
77         struct in6_addr         saddr;
78         struct in6_addr         daddr;
79
80         spinlock_t              lock;
81         atomic_t                refcnt;
82         struct timer_list       timer;          /* expire timer         */
83         struct sk_buff          *fragments;
84         int                     len;
85         int                     meat;
86         struct timeval          stamp;
87         unsigned int            csum;
88         __u8                    last_in;        /* has first/last segment arrived? */
89 #define COMPLETE                4
90 #define FIRST_IN                2
91 #define LAST_IN                 1
92         __u16                   nhoffset;
93         struct nf_ct_frag6_queue        **pprev;
94 };
95
96 /* Hash table. */
97
98 #define FRAG6Q_HASHSZ   64
99
100 static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
101 static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
102 static u32 nf_ct_frag6_hash_rnd;
103 static LIST_HEAD(nf_ct_frag6_lru_list);
104 int nf_ct_frag6_nqueues = 0;
105
106 static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
107 {
108         if (fq->next)
109                 fq->next->pprev = fq->pprev;
110         *fq->pprev = fq->next;
111         list_del(&fq->lru_list);
112         nf_ct_frag6_nqueues--;
113 }
114
115 static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
116 {
117         write_lock(&nf_ct_frag6_lock);
118         __fq_unlink(fq);
119         write_unlock(&nf_ct_frag6_lock);
120 }
121
122 static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
123                                struct in6_addr *daddr)
124 {
125         u32 a, b, c;
126
127         a = saddr->s6_addr32[0];
128         b = saddr->s6_addr32[1];
129         c = saddr->s6_addr32[2];
130
131         a += JHASH_GOLDEN_RATIO;
132         b += JHASH_GOLDEN_RATIO;
133         c += nf_ct_frag6_hash_rnd;
134         __jhash_mix(a, b, c);
135
136         a += saddr->s6_addr32[3];
137         b += daddr->s6_addr32[0];
138         c += daddr->s6_addr32[1];
139         __jhash_mix(a, b, c);
140
141         a += daddr->s6_addr32[2];
142         b += daddr->s6_addr32[3];
143         c += id;
144         __jhash_mix(a, b, c);
145
146         return c & (FRAG6Q_HASHSZ - 1);
147 }
148
149 static struct timer_list nf_ct_frag6_secret_timer;
150 int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
151
152 static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
153 {
154         unsigned long now = jiffies;
155         int i;
156
157         write_lock(&nf_ct_frag6_lock);
158         get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
159         for (i = 0; i < FRAG6Q_HASHSZ; i++) {
160                 struct nf_ct_frag6_queue *q;
161
162                 q = nf_ct_frag6_hash[i];
163                 while (q) {
164                         struct nf_ct_frag6_queue *next = q->next;
165                         unsigned int hval = ip6qhashfn(q->id,
166                                                        &q->saddr,
167                                                        &q->daddr);
168
169                         if (hval != i) {
170                                 /* Unlink. */
171                                 if (q->next)
172                                         q->next->pprev = q->pprev;
173                                 *q->pprev = q->next;
174
175                                 /* Relink to new hash chain. */
176                                 if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
177                                         q->next->pprev = &q->next;
178                                 nf_ct_frag6_hash[hval] = q;
179                                 q->pprev = &nf_ct_frag6_hash[hval];
180                         }
181
182                         q = next;
183                 }
184         }
185         write_unlock(&nf_ct_frag6_lock);
186
187         mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
188 }
189
190 atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
191
192 /* Memory Tracking Functions. */
193 static inline void frag_kfree_skb(struct sk_buff *skb)
194 {
195         atomic_sub(skb->truesize, &nf_ct_frag6_mem);
196         if (NFCT_FRAG6_CB(skb)->orig)
197                 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
198
199         kfree_skb(skb);
200 }
201
202 static inline void frag_free_queue(struct nf_ct_frag6_queue *fq)
203 {
204         atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
205         kfree(fq);
206 }
207
208 static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
209 {
210         struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
211
212         if (!fq)
213                 return NULL;
214         atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
215         return fq;
216 }
217
218 /* Destruction primitives. */
219
220 /* Complete destruction of fq. */
221 static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq)
222 {
223         struct sk_buff *fp;
224
225         BUG_TRAP(fq->last_in&COMPLETE);
226         BUG_TRAP(del_timer(&fq->timer) == 0);
227
228         /* Release all fragment data. */
229         fp = fq->fragments;
230         while (fp) {
231                 struct sk_buff *xp = fp->next;
232
233                 frag_kfree_skb(fp);
234                 fp = xp;
235         }
236
237         frag_free_queue(fq);
238 }
239
240 static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
241 {
242         if (atomic_dec_and_test(&fq->refcnt))
243                 nf_ct_frag6_destroy(fq);
244 }
245
246 /* Kill fq entry. It is not destroyed immediately,
247  * because caller (and someone more) holds reference count.
248  */
249 static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
250 {
251         if (del_timer(&fq->timer))
252                 atomic_dec(&fq->refcnt);
253
254         if (!(fq->last_in & COMPLETE)) {
255                 fq_unlink(fq);
256                 atomic_dec(&fq->refcnt);
257                 fq->last_in |= COMPLETE;
258         }
259 }
260
261 static void nf_ct_frag6_evictor(void)
262 {
263         struct nf_ct_frag6_queue *fq;
264         struct list_head *tmp;
265
266         for (;;) {
267                 if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh)
268                         return;
269                 read_lock(&nf_ct_frag6_lock);
270                 if (list_empty(&nf_ct_frag6_lru_list)) {
271                         read_unlock(&nf_ct_frag6_lock);
272                         return;
273                 }
274                 tmp = nf_ct_frag6_lru_list.next;
275                 fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
276                 atomic_inc(&fq->refcnt);
277                 read_unlock(&nf_ct_frag6_lock);
278
279                 spin_lock(&fq->lock);
280                 if (!(fq->last_in&COMPLETE))
281                         fq_kill(fq);
282                 spin_unlock(&fq->lock);
283
284                 fq_put(fq);
285         }
286 }
287
288 static void nf_ct_frag6_expire(unsigned long data)
289 {
290         struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
291
292         spin_lock(&fq->lock);
293
294         if (fq->last_in & COMPLETE)
295                 goto out;
296
297         fq_kill(fq);
298
299 out:
300         spin_unlock(&fq->lock);
301         fq_put(fq);
302 }
303
304 /* Creation primitives. */
305
306
307 static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
308                                           struct nf_ct_frag6_queue *fq_in)
309 {
310         struct nf_ct_frag6_queue *fq;
311
312         write_lock(&nf_ct_frag6_lock);
313 #ifdef CONFIG_SMP
314         for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
315                 if (fq->id == fq_in->id && 
316                     !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
317                     !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
318                         atomic_inc(&fq->refcnt);
319                         write_unlock(&nf_ct_frag6_lock);
320                         fq_in->last_in |= COMPLETE;
321                         fq_put(fq_in);
322                         return fq;
323                 }
324         }
325 #endif
326         fq = fq_in;
327
328         if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
329                 atomic_inc(&fq->refcnt);
330
331         atomic_inc(&fq->refcnt);
332         if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
333                 fq->next->pprev = &fq->next;
334         nf_ct_frag6_hash[hash] = fq;
335         fq->pprev = &nf_ct_frag6_hash[hash];
336         INIT_LIST_HEAD(&fq->lru_list);
337         list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
338         nf_ct_frag6_nqueues++;
339         write_unlock(&nf_ct_frag6_lock);
340         return fq;
341 }
342
343
344 static struct nf_ct_frag6_queue *
345 nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src,                                struct in6_addr *dst)
346 {
347         struct nf_ct_frag6_queue *fq;
348
349         if ((fq = frag_alloc_queue()) == NULL) {
350                 DEBUGP("Can't alloc new queue\n");
351                 goto oom;
352         }
353
354         memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
355
356         fq->id = id;
357         ipv6_addr_copy(&fq->saddr, src);
358         ipv6_addr_copy(&fq->daddr, dst);
359
360         init_timer(&fq->timer);
361         fq->timer.function = nf_ct_frag6_expire;
362         fq->timer.data = (long) fq;
363         fq->lock = SPIN_LOCK_UNLOCKED;
364         atomic_set(&fq->refcnt, 1);
365
366         return nf_ct_frag6_intern(hash, fq);
367
368 oom:
369         return NULL;
370 }
371
372 static __inline__ struct nf_ct_frag6_queue *
373 fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
374 {
375         struct nf_ct_frag6_queue *fq;
376         unsigned int hash = ip6qhashfn(id, src, dst);
377
378         read_lock(&nf_ct_frag6_lock);
379         for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
380                 if (fq->id == id && 
381                     !ipv6_addr_cmp(src, &fq->saddr) &&
382                     !ipv6_addr_cmp(dst, &fq->daddr)) {
383                         atomic_inc(&fq->refcnt);
384                         read_unlock(&nf_ct_frag6_lock);
385                         return fq;
386                 }
387         }
388         read_unlock(&nf_ct_frag6_lock);
389
390         return nf_ct_frag6_create(hash, id, src, dst);
391 }
392
393
394 static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 
395                              struct frag_hdr *fhdr, int nhoff)
396 {
397         struct sk_buff *prev, *next;
398         int offset, end;
399
400         if (fq->last_in & COMPLETE) {
401                 DEBUGP("Allready completed\n");
402                 goto err;
403         }
404
405         offset = ntohs(fhdr->frag_off) & ~0x7;
406         end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
407                         ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
408
409         if ((unsigned int)end > IPV6_MAXPLEN) {
410                 DEBUGP("offset is too large.\n");
411                 return -1;
412         }
413
414         if (skb->ip_summed == CHECKSUM_HW)
415                 skb->csum = csum_sub(skb->csum,
416                                      csum_partial(skb->nh.raw,
417                                                   (u8*)(fhdr + 1) - skb->nh.raw,
418                                                   0));
419
420         /* Is this the final fragment? */
421         if (!(fhdr->frag_off & htons(IP6_MF))) {
422                 /* If we already have some bits beyond end
423                  * or have different end, the segment is corrupted.
424                  */
425                 if (end < fq->len ||
426                     ((fq->last_in & LAST_IN) && end != fq->len)) {
427                         DEBUGP("already received last fragment\n");
428                         goto err;
429                 }
430                 fq->last_in |= LAST_IN;
431                 fq->len = end;
432         } else {
433                 /* Check if the fragment is rounded to 8 bytes.
434                  * Required by the RFC.
435                  */
436                 if (end & 0x7) {
437                         /* RFC2460 says always send parameter problem in
438                          * this case. -DaveM
439                          */
440                         DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
441                         return -1;
442                 }
443                 if (end > fq->len) {
444                         /* Some bits beyond end -> corruption. */
445                         if (fq->last_in & LAST_IN) {
446                                 DEBUGP("last packet already reached.\n");
447                                 goto err;
448                         }
449                         fq->len = end;
450                 }
451         }
452
453         if (end == offset)
454                 goto err;
455
456         /* Point into the IP datagram 'data' part. */
457         if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
458                 DEBUGP("queue: message is too short.\n");
459                 goto err;
460         }
461         if (end-offset < skb->len) {
462                 if (pskb_trim(skb, end - offset)) {
463                         DEBUGP("Can't trim\n");
464                         goto err;
465                 }
466                 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
467                         skb->ip_summed = CHECKSUM_NONE;
468         }
469
470         /* Find out which fragments are in front and at the back of us
471          * in the chain of fragments so far.  We must know where to put
472          * this fragment, right?
473          */
474         prev = NULL;
475         for (next = fq->fragments; next != NULL; next = next->next) {
476                 if (NFCT_FRAG6_CB(next)->offset >= offset)
477                         break;  /* bingo! */
478                 prev = next;
479         }
480
481         /* We found where to put this one.  Check for overlap with
482          * preceding fragment, and, if needed, align things so that
483          * any overlaps are eliminated.
484          */
485         if (prev) {
486                 int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;
487
488                 if (i > 0) {
489                         offset += i;
490                         if (end <= offset) {
491                                 DEBUGP("overlap\n");
492                                 goto err;
493                         }
494                         if (!pskb_pull(skb, i)) {
495                                 DEBUGP("Can't pull\n");
496                                 goto err;
497                         }
498                         if (skb->ip_summed != CHECKSUM_UNNECESSARY)
499                                 skb->ip_summed = CHECKSUM_NONE;
500                 }
501         }
502
503         /* Look for overlap with succeeding segments.
504          * If we can merge fragments, do it.
505          */
506         while (next && NFCT_FRAG6_CB(next)->offset < end) {
507                 /* overlap is 'i' bytes */
508                 int i = end - NFCT_FRAG6_CB(next)->offset;
509
510                 if (i < next->len) {
511                         /* Eat head of the next overlapped fragment
512                          * and leave the loop. The next ones cannot overlap.
513                          */
514                         DEBUGP("Eat head of the overlapped parts.: %d", i);
515                         if (!pskb_pull(next, i))
516                                 goto err;
517
518                         /* next fragment */
519                         NFCT_FRAG6_CB(next)->offset += i;
520                         fq->meat -= i;
521                         if (next->ip_summed != CHECKSUM_UNNECESSARY)
522                                 next->ip_summed = CHECKSUM_NONE;
523                         break;
524                 } else {
525                         struct sk_buff *free_it = next;
526
527                         /* Old fragmnet is completely overridden with
528                          * new one drop it.
529                          */
530                         next = next->next;
531
532                         if (prev)
533                                 prev->next = next;
534                         else
535                                 fq->fragments = next;
536
537                         fq->meat -= free_it->len;
538                         frag_kfree_skb(free_it);
539                 }
540         }
541
542         NFCT_FRAG6_CB(skb)->offset = offset;
543
544         /* Insert this fragment in the chain of fragments. */
545         skb->next = next;
546         if (prev)
547                 prev->next = skb;
548         else
549                 fq->fragments = skb;
550
551         skb->dev = NULL;
552         skb_get_timestamp(skb, &fq->stamp);
553         fq->meat += skb->len;
554         atomic_add(skb->truesize, &nf_ct_frag6_mem);
555
556         /* The first fragment.
557          * nhoffset is obtained from the first fragment, of course.
558          */
559         if (offset == 0) {
560                 fq->nhoffset = nhoff;
561                 fq->last_in |= FIRST_IN;
562         }
563         write_lock(&nf_ct_frag6_lock);
564         list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
565         write_unlock(&nf_ct_frag6_lock);
566         return 0;
567
568 err:
569         return -1;
570 }
571
572 /*
573  *      Check if this packet is complete.
574  *      Returns NULL on failure by any reason, and pointer
575  *      to current nexthdr field in reassembled frame.
576  *
577  *      It is called with locked fq, and caller must check that
578  *      queue is eligible for reassembly i.e. it is not COMPLETE,
579  *      the last and the first frames arrived and all the bits are here.
580  */
581 static struct sk_buff *
582 nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
583 {
584         struct sk_buff *fp, *op, *head = fq->fragments;
585         int    payload_len;
586
587         fq_kill(fq);
588
589         BUG_TRAP(head != NULL);
590         BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
591
592         /* Unfragmented part is taken from the first segment. */
593         payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
594         if (payload_len > IPV6_MAXPLEN) {
595                 DEBUGP("payload len is too large.\n");
596                 goto out_oversize;
597         }
598
599         /* Head of list must not be cloned. */
600         if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
601                 DEBUGP("skb is cloned but can't expand head");
602                 goto out_oom;
603         }
604
605         /* If the first fragment is fragmented itself, we split
606          * it to two chunks: the first with data and paged part
607          * and the second, holding only fragments. */
608         if (skb_shinfo(head)->frag_list) {
609                 struct sk_buff *clone;
610                 int i, plen = 0;
611
612                 if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
613                         DEBUGP("Can't alloc skb\n");
614                         goto out_oom;
615                 }
616                 clone->next = head->next;
617                 head->next = clone;
618                 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
619                 skb_shinfo(head)->frag_list = NULL;
620                 for (i=0; i<skb_shinfo(head)->nr_frags; i++)
621                         plen += skb_shinfo(head)->frags[i].size;
622                 clone->len = clone->data_len = head->data_len - plen;
623                 head->data_len -= clone->len;
624                 head->len -= clone->len;
625                 clone->csum = 0;
626                 clone->ip_summed = head->ip_summed;
627
628                 NFCT_FRAG6_CB(clone)->orig = NULL;
629                 atomic_add(clone->truesize, &nf_ct_frag6_mem);
630         }
631
632         /* We have to remove fragment header from datagram and to relocate
633          * header in order to calculate ICV correctly. */
634         head->nh.raw[fq->nhoffset] = head->h.raw[0];
635         memmove(head->head + sizeof(struct frag_hdr), head->head, 
636                 (head->data - head->head) - sizeof(struct frag_hdr));
637         head->mac.raw += sizeof(struct frag_hdr);
638         head->nh.raw += sizeof(struct frag_hdr);
639
640         skb_shinfo(head)->frag_list = head->next;
641         head->h.raw = head->data;
642         skb_push(head, head->data - head->nh.raw);
643         atomic_sub(head->truesize, &nf_ct_frag6_mem);
644
645         for (fp=head->next; fp; fp = fp->next) {
646                 head->data_len += fp->len;
647                 head->len += fp->len;
648                 if (head->ip_summed != fp->ip_summed)
649                         head->ip_summed = CHECKSUM_NONE;
650                 else if (head->ip_summed == CHECKSUM_HW)
651                         head->csum = csum_add(head->csum, fp->csum);
652                 head->truesize += fp->truesize;
653                 atomic_sub(fp->truesize, &nf_ct_frag6_mem);
654         }
655
656         head->next = NULL;
657         head->dev = dev;
658         skb_set_timestamp(head, &fq->stamp);
659         head->nh.ipv6h->payload_len = htons(payload_len);
660
661         /* Yes, and fold redundant checksum back. 8) */
662         if (head->ip_summed == CHECKSUM_HW)
663                 head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
664
665         fq->fragments = NULL;
666
667         /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
668         fp = skb_shinfo(head)->frag_list;
669         if (NFCT_FRAG6_CB(fp)->orig == NULL)
670                 /* at above code, head skb is divided into two skbs. */
671                 fp = fp->next;
672
673         op = NFCT_FRAG6_CB(head)->orig;
674         for (; fp; fp = fp->next) {
675                 struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
676
677                 op->next = orig;
678                 op = orig;
679                 NFCT_FRAG6_CB(fp)->orig = NULL;
680         }
681
682         return head;
683
684 out_oversize:
685         if (net_ratelimit())
686                 printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
687         goto out_fail;
688 out_oom:
689         if (net_ratelimit())
690                 printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
691 out_fail:
692         return NULL;
693 }
694
695 /*
696  * find the header just before Fragment Header.
697  *
698  * if success return 0 and set ...
699  * (*prevhdrp): the value of "Next Header Field" in the header
700  *              just before Fragment Header.
701  * (*prevhoff): the offset of "Next Header Field" in the header
702  *              just before Fragment Header.
703  * (*fhoff)   : the offset of Fragment Header.
704  *
705  * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
706  *
707  */
708 static int
709 find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
710 {
711         u8 nexthdr = skb->nh.ipv6h->nexthdr;
712         u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
713         int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
714         int len = skb->len - start;
715         u8 prevhdr = NEXTHDR_IPV6;
716
717         while (nexthdr != NEXTHDR_FRAGMENT) {
718                 struct ipv6_opt_hdr hdr;
719                 int hdrlen;
720
721                 if (!ipv6_ext_hdr(nexthdr)) {
722                         return -1;
723                 }
724                 if (len < (int)sizeof(struct ipv6_opt_hdr)) {
725                         DEBUGP("too short\n");
726                         return -1;
727                 }
728                 if (nexthdr == NEXTHDR_NONE) {
729                         DEBUGP("next header is none\n");
730                         return -1;
731                 }
732                 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
733                         BUG();
734                 if (nexthdr == NEXTHDR_AUTH)
735                         hdrlen = (hdr.hdrlen+2)<<2;
736                 else
737                         hdrlen = ipv6_optlen(&hdr);
738
739                 prevhdr = nexthdr;
740                 prev_nhoff = start;
741
742                 nexthdr = hdr.nexthdr;
743                 len -= hdrlen;
744                 start += hdrlen;
745         }
746
747         if (len < 0)
748                 return -1;
749
750         *prevhdrp = prevhdr;
751         *prevhoff = prev_nhoff;
752         *fhoff = start;
753
754         return 0;
755 }
756
757 struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
758 {
759         struct sk_buff *clone; 
760         struct net_device *dev = skb->dev;
761         struct frag_hdr *fhdr;
762         struct nf_ct_frag6_queue *fq;
763         struct ipv6hdr *hdr;
764         int fhoff, nhoff;
765         u8 prevhdr;
766         struct sk_buff *ret_skb = NULL;
767
768         /* Jumbo payload inhibits frag. header */
769         if (skb->nh.ipv6h->payload_len == 0) {
770                 DEBUGP("payload len = 0\n");
771                 return skb;
772         }
773
774         if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
775                 return skb;
776
777         clone = skb_clone(skb, GFP_ATOMIC);
778         if (clone == NULL) {
779                 DEBUGP("Can't clone skb\n");
780                 return skb;
781         }
782
783         NFCT_FRAG6_CB(clone)->orig = skb;
784
785         if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
786                 DEBUGP("message is too short.\n");
787                 goto ret_orig;
788         }
789
790         clone->h.raw = clone->data + fhoff;
791         hdr = clone->nh.ipv6h;
792         fhdr = (struct frag_hdr *)clone->h.raw;
793
794         if (!(fhdr->frag_off & htons(0xFFF9))) {
795                 DEBUGP("Invalid fragment offset\n");
796                 /* It is not a fragmented frame */
797                 goto ret_orig;
798         }
799
800         if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
801                 nf_ct_frag6_evictor();
802
803         fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
804         if (fq == NULL) {
805                 DEBUGP("Can't find and can't create new queue\n");
806                 goto ret_orig;
807         }
808
809         spin_lock(&fq->lock);
810
811         if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
812                 spin_unlock(&fq->lock);
813                 DEBUGP("Can't insert skb to queue\n");
814                 fq_put(fq);
815                 goto ret_orig;
816         }
817
818         if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
819                 ret_skb = nf_ct_frag6_reasm(fq, dev);
820                 if (ret_skb == NULL)
821                         DEBUGP("Can't reassemble fragmented packets\n");
822         }
823         spin_unlock(&fq->lock);
824
825         fq_put(fq);
826         return ret_skb;
827
828 ret_orig:
829         kfree_skb(clone);
830         return skb;
831 }
832
833 void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
834                         struct net_device *in, struct net_device *out,
835                         int (*okfn)(struct sk_buff *))
836 {
837         struct sk_buff *s, *s2;
838
839         for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
840                 nf_conntrack_put_reasm(s->nfct_reasm);
841                 nf_conntrack_get_reasm(skb);
842                 s->nfct_reasm = skb;
843
844                 s2 = s->next;
845                 NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
846                                NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
847                 s = s2;
848         }
849         nf_conntrack_put_reasm(skb);
850 }
851
852 int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
853 {
854         struct sk_buff *s, *s2;
855
856         for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
857
858                 s2 = s->next;
859                 kfree_skb(s);
860         }
861
862         kfree_skb(skb);
863
864         return 0;
865 }
866
867 int nf_ct_frag6_init(void)
868 {
869         nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
870                                    (jiffies ^ (jiffies >> 6)));
871
872         init_timer(&nf_ct_frag6_secret_timer);
873         nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
874         nf_ct_frag6_secret_timer.expires = jiffies
875                                            + nf_ct_frag6_secret_interval;
876         add_timer(&nf_ct_frag6_secret_timer);
877
878         return 0;
879 }
880
881 void nf_ct_frag6_cleanup(void)
882 {
883         del_timer(&nf_ct_frag6_secret_timer);
884         nf_ct_frag6_evictor();
885 }