OSDN Git Service

inet: frags: break the 2GB limit for frags storage
[android-x86/kernel.git] / net / ieee802154 / 6lowpan / reassembly.c
1 /*      6LoWPAN fragment reassembly
2  *
3  *
4  *      Authors:
5  *      Alexander Aring         <aar@pengutronix.de>
6  *
7  *      Based on: net/ipv6/reassembly.c
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 #define pr_fmt(fmt) "6LoWPAN: " fmt
16
17 #include <linux/net.h>
18 #include <linux/list.h>
19 #include <linux/netdevice.h>
20 #include <linux/random.h>
21 #include <linux/jhash.h>
22 #include <linux/skbuff.h>
23 #include <linux/slab.h>
24 #include <linux/export.h>
25
26 #include <net/ieee802154_netdev.h>
27 #include <net/6lowpan.h>
28 #include <net/ipv6.h>
29 #include <net/inet_frag.h>
30
31 #include "6lowpan_i.h"
32
/* Name handed to lowpan_frags.frags_cache_name in lowpan_net_frag_init(). */
33 static const char lowpan_frags_cache_name[] = "lowpan-frags";
34
/* Fragment-queue descriptor shared by all namespaces: its callbacks and
 * sizes are filled in by lowpan_net_frag_init(), and every netns points
 * its frags.f at it in lowpan_frags_init_net().
 */
35 static struct inet_frags lowpan_frags;
36
/* Defined further down; lowpan_frag_queue() calls it once all fragments
 * of a datagram have arrived.
 */
37 static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
38                              struct sk_buff *prev, struct net_device *ldev);
39
40 static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
41 {
42         const struct frag_lowpan_compare_key *key = a;
43         struct lowpan_frag_queue *fq;
44
45         fq = container_of(q, struct lowpan_frag_queue, q);
46
47         BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
48         memcpy(&q->key, key, sizeof(*key));
49 }
50
51 static void lowpan_frag_expire(unsigned long data)
52 {
53         struct frag_queue *fq;
54         struct net *net;
55
56         fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
57         net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
58
59         spin_lock(&fq->q.lock);
60
61         if (fq->q.flags & INET_FRAG_COMPLETE)
62                 goto out;
63
64         inet_frag_kill(&fq->q);
65 out:
66         spin_unlock(&fq->q.lock);
67         inet_frag_put(&fq->q);
68 }
69
70 static inline struct lowpan_frag_queue *
71 fq_find(struct net *net, const struct lowpan_802154_cb *cb,
72         const struct ieee802154_addr *src,
73         const struct ieee802154_addr *dst)
74 {
75         struct netns_ieee802154_lowpan *ieee802154_lowpan =
76                 net_ieee802154_lowpan(net);
77         struct frag_lowpan_compare_key key = {
78                 .tag = cb->d_tag,
79                 .d_size = cb->d_size,
80                 .src = *src,
81                 .dst = *dst,
82         };
83         struct inet_frag_queue *q;
84
85         q = inet_frag_find(&ieee802154_lowpan->frags, &key);
86         if (!q)
87                 return NULL;
88
89         return container_of(q, struct lowpan_frag_queue, q);
90 }
91
/* Add one received fragment to the reassembly queue @fq.
 *
 * @fq:        queue the fragment belongs to; lowpan_frag_rcv() calls this
 *             with fq->q.lock held
 * @skb:       the fragment, whose lowpan_802154_cb() was already filled in
 *             by lowpan_get_cb()
 * @frag_type: dispatch type; LOWPAN_DISPATCH_FRAG1 marks the first fragment
 *
 * The skb is linked into the fragment list ordered by d_offset.  When the
 * first and last fragments are present and the collected bytes equal the
 * datagram length, lowpan_frag_reasm() is called and its result returned.
 * Otherwise -1 is returned: on the err path the skb is freed, on the
 * normal path it stays on the queue.
 */
92 static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
93                              struct sk_buff *skb, u8 frag_type)
94 {
95         struct sk_buff *prev, *next;
96         struct net_device *ldev;
97         int end, offset;
98
99         if (fq->q.flags & INET_FRAG_COMPLETE)
100                 goto err;
101
            /* d_offset is scaled by 8 to get a byte offset (presumably it is
             * carried in 8-octet units as in RFC 4944 - confirm); end is the
             * total datagram size taken from this fragment's header.
             */
102         offset = lowpan_802154_cb(skb)->d_offset << 3;
103         end = lowpan_802154_cb(skb)->d_size;
104
105         /* Is this the final fragment? */
106         if (offset + skb->len == end) {
107                 /* If we already have some bits beyond end
108                  * or have different end, the segment is corrupted.
109                  */
110                 if (end < fq->q.len ||
111                     ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
112                         goto err;
113                 fq->q.flags |= INET_FRAG_LAST_IN;
114                 fq->q.len = end;
115         } else {
116                 if (end > fq->q.len) {
117                         /* Some bits beyond end -> corruption. */
118                         if (fq->q.flags & INET_FRAG_LAST_IN)
119                                 goto err;
120                         fq->q.len = end;
121                 }
122         }
123
124         /* Find out which fragments are in front and at the back of us
125          * in the chain of fragments so far.  We must know where to put
126          * this fragment, right?
127          */
            /* Fast path: the list is empty or the new fragment sorts after
             * the current tail, so it is simply appended.
             */
128         prev = fq->q.fragments_tail;
129         if (!prev ||
130             lowpan_802154_cb(prev)->d_offset <
131             lowpan_802154_cb(skb)->d_offset) {
132                 next = NULL;
133                 goto found;
134         }
            /* Slow path: walk from the head to the first fragment whose
             * d_offset is not smaller than ours.
             */
135         prev = NULL;
136         for (next = fq->q.fragments; next != NULL; next = next->next) {
137                 if (lowpan_802154_cb(next)->d_offset >=
138                     lowpan_802154_cb(skb)->d_offset)
139                         break;  /* bingo! */
140                 prev = next;
141         }
142
143 found:
144         /* Insert this fragment in the chain of fragments. */
145         skb->next = next;
146         if (!next)
147                 fq->q.fragments_tail = skb;
148         if (prev)
149                 prev->next = skb;
150         else
151                 fq->q.fragments = skb;
152
            /* Remember the device and detach it from the queued skb;
             * lowpan_frag_reasm() stores it on the reassembled head.
             */
153         ldev = skb->dev;
154         if (ldev)
155                 skb->dev = NULL;
156
157         fq->q.stamp = skb->tstamp;
158         if (frag_type == LOWPAN_DISPATCH_FRAG1)
159                 fq->q.flags |= INET_FRAG_FIRST_IN;
160
            /* Count the received bytes and charge the skb's truesize through
             * add_frag_mem_limit().
             */
161         fq->q.meat += skb->len;
162         add_frag_mem_limit(fq->q.net, skb->truesize);
163
164         if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
165             fq->q.meat == fq->q.len) {
166                 int res;
167                 unsigned long orefdst = skb->_skb_refdst;
168
                    /* _skb_refdst is cleared while reassembling and restored
                     * afterwards.
                     */
169                 skb->_skb_refdst = 0UL;
170                 res = lowpan_frag_reasm(fq, prev, ldev);
171                 skb->_skb_refdst = orefdst;
172                 return res;
173         }
174
            /* Fragment queued, datagram not complete yet: skb is not freed. */
175         return -1;
176 err:
177         kfree_skb(skb);
178         return -1;
179 }
180
181 /*      Merge all fragments queued on fq into a single skb.
182  *      Returns 1 on success and -1 when an allocation fails
183  *      (out_oom).  ldev is stored as the device of the result.
184  *
185  *      It is called with locked fq, and caller must check that
186  *      queue is eligible for reassembly i.e. it is not COMPLETE,
187  *      the last and the first frames arrived and all the bits are here.
188  */
189 static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
190                              struct net_device *ldev)
191 {
192         struct sk_buff *fp, *head = fq->q.fragments;
193         int sum_truesize;
194
            /* NOTE(review): inet_frag_kill() presumably takes the queue out
             * of service before its fragments are merged - confirm against
             * net/ipv4/inet_fragment.c.
             */
195         inet_frag_kill(&fq->q);
196
197         /* Make the one we just received the head. */
            /* prev->next is the skb lowpan_frag_queue() just inserted.  A clone
             * takes its place in the list, and the original skb is morphed
             * into the old list head so that it ends up first.
             */
198         if (prev) {
199                 head = prev->next;
200                 fp = skb_clone(head, GFP_ATOMIC);
201
202                 if (!fp)
203                         goto out_oom;
204
205                 fp->next = head->next;
206                 if (!fp->next)
207                         fq->q.fragments_tail = fp;
208                 prev->next = fp;
209
210                 skb_morph(head, fq->q.fragments);
211                 head->next = fq->q.fragments->next;
212
213                 consume_skb(fq->q.fragments);
214                 fq->q.fragments = head;
215         }
216
217         /* Head of list must not be cloned. */
218         if (skb_unclone(head, GFP_ATOMIC))
219                 goto out_oom;
220
221         /* If the first fragment is fragmented itself, we split
222          * it to two chunks: the first with data and paged part
223          * and the second, holding only fragments.
224          */
225         if (skb_has_frag_list(head)) {
226                 struct sk_buff *clone;
227                 int i, plen = 0;
228
229                 clone = alloc_skb(0, GFP_ATOMIC);
230                 if (!clone)
231                         goto out_oom;
232                 clone->next = head->next;
233                 head->next = clone;
234                 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
235                 skb_frag_list_init(head);
236                 for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
237                         plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
238                 clone->len = head->data_len - plen;
239                 clone->data_len = clone->len;
240                 head->data_len -= clone->len;
241                 head->len -= clone->len;
242                 add_frag_mem_limit(fq->q.net, clone->truesize);
243         }
244
            /* NOTE(review): head has already been handed to skb_unclone() and
             * skb_has_frag_list() above, so this check looks too late to be
             * useful - confirm whether it is still wanted.
             */
245         WARN_ON(head == NULL);
246
            /* Fold every remaining fragment into head: coalesce it when
             * skb_try_coalesce() succeeds, otherwise add its length and
             * truesize to head (the first such fragment becomes head's
             * frag_list).  sum_truesize adds up the truesize of head and of
             * every fragment visited.
             */
247         sum_truesize = head->truesize;
248         for (fp = head->next; fp;) {
249                 bool headstolen;
250                 int delta;
251                 struct sk_buff *next = fp->next;
252
253                 sum_truesize += fp->truesize;
254                 if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
255                         kfree_skb_partial(fp, headstolen);
256                 } else {
257                         if (!skb_shinfo(head)->frag_list)
258                                 skb_shinfo(head)->frag_list = fp;
259                         head->data_len += fp->len;
260                         head->len += fp->len;
261                         head->truesize += fp->truesize;
262                 }
263                 fp = next;
264         }
            /* Give the summed truesize back to the fragment memory accounting. */
265         sub_frag_mem_limit(fq->q.net, sum_truesize);
266
267         head->next = NULL;
268         head->dev = ldev;
269         head->tstamp = fq->q.stamp;
270
            /* The fragments now hang off head; detach them from the queue. */
271         fq->q.fragments = NULL;
272         fq->q.fragments_tail = NULL;
273
274         return 1;
275 out_oom:
276         net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n");
277         return -1;
278 }
279
280 static int lowpan_frag_rx_handlers_result(struct sk_buff *skb,
281                                           lowpan_rx_result res)
282 {
283         switch (res) {
284         case RX_QUEUED:
285                 return NET_RX_SUCCESS;
286         case RX_CONTINUE:
287                 /* nobody cared about this packet */
288                 net_warn_ratelimited("%s: received unknown dispatch\n",
289                                      __func__);
290
291                 /* fall-through */
292         default:
293                 /* all others failure */
294                 return NET_RX_DROP;
295         }
296 }
297
298 static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb)
299 {
300         int ret;
301
302         if (!lowpan_is_iphc(*skb_network_header(skb)))
303                 return RX_CONTINUE;
304
305         ret = lowpan_iphc_decompress(skb);
306         if (ret < 0)
307                 return RX_DROP;
308
309         return RX_QUEUED;
310 }
311
312 static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb)
313 {
314         lowpan_rx_result res;
315
316 #define CALL_RXH(rxh)                   \
317         do {                            \
318                 res = rxh(skb); \
319                 if (res != RX_CONTINUE) \
320                         goto rxh_next;  \
321         } while (0)
322
323         /* likely at first */
324         CALL_RXH(lowpan_frag_rx_h_iphc);
325         CALL_RXH(lowpan_rx_h_ipv6);
326
327 rxh_next:
328         return lowpan_frag_rx_handlers_result(skb, res);
329 #undef CALL_RXH
330 }
331
332 #define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK        0x07
333 #define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT       8
334
335 static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type,
336                          struct lowpan_802154_cb *cb)
337 {
338         bool fail;
339         u8 high = 0, low = 0;
340         __be16 d_tag = 0;
341
342         fail = lowpan_fetch_skb(skb, &high, 1);
343         fail |= lowpan_fetch_skb(skb, &low, 1);
344         /* remove the dispatch value and use first three bits as high value
345          * for the datagram size
346          */
347         cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) <<
348                 LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low;
349         fail |= lowpan_fetch_skb(skb, &d_tag, 2);
350         cb->d_tag = ntohs(d_tag);
351
352         if (frag_type == LOWPAN_DISPATCH_FRAGN) {
353                 fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1);
354         } else {
355                 skb_reset_network_header(skb);
356                 cb->d_offset = 0;
357                 /* check if datagram_size has ipv6hdr on FRAG1 */
358                 fail |= cb->d_size < sizeof(struct ipv6hdr);
359                 /* check if we can dereference the dispatch value */
360                 fail |= !skb->len;
361         }
362
363         if (unlikely(fail))
364                 return -EIO;
365
366         return 0;
367 }
368
369 int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
370 {
371         struct lowpan_frag_queue *fq;
372         struct net *net = dev_net(skb->dev);
373         struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
374         struct ieee802154_hdr hdr;
375         int err;
376
377         if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
378                 goto err;
379
380         err = lowpan_get_cb(skb, frag_type, cb);
381         if (err < 0)
382                 goto err;
383
384         if (frag_type == LOWPAN_DISPATCH_FRAG1) {
385                 err = lowpan_invoke_frag_rx_handlers(skb);
386                 if (err == NET_RX_DROP)
387                         goto err;
388         }
389
390         if (cb->d_size > IPV6_MIN_MTU) {
391                 net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n");
392                 goto err;
393         }
394
395         fq = fq_find(net, cb, &hdr.source, &hdr.dest);
396         if (fq != NULL) {
397                 int ret;
398
399                 spin_lock(&fq->q.lock);
400                 ret = lowpan_frag_queue(fq, skb, frag_type);
401                 spin_unlock(&fq->q.lock);
402
403                 inet_frag_put(&fq->q);
404                 return ret;
405         }
406
407 err:
408         kfree_skb(skb);
409         return -1;
410 }
411
412 #ifdef CONFIG_SYSCTL
/* Referenced as .extra1 of the 6lowpanfrag_low_thresh entry below. */
413 static long zero;
414
/* Per-namespace sysctl template.  The entries point at init_net's values;
 * lowpan_frags_ns_sysctl_register() duplicates the table for any other
 * namespace and repoints .data/.extra1/.extra2 by index, so the order of
 * the entries matters.
 * NOTE(review): .extra1/.extra2 are presumably the min/max bounds applied
 * by proc_doulongvec_minmax - confirm against the sysctl documentation.
 */
415 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
416         {
417                 .procname       = "6lowpanfrag_high_thresh",
418                 .data           = &init_net.ieee802154_lowpan.frags.high_thresh,
419                 .maxlen         = sizeof(unsigned long),
420                 .mode           = 0644,
421                 .proc_handler   = proc_doulongvec_minmax,
422                 .extra1         = &init_net.ieee802154_lowpan.frags.low_thresh
423         },
424         {
425                 .procname       = "6lowpanfrag_low_thresh",
426                 .data           = &init_net.ieee802154_lowpan.frags.low_thresh,
427                 .maxlen         = sizeof(unsigned long),
428                 .mode           = 0644,
429                 .proc_handler   = proc_doulongvec_minmax,
430                 .extra1         = &zero,
431                 .extra2         = &init_net.ieee802154_lowpan.frags.high_thresh
432         },
433         {
434                 .procname       = "6lowpanfrag_time",
435                 .data           = &init_net.ieee802154_lowpan.frags.timeout,
436                 .maxlen         = sizeof(int),
437                 .mode           = 0644,
438                 .proc_handler   = proc_dointvec_jiffies,
439         },
440         { }
441 };
442
443 /* secret interval has been deprecated */
/* The sysctl entry is still registered (once, for init_net, by
 * lowpan_frags_sysctl_register()), but it is backed by a variable that
 * nothing else in this file reads.
 */
444 static int lowpan_frags_secret_interval_unused;
445 static struct ctl_table lowpan_frags_ctl_table[] = {
446         {
447                 .procname       = "6lowpanfrag_secret_interval",
448                 .data           = &lowpan_frags_secret_interval_unused,
449                 .maxlen         = sizeof(int),
450                 .mode           = 0644,
451                 .proc_handler   = proc_dointvec_jiffies,
452         },
453         { }
454 };
455
456 static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
457 {
458         struct ctl_table *table;
459         struct ctl_table_header *hdr;
460         struct netns_ieee802154_lowpan *ieee802154_lowpan =
461                 net_ieee802154_lowpan(net);
462
463         table = lowpan_frags_ns_ctl_table;
464         if (!net_eq(net, &init_net)) {
465                 table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table),
466                                 GFP_KERNEL);
467                 if (table == NULL)
468                         goto err_alloc;
469
470                 table[0].data = &ieee802154_lowpan->frags.high_thresh;
471                 table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
472                 table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh;
473                 table[1].data = &ieee802154_lowpan->frags.low_thresh;
474                 table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
475                 table[2].data = &ieee802154_lowpan->frags.timeout;
476
477                 /* Don't export sysctls to unprivileged users */
478                 if (net->user_ns != &init_user_ns)
479                         table[0].procname = NULL;
480         }
481
482         hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
483         if (hdr == NULL)
484                 goto err_reg;
485
486         ieee802154_lowpan->sysctl.frags_hdr = hdr;
487         return 0;
488
489 err_reg:
490         if (!net_eq(net, &init_net))
491                 kfree(table);
492 err_alloc:
493         return -ENOMEM;
494 }
495
496 static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
497 {
498         struct ctl_table *table;
499         struct netns_ieee802154_lowpan *ieee802154_lowpan =
500                 net_ieee802154_lowpan(net);
501
502         table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg;
503         unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr);
504         if (!net_eq(net, &init_net))
505                 kfree(table);
506 }
507
508 static struct ctl_table_header *lowpan_ctl_header;
509
510 static int __init lowpan_frags_sysctl_register(void)
511 {
512         lowpan_ctl_header = register_net_sysctl(&init_net,
513                                                 "net/ieee802154/6lowpan",
514                                                 lowpan_frags_ctl_table);
515         return lowpan_ctl_header == NULL ? -ENOMEM : 0;
516 }
517
518 static void lowpan_frags_sysctl_unregister(void)
519 {
520         unregister_net_sysctl_table(lowpan_ctl_header);
521 }
522 #else
/* Stubs used when CONFIG_SYSCTL is not set: the register variants report
 * success (0) and the unregister variants do nothing.
 */
523 static inline int lowpan_frags_ns_sysctl_register(struct net *net)
524 {
525         return 0;
526 }
527
528 static inline void lowpan_frags_ns_sysctl_unregister(struct net *net)
529 {
530 }
531
532 static inline int __init lowpan_frags_sysctl_register(void)
533 {
534         return 0;
535 }
536
537 static inline void lowpan_frags_sysctl_unregister(void)
538 {
539 }
540 #endif
541
542 static int __net_init lowpan_frags_init_net(struct net *net)
543 {
544         struct netns_ieee802154_lowpan *ieee802154_lowpan =
545                 net_ieee802154_lowpan(net);
546         int res;
547
548         ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
549         ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
550         ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
551         ieee802154_lowpan->frags.f = &lowpan_frags;
552
553         res = inet_frags_init_net(&ieee802154_lowpan->frags);
554         if (res < 0)
555                 return res;
556         res = lowpan_frags_ns_sysctl_register(net);
557         if (res < 0)
558                 inet_frags_exit_net(&ieee802154_lowpan->frags);
559         return res;
560 }
561
562 static void __net_exit lowpan_frags_exit_net(struct net *net)
563 {
564         struct netns_ieee802154_lowpan *ieee802154_lowpan =
565                 net_ieee802154_lowpan(net);
566
567         lowpan_frags_ns_sysctl_unregister(net);
568         inet_frags_exit_net(&ieee802154_lowpan->frags);
569 }
570
/* Per-network-namespace init/exit hooks; registered by
 * lowpan_net_frag_init() and unregistered by lowpan_net_frag_exit().
 */
571 static struct pernet_operations lowpan_frags_ops = {
572         .init = lowpan_frags_init_net,
573         .exit = lowpan_frags_exit_net,
574 };
575
576 static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
577 {
578         return jhash2(data,
579                       sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
580 }
581
582 static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
583 {
584         const struct inet_frag_queue *fq = data;
585
586         return jhash2((const u32 *)&fq->key,
587                       sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
588 }
589
590 static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
591 {
592         const struct frag_lowpan_compare_key *key = arg->key;
593         const struct inet_frag_queue *fq = ptr;
594
595         return !!memcmp(&fq->key, key, sizeof(*key));
596 }
597
/* Hash table parameters copied into lowpan_frags.rhash_params by
 * lowpan_net_frag_init(): queues are linked through inet_frag_queue.node
 * and hashed/compared with the three callbacks defined above.
 */
598 static const struct rhashtable_params lowpan_rhash_params = {
599         .head_offset            = offsetof(struct inet_frag_queue, node),
600         .hashfn                 = lowpan_key_hashfn,
601         .obj_hashfn             = lowpan_obj_hashfn,
602         .obj_cmpfn              = lowpan_obj_cmpfn,
603         .automatic_shrinking    = true,
604 };
605
606 int __init lowpan_net_frag_init(void)
607 {
608         int ret;
609
610         lowpan_frags.constructor = lowpan_frag_init;
611         lowpan_frags.destructor = NULL;
612         lowpan_frags.qsize = sizeof(struct frag_queue);
613         lowpan_frags.frag_expire = lowpan_frag_expire;
614         lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
615         lowpan_frags.rhash_params = lowpan_rhash_params;
616         ret = inet_frags_init(&lowpan_frags);
617         if (ret)
618                 goto out;
619
620         ret = lowpan_frags_sysctl_register();
621         if (ret)
622                 goto err_sysctl;
623
624         ret = register_pernet_subsys(&lowpan_frags_ops);
625         if (ret)
626                 goto err_pernet;
627 out:
628         return ret;
629 err_pernet:
630         lowpan_frags_sysctl_unregister();
631 err_sysctl:
632         inet_frags_fini(&lowpan_frags);
633         return ret;
634 }
635
636 void lowpan_net_frag_exit(void)
637 {
638         inet_frags_fini(&lowpan_frags);
639         lowpan_frags_sysctl_unregister();
640         unregister_pernet_subsys(&lowpan_frags_ops);
641 }