OSDN Git Service

0e9ae80472f0e7f3bcc8f5f4fb73896aa66adf56
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/flow.h>
30 #include <net/xfrm.h>
31 #include <net/ip.h>
32 #ifdef CONFIG_XFRM_STATISTICS
33 #include <net/snmp.h>
34 #endif
35
36 #include "xfrm_hash.h"
37
38 #define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
39 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
40 #define XFRM_MAX_QUEUE_LEN      100
41
/* Carries the caller's original route and XFRM_LOOKUP_* flags through
 * the flow-cache bundle resolver (presumably consumed by the bundle
 * lookup path defined later in this file — not visible in this chunk).
 */
struct xfrm_flo {
	struct dst_entry *dst_orig;	/* route before xfrm transformation */
	u8 flags;
};
46
47 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
48 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
49                                                 __read_mostly;
50
51 static struct kmem_cache *xfrm_dst_cache __read_mostly;
52
53 static void xfrm_init_pmtu(struct dst_entry *dst);
54 static int stale_bundle(struct dst_entry *dst);
55 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
56 static void xfrm_policy_queue_process(unsigned long arg);
57
58 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
59 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
60                                                 int dir);
61
62 static inline bool
63 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64 {
65         const struct flowi4 *fl4 = &fl->u.ip4;
66
67         return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
68                 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69                 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
70                 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
71                 (fl4->flowi4_proto == sel->proto || !sel->proto) &&
72                 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73 }
74
75 static inline bool
76 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77 {
78         const struct flowi6 *fl6 = &fl->u.ip6;
79
80         return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
81                 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
82                 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
83                 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
84                 (fl6->flowi6_proto == sel->proto || !sel->proto) &&
85                 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86 }
87
88 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
89                          unsigned short family)
90 {
91         switch (family) {
92         case AF_INET:
93                 return __xfrm4_selector_match(sel, fl);
94         case AF_INET6:
95                 return __xfrm6_selector_match(sel, fl);
96         }
97         return false;
98 }
99
/* Look up the per-family policy ops for @family.
 *
 * On success returns the afinfo pointer *with rcu_read_lock() held*;
 * the caller must release it via xfrm_policy_put_afinfo().  Returns
 * NULL — with no lock held — when @family is out of range or has no
 * registered afinfo.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();	/* keep the lock only on success */
	return afinfo;
}
112
/* Release the RCU read lock taken by a successful
 * xfrm_policy_get_afinfo(); @afinfo itself is unused.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}
117
118 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
119                                                   int tos, int oif,
120                                                   const xfrm_address_t *saddr,
121                                                   const xfrm_address_t *daddr,
122                                                   int family, u32 mark)
123 {
124         struct xfrm_policy_afinfo *afinfo;
125         struct dst_entry *dst;
126
127         afinfo = xfrm_policy_get_afinfo(family);
128         if (unlikely(afinfo == NULL))
129                 return ERR_PTR(-EAFNOSUPPORT);
130
131         dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
132
133         xfrm_policy_put_afinfo(afinfo);
134
135         return dst;
136 }
137
/* Resolve a route for state @x between @prev_saddr and @prev_daddr.
 *
 * For states flagged XFRM_TYPE_LOCAL_COADDR / XFRM_TYPE_REMOTE_COADDR
 * the corresponding endpoint is substituted with the state's care-of
 * address (x->coaddr).  On success the addresses actually used are
 * copied back into @prev_saddr/@prev_daddr so the next state in the
 * bundle chains from them.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family, u32 mark)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);

	if (!IS_ERR(dst)) {
		/* report the endpoints the route was built from */
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}
169
170 static inline unsigned long make_jiffies(long secs)
171 {
172         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
173                 return MAX_SCHEDULE_TIMEOUT-1;
174         else
175                 return secs*HZ;
176 }
177
/* Per-policy lifetime timer.  An armed timer holds a reference on @xp;
 * that reference is dropped via xfrm_pol_put() on every exit path.
 *
 * Hard add/use expiry deletes the policy and notifies key managers;
 * soft expiry only warns them and re-checks after XFRM_KM_TIMEOUT.
 * Otherwise the timer is re-armed for the nearest future event, taking
 * a fresh reference when mod_timer() activated an inactive timer.
 */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;	/* seconds until the nearest event */
	int warn = 0;		/* set once any soft limit has expired */
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;	/* already killed: just drop our reference */

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		/* a never-used policy expires relative to its add time */
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;	/* re-check soon */
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);	/* soft notification */
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);	/* timer was idle: it now owns a ref */

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);	/* hard notification */
	xfrm_pol_put(xp);
}
247
248 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
249 {
250         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
251
252         if (unlikely(pol->walk.dead))
253                 flo = NULL;
254         else
255                 xfrm_pol_hold(pol);
256
257         return flo;
258 }
259
260 static int xfrm_policy_flo_check(struct flow_cache_object *flo)
261 {
262         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
263
264         return !pol->walk.dead;
265 }
266
267 static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
268 {
269         xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
270 }
271
/* Flow-cache reference management for policies: take, validate and
 * drop a policy reference on behalf of cached flow entries.
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
277
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 *
 * Returns a zeroed, fully initialized policy with one reference held
 * by the caller, or NULL on allocation failure.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		/* the caller owns the single initial reference */
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		/* lifetime-expiry timer */
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
		/* timer servicing the packet hold queue */
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
305
306 static void xfrm_policy_destroy_rcu(struct rcu_head *head)
307 {
308         struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
309
310         security_xfrm_policy_free(policy->security);
311         kfree(policy);
312 }
313
/* Destroy xfrm_policy: descendant resources must be released to this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	/* only a dead (unlinked) policy may reach refcount zero */
	BUG_ON(!policy->walk.dead);

	/* both timers must already be stopped — a still-pending timer
	 * here would fire on freed memory */
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	/* defer the actual free until concurrent readers are done */
	call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
326
/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	/* bump genid so users of cached data derived from this policy
	 * can detect the change */
	atomic_inc(&policy->genid);

	/* each armed timer holds a policy reference; cancelling a
	 * pending timer must drop that reference */
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	skb_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	/* finally drop the caller's/list's reference */
	xfrm_pol_put(policy);
}
346
347 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
348
/* Hash a policy index into @net's by-index policy table. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}
353
354 /* calculate policy hash thresholds */
355 static void __get_hash_thresh(struct net *net,
356                               unsigned short family, int dir,
357                               u8 *dbits, u8 *sbits)
358 {
359         switch (family) {
360         case AF_INET:
361                 *dbits = net->xfrm.policy_bydst[dir].dbits4;
362                 *sbits = net->xfrm.policy_bydst[dir].sbits4;
363                 break;
364
365         case AF_INET6:
366                 *dbits = net->xfrm.policy_bydst[dir].dbits6;
367                 *sbits = net->xfrm.policy_bydst[dir].sbits6;
368                 break;
369
370         default:
371                 *dbits = 0;
372                 *sbits = 0;
373         }
374 }
375
/* Return the bydst hash chain for a policy with selector @sel, or the
 * per-direction inexact list when __sel_hash() reports the selector as
 * not hashable (signalled by returning hmask + 1).
 */
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);

	/* hmask + 1 is the "unhashable" sentinel -> inexact list */
	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}
392
393 static struct hlist_head *policy_hash_direct(struct net *net,
394                                              const xfrm_address_t *daddr,
395                                              const xfrm_address_t *saddr,
396                                              unsigned short family, int dir)
397 {
398         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
399         unsigned int hash;
400         u8 dbits;
401         u8 sbits;
402
403         __get_hash_thresh(net, family, dir, &dbits, &sbits);
404         hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
405
406         return net->xfrm.policy_bydst[dir].table + hash;
407 }
408
/* Rehash every policy on @list into @ndsttable (mask @nhashmask),
 * preserving the policies' relative order within each new chain.
 *
 * Each pass anchors on the first entry moved (entry0 -> chain h0) and
 * appends subsequent entries that hash to the same chain behind it;
 * entries hashing elsewhere are left on @list and handled by another
 * pass of the redo loop, until the source chain is empty.
 */
static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask,
				   int dir)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;
	u8 dbits;
	u8 sbits;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask, dbits, sbits);
		if (!entry0) {
			/* first entry of this pass anchors chain h0 */
			hlist_del(&pol->bydst);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;	/* different chain: next pass */
			hlist_del(&pol->bydst);
			/* keep original order: append after the anchor */
			hlist_add_behind(&pol->bydst, entry0);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}
445
/* Rehash every policy on @list into @nidxtable (mask @nhashmask).
 * No hlist_del() is needed here: hlist_add_head() overwrites the
 * node's links, the _safe iterator has already cached the next node,
 * and the caller frees the old table wholesale afterwards.
 */
static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}
460
/* Next hash mask after a grow: double the bucket count, i.e. turn a
 * mask of 2^k - 1 into 2^(k+1) - 1.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	/* 2u keeps the arithmetic in unsigned int, exactly as the
	 * classical ((old_hmask + 1) << 1) - 1 form behaves */
	return old_hmask * 2u + 1;
}
465
/* Double the bydst hash table for direction @dir, rehashing every
 * policy into the new table under the policy write lock so lookups
 * never observe a half-built table; the old table is freed afterwards.
 */
static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;		/* allocation failed: keep the old table */

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
490
/* Double the by-index hash table, rehashing every policy under the
 * policy write lock; the old table is freed after the swap.  @total is
 * the current policy count (unused beyond sizing decisions by callers).
 */
static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;		/* allocation failed: keep the old table */

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
515
516 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
517 {
518         unsigned int cnt = net->xfrm.policy_count[dir];
519         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
520
521         if (total)
522                 *total += cnt;
523
524         if ((hmask + 1) < xfrm_policy_hashmax &&
525             cnt > hmask)
526                 return 1;
527
528         return 0;
529 }
530
531 static inline int xfrm_byidx_should_resize(struct net *net, int total)
532 {
533         unsigned int hmask = net->xfrm.policy_idx_hmask;
534
535         if ((hmask + 1) < xfrm_policy_hashmax &&
536             total > hmask)
537                 return 1;
538
539         return 0;
540 }
541
/* Snapshot SPD statistics into @si under the policy lock: per-direction
 * policy counts (the +XFRM_POLICY_MAX slots appear to be the
 * socket-policy counters — confirm against the policy_count layout in
 * netns_xfrm) plus current and maximum hash table sizes.
 */
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
556
/* Serializes table resizing against the hthresh rebuild worker. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Worker for net->xfrm.policy_hash_work: grow any per-direction bydst
 * table — and then the by-index table — whose load exceeds its mask.
 */
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}
575
/* Worker for net->xfrm.policy_hthresh.work: re-bucket the whole SPD
 * after the selector prefix-length hashing thresholds changed.
 *
 * Reads a consistent threshold snapshot via the hthresh seqlock, then,
 * under the policy write lock, resets every bydst/inexact chain and
 * re-inserts all policies in creation order so that equal-priority
 * policies keep their original relative precedence.
 */
static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

	/* read selector prefixlen thresholds */
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	/* reset the bydst and inexact table in all directions */
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--)
			INIT_HLIST_HEAD(odst + i);
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

	/* re-insert all policies by order of creation */
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family,
					  xfrm_policy_id2dir(policy->index));
		/* keep each chain sorted by ascending priority */
		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind(&policy->bydst, newpos);
		else
			hlist_add_head(&policy->bydst, chain);
	}

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}
649
/* Request an asynchronous SPD rehash after hash-threshold changes; the
 * actual work is done by xfrm_hash_rebuild().
 */
void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
655
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	/* NOTE(review): idx_generator has no lock of its own — this
	 * relies on all callers holding the policy write lock; confirm */
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		/* try a caller-supplied index once, then fall back to
		 * generated values; the low bits encode the direction
		 * and the generator steps by 8 */
		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

		if (idx == 0)
			idx = 8;	/* never hand out index 0 */
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;	/* collision: try again */
				break;
			}
		}
		if (!found)
			return idx;
	}
}
690
691 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
692 {
693         u32 *p1 = (u32 *) s1;
694         u32 *p2 = (u32 *) s2;
695         int len = sizeof(struct xfrm_selector) / sizeof(u32);
696         int i;
697
698         for (i = 0; i < len; i++) {
699                 if (p1[i] != p2[i])
700                         return 1;
701         }
702
703         return 0;
704 }
705
/* Migrate any packets held on @old's hold queue over to @new and kick
 * @new's hold timer so they are processed.
 *
 * An armed hold_timer owns a policy reference: cancelling @old's timer
 * drops a reference on @old, and arming @new's previously-idle timer
 * takes one on @new.
 */
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	if (skb_queue_empty(&pq->hold_queue))
		return;		/* nothing queued: nothing to migrate */

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	/* fire immediately to drain the migrated packets */
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
	spin_unlock_bh(&pq->hold_queue.lock);
}
732
733 static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
734                                    struct xfrm_policy *pol)
735 {
736         u32 mark = policy->mark.v & policy->mark.m;
737
738         if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
739                 return true;
740
741         if ((mark & pol->mark.m) == pol->mark.v &&
742             policy->priority == pol->priority)
743                 return true;
744
745         return false;
746 }
747
/* Insert @policy into the SPD for direction @dir.
 *
 * The bydst chain stays sorted by ascending priority.  A pre-existing
 * policy that matches on type, selector, mark and security context
 * (delpol) is replaced: its held packets are migrated to @policy and
 * it is unlinked and killed.  With @excl set, such an equivalent entry
 * makes the insert fail with -EEXIST instead.
 *
 * Returns 0 on success or -EEXIST.  Takes the policy write lock; may
 * schedule an asynchronous hash-table resize afterwards.
 */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *newpos;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		/* an equivalent policy is replaced (or rejected w/ excl) */
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			/* remember the last lower-or-equal-priority spot */
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_behind(&policy->bydst, newpos);
	else
		hlist_add_head(&policy->bydst, chain);
	__xfrm_policy_link(policy, dir);
	atomic_inc(&net->xfrm.flow_cache_genid);

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

	if (delpol) {
		/* migrate held packets before retiring the old policy */
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	/* arm the lifetime timer; an armed timer owns a reference */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
813
/* Find — and, with @delete, unlink and kill — the policy in direction
 * @dir matching @type, @mark, selector @sel and security context @ctx.
 *
 * Returns the policy with a reference held (caller must drop it via
 * xfrm_pol_put()), or NULL when nothing matched.  If the security hook
 * refuses the deletion, *err is set and the still-linked policy is
 * returned.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	/* kill outside the write lock; we still hold a reference */
	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
852
/* Find (and optionally delete) the policy of @type with index @id in
 * direction @dir, subject to the mark filter.
 *
 * *err is -ENOENT if @id does not encode direction @dir, the LSM error
 * if the delete was refused (the held policy is still returned), else 0.
 * On success the policy is returned with an extra reference held.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				/* LSM may veto the delete before unlink. */
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	/* Kill outside the lock; the policy is already unlinked. */
	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
891
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight check for xfrm_policy_flush(): ask the LSM whether every
 * policy of @type may be deleted, covering the inexact list and every
 * hash chain in all directions.  Returns the first LSM error (after
 * emitting an audit record for the refused policy) or 0.
 * Runs under the policy lock taken by the caller.
 */
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0, task_valid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
								 task_valid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
/* No LSM compiled in: flushing is always permitted. */
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	return 0;
}
#endif
937
/* Delete every policy of @type in @net, in all directions.
 *
 * The LSM is consulted first via xfrm_policy_flush_secctx_check(); if it
 * refuses any single delete the flush is aborted with that error.
 * Returns 0 on success, -ESRCH when no policy of @type existed.
 */
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			/* Drop the lock around audit + kill, then restart
			 * the chain scan since it may have changed while
			 * unlocked.
			 */
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto again1;
		}

		/* Same unlink/unlock/restart pattern for each hash chain. */
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
996
/* Iterate over all policies in @net, invoking @func for each policy
 * matching the walker's type.  The walk is resumable: when @func returns
 * non-zero the walker's list node is parked after the current entry so a
 * later call continues from there.  Returns 0 when the walk completes,
 * -ENOENT if a completed walk visited nothing, -EINVAL for a bad walker
 * type, or the non-zero value returned by @func.
 */
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	/* seq != 0 with an unlinked node means the walk already finished. */
	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_first_entry(&walk->walk.all,
				     struct xfrm_policy_walk_entry, all);

	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;	/* parked walker node, not a policy */
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			/* Park the walker here so a later call resumes. */
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	/* Walk complete: detach the walker node from the list. */
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
1044
1045 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
1046 {
1047         INIT_LIST_HEAD(&walk->walk.all);
1048         walk->walk.dead = 1;
1049         walk->type = type;
1050         walk->seq = 0;
1051 }
1052 EXPORT_SYMBOL(xfrm_policy_walk_init);
1053
1054 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
1055 {
1056         if (list_empty(&walk->walk.all))
1057                 return;
1058
1059         write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
1060         list_del(&walk->walk.all);
1061         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1062 }
1063 EXPORT_SYMBOL(xfrm_policy_walk_done);
1064
1065 /*
1066  * Find policy to apply to this flow.
1067  *
1068  * Returns 0 if policy found, else an -errno.
1069  */
1070 static int xfrm_policy_match(const struct xfrm_policy *pol,
1071                              const struct flowi *fl,
1072                              u8 type, u16 family, int dir)
1073 {
1074         const struct xfrm_selector *sel = &pol->selector;
1075         int ret = -ESRCH;
1076         bool match;
1077
1078         if (pol->family != family ||
1079             (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1080             pol->type != type)
1081                 return ret;
1082
1083         match = xfrm_selector_match(sel, fl, family);
1084         if (match)
1085                 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1086                                                   dir);
1087
1088         return ret;
1089 }
1090
/* Find the best-matching policy of @type for flow @fl in direction @dir.
 *
 * The exact (address-hashed) chain is searched first; the inexact list
 * is then scanned for a match with a strictly better (lower) priority.
 * Returns a held policy, NULL when nothing matched, or an ERR_PTR
 * propagated from the LSM hook.
 */
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_head *chain;
	u32 priority = ~0U;	/* worst possible, so any hit improves it */

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;	/* no match, keep looking */
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	/* Inexact policies can still win if their priority beats the
	 * exact-match hit found above.
	 */
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if ((pol->priority >= priority) && ret)
			break;

		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			break;
		}
	}

	xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}
1149
/* Policy lookup honouring sub-policies: when CONFIG_XFRM_SUB_POLICY is
 * enabled a SUB-type match takes precedence; otherwise (or when no SUB
 * policy matches) fall back to the MAIN type.
 */
static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
1162
1163 static int flow_to_policy_dir(int dir)
1164 {
1165         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1166             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1167             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1168                 return dir;
1169
1170         switch (dir) {
1171         default:
1172         case FLOW_DIR_IN:
1173                 return XFRM_POLICY_IN;
1174         case FLOW_DIR_OUT:
1175                 return XFRM_POLICY_OUT;
1176         case FLOW_DIR_FWD:
1177                 return XFRM_POLICY_FWD;
1178         }
1179 }
1180
/* Flow-cache resolver for policies: release the cached object being
 * replaced (if any), look up the policy for @fl, and return its
 * flow_cache_object holding the two references the cache protocol
 * requires.  Errors and misses are propagated via ERR_CAST/NULL.
 */
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}
1200
1201 static inline int policy_to_flow_dir(int dir)
1202 {
1203         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1204             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1205             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1206                 return dir;
1207         switch (dir) {
1208         default:
1209         case XFRM_POLICY_IN:
1210                 return FLOW_DIR_IN;
1211         case XFRM_POLICY_OUT:
1212                 return FLOW_DIR_OUT;
1213         case XFRM_POLICY_FWD:
1214                 return FLOW_DIR_FWD;
1215         }
1216 }
1217
/* Match flow @fl against the per-socket policy of @sk for direction
 * @dir.  Returns a held policy on match, NULL when there is no
 * (matching) socket policy, or an ERR_PTR propagated from the LSM.
 */
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
						 const struct flowi *fl, u16 family)
{
	struct xfrm_policy *pol;
	struct net *net = sock_net(sk);

	rcu_read_lock();
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	pol = rcu_dereference(sk->sk_policy[dir]);
	if (pol != NULL) {
		bool match;
		int err = 0;

		if (pol->family != family) {
			pol = NULL;
			goto out;
		}

		match = xfrm_selector_match(&pol->selector, fl, family);
		if (match) {
			/* The socket mark must satisfy the policy's
			 * mark filter too.
			 */
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;	/* LSM: treat as no match */
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
	rcu_read_unlock();
	return pol;
}
1259
/* Add @pol to the per-netns all-policies list, bump the per-direction
 * count and take a reference.  Note this helper does not hash the
 * policy into the bydst/byidx tables.  All callers in view hold the
 * policy lock.
 */
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);
}
1268
/* Remove @pol from the policy tables.  Returns @pol if it was linked
 * (the caller then owns the reference taken at link time), or NULL if
 * it was already unlinked.
 */
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (list_empty(&pol->walk.all))
		return NULL;

	/* Socket policies are not hashed. */
	if (!hlist_unhashed(&pol->bydst)) {
		hlist_del(&pol->bydst);
		hlist_del(&pol->byidx);
	}

	list_del_init(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}
1288
/* Socket policies use the per-direction slots above XFRM_POLICY_MAX. */
static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
{
	__xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
}
1293
/* Counterpart of xfrm_sk_policy_link(): unlink from the socket slots. */
static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
{
	__xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
}
1298
1299 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1300 {
1301         struct net *net = xp_net(pol);
1302
1303         write_lock_bh(&net->xfrm.xfrm_policy_lock);
1304         pol = __xfrm_policy_unlink(pol, dir);
1305         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1306         if (pol) {
1307                 xfrm_policy_kill(pol);
1308                 return 0;
1309         }
1310         return -ENOENT;
1311 }
1312 EXPORT_SYMBOL(xfrm_policy_delete);
1313
/* Install @pol as the socket policy of @sk for direction @dir, replacing
 * (and killing) any previous one.  A NULL @pol clears the slot.  With
 * sub-policies enabled only MAIN-type policies may be attached.
 * Returns 0, or -EINVAL for a non-MAIN policy.
 */
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = sock_net(sk);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
	if (pol) {
		pol->curlft.add_time = get_seconds();
		/* Socket policies draw indices from the range above
		 * XFRM_POLICY_MAX.
		 */
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
		xfrm_sk_policy_link(pol, dir);
	}
	rcu_assign_pointer(sk->sk_policy[dir], pol);
	if (old_pol) {
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
		xfrm_sk_policy_unlink(old_pol, dir);
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	/* Kill the replaced policy outside the lock. */
	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}
1349
/* Duplicate socket policy @old for direction @dir (GFP_ATOMIC).  The
 * security context is cloned through the LSM; on LSM failure the fresh
 * allocation is freed and NULL is returned (as it is on OOM).
 */
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
	struct net *net = xp_net(old);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		newp->family = old->family;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
		xfrm_sk_policy_link(newp, dir);
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
		/* Drop the creation reference; the link holds its own. */
		xfrm_pol_put(newp);
	}
	return newp;
}
1380
/* Clone both (IN/OUT) socket policies from @osk onto the freshly cloned
 * socket @sk.  Returns 0 or -ENOMEM; on failure any policy cloned
 * before the error remains installed on @sk.
 */
int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
	const struct xfrm_policy *p;
	struct xfrm_policy *np;
	int i, ret = 0;

	rcu_read_lock();
	for (i = 0; i < 2; i++) {
		p = rcu_dereference(osk->sk_policy[i]);
		if (p) {
			np = clone_policy(p, i);
			if (unlikely(!np)) {
				ret = -ENOMEM;
				break;
			}
			rcu_assign_pointer(sk->sk_policy[i], np);
		}
	}
	rcu_read_unlock();
	return ret;
}
1402
1403 static int
1404 xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
1405                xfrm_address_t *remote, unsigned short family, u32 mark)
1406 {
1407         int err;
1408         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1409
1410         if (unlikely(afinfo == NULL))
1411                 return -EINVAL;
1412         err = afinfo->get_saddr(net, oif, local, remote, mark);
1413         xfrm_policy_put_afinfo(afinfo);
1414         return err;
1415 }
1416
/* Resolve list of templates for the flow, given policy.
 *
 * Walks @policy's template vector, finding an xfrm_state for each
 * template and storing it (with a reference) in @xfrm.  Returns the
 * number of states resolved, or a negative error after dropping all
 * references acquired so far.
 */
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			/* Tunnel/BEET: endpoint addresses come from the
			 * template; derive a source address when the
			 * template leaves it unspecified.
			 */
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, fl->flowi_oif,
						       &tmp, remote,
						       tmpl->encap_family, 0);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* Subsequent templates nest inside this hop. */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH) {
			error = -EAGAIN;	/* map "not found" to -EAGAIN */
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	/* Drop the references on every state acquired so far. */
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
1476
/* Resolve the templates of all @npols policies into @xfrm.  With more
 * than one policy the states are first collected in a temporary array
 * and then sorted for outbound processing.  Returns the total number of
 * states, -ENOBUFS if XFRM_MAX_DEPTH would be exceeded, or the error
 * from template resolution (with all acquired references dropped).
 */
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
1514
1515 /* Check that the bundle accepts the flow and its components are
1516  * still valid.
1517  */
1518
1519 static inline int xfrm_get_tos(const struct flowi *fl, int family)
1520 {
1521         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1522         int tos;
1523
1524         if (!afinfo)
1525                 return -EINVAL;
1526
1527         tos = afinfo->get_tos(fl);
1528
1529         xfrm_policy_put_afinfo(afinfo);
1530
1531         return tos;
1532 }
1533
/* Flow-cache "get" callback for bundles: validate the cached bundle and
 * return it with a dst reference, or return NULL so the cache resolves
 * a fresh one.
 */
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
	struct dst_entry *dst = &xdst->u.dst;

	if (xdst->route == NULL) {
		/* Dummy bundle - if it has xfrms we were not
		 * able to build bundle as template resolution failed.
		 * It means we need to try again resolving. */
		if (xdst->num_xfrms > 0)
			return NULL;
	} else if (dst->flags & DST_XFRM_QUEUE) {
		/* Queueing bundles are always re-resolved. */
		return NULL;
	} else {
		/* Real bundle */
		if (stale_bundle(dst))
			return NULL;
	}

	dst_hold(dst);
	return flo;
}
1556
1557 static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1558 {
1559         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1560         struct dst_entry *dst = &xdst->u.dst;
1561
1562         if (!xdst->route)
1563                 return 0;
1564         if (stale_bundle(dst))
1565                 return 0;
1566
1567         return 1;
1568 }
1569
1570 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1571 {
1572         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1573         struct dst_entry *dst = &xdst->u.dst;
1574
1575         dst_free(dst);
1576 }
1577
/* Flow-cache callbacks attached to every xfrm bundle in xfrm_alloc_dst(). */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};
1583
/* Allocate an xfrm_dst for @family from the per-netns dst_ops, zeroing
 * the xfrm-specific tail and attaching the bundle flow-cache ops.
 * Returns ERR_PTR(-EINVAL) for an unsupported family, or
 * ERR_PTR(-ENOBUFS) on allocation failure.
 */
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		/* Zero everything past the embedded dst_entry. */
		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}
1619
1620 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1621                                  int nfheader_len)
1622 {
1623         struct xfrm_policy_afinfo *afinfo =
1624                 xfrm_policy_get_afinfo(dst->ops->family);
1625         int err;
1626
1627         if (!afinfo)
1628                 return -EINVAL;
1629
1630         err = afinfo->init_path(path, dst, nfheader_len);
1631
1632         xfrm_policy_put_afinfo(afinfo);
1633
1634         return err;
1635 }
1636
1637 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1638                                 const struct flowi *fl)
1639 {
1640         struct xfrm_policy_afinfo *afinfo =
1641                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1642         int err;
1643
1644         if (!afinfo)
1645                 return -EINVAL;
1646
1647         err = afinfo->fill_dst(xdst, dev, fl);
1648
1649         xfrm_policy_put_afinfo(afinfo);
1650
1651         return err;
1652 }
1653
1654
1655 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1656  * all the metrics... Shortly, bundle a bundle.
1657  */
1658
1659 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1660                                             struct xfrm_state **xfrm, int nx,
1661                                             const struct flowi *fl,
1662                                             struct dst_entry *dst)
1663 {
1664         struct net *net = xp_net(policy);
1665         unsigned long now = jiffies;
1666         struct net_device *dev;
1667         struct xfrm_mode *inner_mode;
1668         struct dst_entry *dst_prev = NULL;
1669         struct dst_entry *dst0 = NULL;
1670         int i = 0;
1671         int err;
1672         int header_len = 0;
1673         int nfheader_len = 0;
1674         int trailer_len = 0;
1675         int tos;
1676         int family = policy->selector.family;
1677         xfrm_address_t saddr, daddr;
1678
1679         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1680
1681         tos = xfrm_get_tos(fl, family);
1682         err = tos;
1683         if (tos < 0)
1684                 goto put_states;
1685
1686         dst_hold(dst);
1687
1688         for (; i < nx; i++) {
1689                 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1690                 struct dst_entry *dst1 = &xdst->u.dst;
1691
1692                 err = PTR_ERR(xdst);
1693                 if (IS_ERR(xdst)) {
1694                         dst_release(dst);
1695                         goto put_states;
1696                 }
1697
1698                 if (xfrm[i]->sel.family == AF_UNSPEC) {
1699                         inner_mode = xfrm_ip2inner_mode(xfrm[i],
1700                                                         xfrm_af2proto(family));
1701                         if (!inner_mode) {
1702                                 err = -EAFNOSUPPORT;
1703                                 dst_release(dst);
1704                                 goto put_states;
1705                         }
1706                 } else
1707                         inner_mode = xfrm[i]->inner_mode;
1708
1709                 if (!dst_prev)
1710                         dst0 = dst1;
1711                 else {
1712                         dst_prev->child = dst_clone(dst1);
1713                         dst1->flags |= DST_NOHASH;
1714                 }
1715
1716                 xdst->route = dst;
1717                 dst_copy_metrics(dst1, dst);
1718
1719                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1720                         family = xfrm[i]->props.family;
1721                         dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
1722                                               &saddr, &daddr, family,
1723                                               xfrm[i]->props.output_mark);
1724                         err = PTR_ERR(dst);
1725                         if (IS_ERR(dst))
1726                                 goto put_states;
1727                 } else
1728                         dst_hold(dst);
1729
1730                 dst1->xfrm = xfrm[i];
1731                 xdst->xfrm_genid = xfrm[i]->genid;
1732
1733                 dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1734                 dst1->flags |= DST_HOST;
1735                 dst1->lastuse = now;
1736
1737                 dst1->input = dst_discard;
1738                 dst1->output = inner_mode->afinfo->output;
1739
1740                 dst1->next = dst_prev;
1741                 dst_prev = dst1;
1742
1743                 header_len += xfrm[i]->props.header_len;
1744                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1745                         nfheader_len += xfrm[i]->props.header_len;
1746                 trailer_len += xfrm[i]->props.trailer_len;
1747         }
1748
1749         dst_prev->child = dst;
1750         dst0->path = dst;
1751
1752         err = -ENODEV;
1753         dev = dst->dev;
1754         if (!dev)
1755                 goto free_dst;
1756
1757         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1758         xfrm_init_pmtu(dst_prev);
1759
1760         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1761                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1762
1763                 err = xfrm_fill_dst(xdst, dev, fl);
1764                 if (err)
1765                         goto free_dst;
1766
1767                 dst_prev->header_len = header_len;
1768                 dst_prev->trailer_len = trailer_len;
1769                 header_len -= xdst->u.dst.xfrm->props.header_len;
1770                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1771         }
1772
1773 out:
1774         return dst0;
1775
1776 put_states:
1777         for (; i < nx; i++)
1778                 xfrm_state_put(xfrm[i]);
1779 free_dst:
1780         if (dst0)
1781                 dst_free(dst0);
1782         dst0 = ERR_PTR(err);
1783         goto out;
1784 }
1785
/* Expand the primary policy lookup result into the full policy array.
 *
 * On entry pols[0] holds the result of the main lookup (NULL and ERR_PTR
 * are handled here) and *num_pols is 0 or 1.  With CONFIG_XFRM_SUB_POLICY,
 * a matching main-type policy is looked up and appended as pols[1].
 *
 * Returns 0 on success or a negative errno.  On success *num_pols is the
 * number of valid entries and *num_xfrms the total template count; the
 * latter is set to -1 when any policy blocks the flow.  On error all
 * policy references are dropped.
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
                                struct xfrm_policy **pols,
                                int *num_pols, int *num_xfrms)
{
        int i;

        if (*num_pols == 0 || !pols[0]) {
                *num_pols = 0;
                *num_xfrms = 0;
                return 0;
        }
        if (IS_ERR(pols[0]))
                return PTR_ERR(pols[0]);

        *num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
        if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
            pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
                                                    XFRM_POLICY_TYPE_MAIN,
                                                    fl, family,
                                                    XFRM_POLICY_OUT);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                /* Drops the pols[0] reference as well. */
                                xfrm_pols_put(pols, *num_pols);
                                return PTR_ERR(pols[1]);
                        }
                        (*num_pols)++;
                        (*num_xfrms) += pols[1]->xfrm_nr;
                }
        }
#endif
        /* A single non-ALLOW policy vetoes the whole flow. */
        for (i = 0; i < *num_pols; i++) {
                if (pols[i]->action != XFRM_POLICY_ALLOW) {
                        *num_xfrms = -1;
                        break;
                }
        }

        return 0;

}
1829
/* Resolve the templates of @pols into xfrm_states and build a dst bundle.
 *
 * Returns the new bundle as an xfrm_dst, NULL when xfrm_tmpl_resolve()
 * found zero states to instantiate, or an ERR_PTR (-EAGAIN means states
 * are not yet negotiated).  On success the caller's policy references in
 * @pols are copied into xdst->pols, i.e. ownership moves to the bundle.
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
                               struct dst_entry *dst_orig)
{
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct dst_entry *dst;
        struct xfrm_dst *xdst;
        int err;

        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
        if (err <= 0) {
                /* ERR_PTR(0) == NULL: nothing to build.  -EAGAIN (larval
                 * states pending) is not counted as a policy error. */
                if (err != 0 && err != -EAGAIN)
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
                return ERR_PTR(err);
        }

        dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
        if (IS_ERR(dst)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
                return ERR_CAST(dst);
        }

        xdst = (struct xfrm_dst *)dst;
        xdst->num_xfrms = err;
        xdst->num_pols = num_pols;
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);

        return xdst;
}
1863
1864 static void xfrm_policy_queue_process(unsigned long arg)
1865 {
1866         struct sk_buff *skb;
1867         struct sock *sk;
1868         struct dst_entry *dst;
1869         struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1870         struct net *net = xp_net(pol);
1871         struct xfrm_policy_queue *pq = &pol->polq;
1872         struct flowi fl;
1873         struct sk_buff_head list;
1874
1875         spin_lock(&pq->hold_queue.lock);
1876         skb = skb_peek(&pq->hold_queue);
1877         if (!skb) {
1878                 spin_unlock(&pq->hold_queue.lock);
1879                 goto out;
1880         }
1881         dst = skb_dst(skb);
1882         sk = skb->sk;
1883         xfrm_decode_session(skb, &fl, dst->ops->family);
1884         spin_unlock(&pq->hold_queue.lock);
1885
1886         dst_hold(dst->path);
1887         dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
1888         if (IS_ERR(dst))
1889                 goto purge_queue;
1890
1891         if (dst->flags & DST_XFRM_QUEUE) {
1892                 dst_release(dst);
1893
1894                 if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
1895                         goto purge_queue;
1896
1897                 pq->timeout = pq->timeout << 1;
1898                 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
1899                         xfrm_pol_hold(pol);
1900         goto out;
1901         }
1902
1903         dst_release(dst);
1904
1905         __skb_queue_head_init(&list);
1906
1907         spin_lock(&pq->hold_queue.lock);
1908         pq->timeout = 0;
1909         skb_queue_splice_init(&pq->hold_queue, &list);
1910         spin_unlock(&pq->hold_queue.lock);
1911
1912         while (!skb_queue_empty(&list)) {
1913                 skb = __skb_dequeue(&list);
1914
1915                 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1916                 dst_hold(skb_dst(skb)->path);
1917                 dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
1918                 if (IS_ERR(dst)) {
1919                         kfree_skb(skb);
1920                         continue;
1921                 }
1922
1923                 nf_reset(skb);
1924                 skb_dst_drop(skb);
1925                 skb_dst_set(skb, dst);
1926
1927                 dst_output(net, skb->sk, skb);
1928         }
1929
1930 out:
1931         xfrm_pol_put(pol);
1932         return;
1933
1934 purge_queue:
1935         pq->timeout = 0;
1936         skb_queue_purge(&pq->hold_queue);
1937         xfrm_pol_put(pol);
1938 }
1939
/* dst_output() handler for dummy bundles: hold the packet on the policy's
 * hold queue until states are negotiated, (re)arming the hold timer.
 *
 * Returns 0 when the packet is consumed (queued or dropped) or -EAGAIN
 * when the queue is already over XFRM_MAX_QUEUE_LEN.
 */
static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned long sched_next;
        struct dst_entry *dst = skb_dst(skb);
        struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
        struct xfrm_policy *pol = xdst->pols[0];
        struct xfrm_policy_queue *pq = &pol->polq;

        /* An skb with a busy fast clone cannot be held back; drop it. */
        if (unlikely(skb_fclone_busy(sk, skb))) {
                kfree_skb(skb);
                return 0;
        }

        if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
                kfree_skb(skb);
                return -EAGAIN;
        }

        skb_dst_force(skb);

        spin_lock_bh(&pq->hold_queue.lock);

        if (!pq->timeout)
                pq->timeout = XFRM_QUEUE_TMO_MIN;

        sched_next = jiffies + pq->timeout;

        /* If a timer was already pending, keep the earlier expiry and
         * drop the policy reference that pending timer owned. */
        if (del_timer(&pq->hold_timer)) {
                if (time_before(pq->hold_timer.expires, sched_next))
                        sched_next = pq->hold_timer.expires;
                xfrm_pol_put(pol);
        }

        __skb_queue_tail(&pq->hold_queue, skb);
        /* A newly armed timer takes its own policy reference. */
        if (!mod_timer(&pq->hold_timer, sched_next))
                xfrm_pol_hold(pol);

        spin_unlock_bh(&pq->hold_queue.lock);

        return 0;
}
1981
/* Build a "dummy" bundle used while xfrm_states are still being
 * negotiated: its output handler (xdst_queue_output) queues packets on
 * the policy hold queue instead of transforming them.
 *
 * When queueing is not requested (no XFRM_LOOKUP_QUEUE), larval drop is
 * enabled, or there are no templates, the bare xdst is returned without
 * the queueing setup.  Returns the xdst or an ERR_PTR.
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
                                                 struct xfrm_flo *xflo,
                                                 const struct flowi *fl,
                                                 int num_xfrms,
                                                 u16 family)
{
        int err;
        struct net_device *dev;
        struct dst_entry *dst;
        struct dst_entry *dst1;
        struct xfrm_dst *xdst;

        xdst = xfrm_alloc_dst(net, family);
        if (IS_ERR(xdst))
                return xdst;

        if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
            net->xfrm.sysctl_larval_drop ||
            num_xfrms <= 0)
                return xdst;

        dst = xflo->dst_orig;
        dst1 = &xdst->u.dst;
        dst_hold(dst);
        xdst->route = dst;

        dst_copy_metrics(dst1, dst);

        /* DST_XFRM_QUEUE marks this as a placeholder bundle. */
        dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
        dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
        dst1->lastuse = jiffies;

        dst1->input = dst_discard;
        dst1->output = xdst_queue_output;

        /* Second hold: child and route each own a reference to dst. */
        dst_hold(dst);
        dst1->child = dst;
        dst1->path = dst;

        xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

        err = -ENODEV;
        dev = dst->dev;
        if (!dev)
                goto free_dst;

        err = xfrm_fill_dst(xdst, dev, fl);
        if (err)
                goto free_dst;

out:
        return xdst;

free_dst:
        dst_release(dst1);
        xdst = ERR_PTR(err);
        goto out;
}
2040
/* Flow cache resolver for output bundles.
 *
 * Called by flow_cache_lookup() with the cached object (if any) in
 * @oldflo.  Reuses the old bundle's policies when they are still alive,
 * otherwise performs a fresh policy lookup, then tries to build a real
 * bundle.  Falls back to a dummy (queueing) bundle when policies exist
 * but no bundle can be instantiated yet.
 *
 * Returns the flow cache object to install (with one extra reference
 * for the caller), NULL when no policy applies, or an ERR_PTR.
 */
static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
                   struct flow_cache_object *oldflo, void *ctx)
{
        struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        struct xfrm_dst *xdst, *new_xdst;
        int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

        /* Check if the policies from old bundle are usable */
        xdst = NULL;
        if (oldflo) {
                xdst = container_of(oldflo, struct xfrm_dst, flo);
                num_pols = xdst->num_pols;
                num_xfrms = xdst->num_xfrms;
                pol_dead = 0;
                for (i = 0; i < num_pols; i++) {
                        pols[i] = xdst->pols[i];
                        pol_dead |= pols[i]->walk.dead;
                }
                if (pol_dead) {
                        /* Old bundle references a deleted policy. */
                        dst_free(&xdst->u.dst);
                        xdst = NULL;
                        num_pols = 0;
                        num_xfrms = 0;
                        oldflo = NULL;
                }
        }

        /* Resolve policies to use if we couldn't get them from
         * previous cache entry */
        if (xdst == NULL) {
                num_pols = 1;
                pols[0] = __xfrm_policy_lookup(net, fl, family,
                                               flow_to_policy_dir(dir));
                err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
                if (err < 0)
                        goto inc_error;
                if (num_pols == 0)
                        return NULL;
                if (num_xfrms <= 0)
                        goto make_dummy_bundle;
        }

        new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                                  xflo->dst_orig);
        if (IS_ERR(new_xdst)) {
                err = PTR_ERR(new_xdst);
                if (err != -EAGAIN)
                        goto error;
                /* States pending (-EAGAIN): keep serving the old bundle
                 * if there is one, else queue via a dummy bundle. */
                if (oldflo == NULL)
                        goto make_dummy_bundle;
                dst_hold(&xdst->u.dst);
                return oldflo;
        } else if (new_xdst == NULL) {
                num_xfrms = 0;
                if (oldflo == NULL)
                        goto make_dummy_bundle;
                xdst->num_xfrms = 0;
                dst_hold(&xdst->u.dst);
                return oldflo;
        }

        /* Kill the previous bundle */
        if (xdst) {
                /* The policies were stolen for newly generated bundle */
                xdst->num_pols = 0;
                dst_free(&xdst->u.dst);
        }

        /* Flow cache does not have reference, it dst_free()'s,
         * but we do need to return one reference for original caller */
        dst_hold(&new_xdst->u.dst);
        return &new_xdst->flo;

make_dummy_bundle:
        /* We found policies, but there's no bundles to instantiate:
         * either because the policy blocks, has no transformations or
         * we could not build template (no xfrm_states).*/
        xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
        if (IS_ERR(xdst)) {
                xfrm_pols_put(pols, num_pols);
                return ERR_CAST(xdst);
        }
        xdst->num_pols = num_pols;
        xdst->num_xfrms = num_xfrms;
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

        dst_hold(&xdst->u.dst);
        return &xdst->flo;

inc_error:
        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
        if (xdst != NULL)
                dst_free(&xdst->u.dst);
        else
                xfrm_pols_put(pols, num_pols);
        return ERR_PTR(err);
}
2142
2143 static struct dst_entry *make_blackhole(struct net *net, u16 family,
2144                                         struct dst_entry *dst_orig)
2145 {
2146         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2147         struct dst_entry *ret;
2148
2149         if (!afinfo) {
2150                 dst_release(dst_orig);
2151                 return ERR_PTR(-EINVAL);
2152         } else {
2153                 ret = afinfo->blackhole_route(net, dst_orig);
2154         }
2155         xfrm_policy_put_afinfo(afinfo);
2156
2157         return ret;
2158 }
2159
2160 /* Main function: finds/creates a bundle for given flow.
2161  *
2162  * At the moment we eat a raw IP route. Mostly to speed up lookups
2163  * on interfaces with disabled IPsec.
2164  */
/* Resolve the xfrm output route for @fl on top of @dst_orig.
 *
 * Checks socket policies first, then the flow cache / global policies.
 * Returns @dst_orig untouched when no transformation applies, the bundle
 * dst when the flow is transformed, or an ERR_PTR (-EREMOTE signals the
 * caller to install a blackhole route, -EPERM a blocking policy).
 * On success with a transformed flow the @dst_orig reference is
 * consumed; on error it is released unless XFRM_LOOKUP_KEEP_DST_REF.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                              const struct flowi *fl,
                              const struct sock *sk, int flags)
{
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        struct flow_cache_object *flo;
        struct xfrm_dst *xdst;
        struct dst_entry *dst, *route;
        u16 family = dst_orig->ops->family;
        u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

        dst = NULL;
        xdst = NULL;
        route = NULL;

        /* Socket policies take precedence over cached/global ones. */
        sk = sk_const_to_full_sk(sk);
        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                num_pols = 1;
                pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
                err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
                if (err < 0)
                        goto dropdst;

                if (num_pols) {
                        if (num_xfrms <= 0) {
                                drop_pols = num_pols;
                                goto no_transform;
                        }

                        xdst = xfrm_resolve_and_create_bundle(
                                        pols, num_pols, fl,
                                        family, dst_orig);
                        if (IS_ERR(xdst)) {
                                xfrm_pols_put(pols, num_pols);
                                err = PTR_ERR(xdst);
                                goto dropdst;
                        } else if (xdst == NULL) {
                                num_xfrms = 0;
                                drop_pols = num_pols;
                                goto no_transform;
                        }

                        /* Per-socket bundles are not flow-cached. */
                        dst_hold(&xdst->u.dst);
                        xdst->u.dst.flags |= DST_NOCACHE;
                        route = xdst->route;
                }
        }

        if (xdst == NULL) {
                struct xfrm_flo xflo;

                xflo.dst_orig = dst_orig;
                xflo.flags = flags;

                /* To accelerate a bit...  */
                if ((dst_orig->flags & DST_NOXFRM) ||
                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
                        goto nopol;

                flo = flow_cache_lookup(net, fl, family, dir,
                                        xfrm_bundle_lookup, &xflo);
                if (flo == NULL)
                        goto nopol;
                if (IS_ERR(flo)) {
                        err = PTR_ERR(flo);
                        goto dropdst;
                }
                xdst = container_of(flo, struct xfrm_dst, flo);

                num_pols = xdst->num_pols;
                num_xfrms = xdst->num_xfrms;
                memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
                route = xdst->route;
        }

        dst = &xdst->u.dst;
        if (route == NULL && num_xfrms > 0) {
                /* The only case when xfrm_bundle_lookup() returns a
                 * bundle with null route, is when the template could
                 * not be resolved. It means policies are there, but
                 * bundle could not be created, since we don't yet
                 * have the xfrm_state's. We need to wait for KM to
                 * negotiate new SA's or bail out with error.*/
                if (net->xfrm.sysctl_larval_drop) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
                        err = -EREMOTE;
                        goto error;
                }

                err = -EAGAIN;

                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
                goto error;
        }

no_transform:
        if (num_pols == 0)
                goto nopol;

        if ((flags & XFRM_LOOKUP_ICMP) &&
            !(pols[0]->flags & XFRM_POLICY_ICMP)) {
                err = -ENOENT;
                goto error;
        }

        for (i = 0; i < num_pols; i++)
                pols[i]->curlft.use_time = get_seconds();

        if (num_xfrms < 0) {
                /* Prohibit the flow */
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
                err = -EPERM;
                goto error;
        } else if (num_xfrms > 0) {
                /* Flow transformed */
                dst_release(dst_orig);
        } else {
                /* Flow passes untransformed */
                dst_release(dst);
                dst = dst_orig;
        }
ok:
        xfrm_pols_put(pols, drop_pols);
        if (dst && dst->xfrm &&
            dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
                dst->flags |= DST_XFRM_TUNNEL;
        return dst;

nopol:
        if (!(flags & XFRM_LOOKUP_ICMP)) {
                dst = dst_orig;
                goto ok;
        }
        err = -ENOENT;
error:
        dst_release(dst);
dropdst:
        if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
                dst_release(dst_orig);
        xfrm_pols_put(pols, drop_pols);
        return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);
2310
2311 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
2312  * Otherwise we may send out blackholed packets.
2313  */
2314 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2315                                     const struct flowi *fl,
2316                                     const struct sock *sk, int flags)
2317 {
2318         struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2319                                             flags | XFRM_LOOKUP_QUEUE |
2320                                             XFRM_LOOKUP_KEEP_DST_REF);
2321
2322         if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2323                 return make_blackhole(net, dst_orig->ops->family, dst_orig);
2324
2325         return dst;
2326 }
2327 EXPORT_SYMBOL(xfrm_lookup_route);
2328
2329 static inline int
2330 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2331 {
2332         struct xfrm_state *x;
2333
2334         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
2335                 return 0;
2336         x = skb->sp->xvec[idx];
2337         if (!x->type->reject)
2338                 return 0;
2339         return x->type->reject(x, skb, fl);
2340 }
2341
2342 /* When skb is transformed back to its "native" form, we have to
2343  * check policy restrictions. At the moment we make this in maximally
2344  * stupid way. Shame on me. :-) Of course, connected sockets must
2345  * have policy cached at them.
2346  */
2347
2348 static inline int
2349 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
2350               unsigned short family)
2351 {
2352         if (xfrm_state_kern(x))
2353                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
2354         return  x->id.proto == tmpl->id.proto &&
2355                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
2356                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
2357                 x->props.mode == tmpl->mode &&
2358                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2359                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2360                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
2361                   xfrm_state_addr_cmp(tmpl, x, family));
2362 }
2363
2364 /*
2365  * 0 or more than 0 is returned when validation is succeeded (either bypass
2366  * because of optional transport mode, or next index of the mathced secpath
2367  * state with the template.
2368  * -1 is returned when no matching template is found.
2369  * Otherwise "-2 - errored_index" is returned.
2370  */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
               unsigned short family)
{
        int idx = start;

        /* Optional transport-mode templates may be bypassed outright;
         * for mandatory ones assume failure (-1) until matched. */
        if (tmpl->optional) {
                if (tmpl->mode == XFRM_MODE_TRANSPORT)
                        return start;
        } else
                start = -1;
        for (; idx < sp->len; idx++) {
                /* Matched: next scan resumes after this state. */
                if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
                        return ++idx;
                if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
                        /* Non-transport state without a match: record the
                         * errored index as "-2 - idx" and stop scanning. */
                        if (start == -1)
                                start = -2-idx;
                        break;
                }
        }
        return start;
}
2393
2394 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2395                           unsigned int family, int reverse)
2396 {
2397         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2398         int err;
2399
2400         if (unlikely(afinfo == NULL))
2401                 return -EAFNOSUPPORT;
2402
2403         afinfo->decode_session(skb, fl, reverse);
2404         err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2405         xfrm_policy_put_afinfo(afinfo);
2406         return err;
2407 }
2408 EXPORT_SYMBOL(__xfrm_decode_session);
2409
2410 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2411 {
2412         for (; k < sp->len; k++) {
2413                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2414                         *idxp = k;
2415                         return 1;
2416                 }
2417         }
2418
2419         return 0;
2420 }
2421
/* Verify that an inbound/forwarded packet's secpath satisfies the
 * applicable xfrm policies.
 *
 * @dir encodes the policy direction in its low bits and an optional
 * reverse-decode flag above XFRM_POLICY_MASK.  Returns 1 when the packet
 * is acceptable, 0 when it must be dropped.
 */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                        unsigned short family)
{
        struct net *net = dev_net(skb->dev);
        struct xfrm_policy *pol;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int npols = 0;
        int xfrm_nr;
        int pi;
        int reverse;
        struct flowi fl;
        u8 fl_dir;
        int xerr_idx = -1;

        reverse = dir & ~XFRM_POLICY_MASK;
        dir &= XFRM_POLICY_MASK;
        fl_dir = policy_to_flow_dir(dir);

        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
                return 0;
        }

        nf_nat_decode_session(skb, &fl, family);

        /* First, check used SA against their selectors. */
        if (skb->sp) {
                int i;

                for (i = skb->sp->len-1; i >= 0; i--) {
                        struct xfrm_state *x = skb->sp->xvec[i];
                        if (!xfrm_selector_match(&x->sel, &fl, family)) {
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
                                return 0;
                        }
                }
        }

        /* Socket policy, if any, takes precedence over the flow cache. */
        pol = NULL;
        sk = sk_to_full_sk(sk);
        if (sk && sk->sk_policy[dir]) {
                pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
                if (IS_ERR(pol)) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                        return 0;
                }
        }

        if (!pol) {
                struct flow_cache_object *flo;

                flo = flow_cache_lookup(net, &fl, family, fl_dir,
                                        xfrm_policy_lookup, NULL);
                if (IS_ERR_OR_NULL(flo))
                        pol = ERR_CAST(flo);
                else
                        pol = container_of(flo, struct xfrm_policy, flo);
        }

        if (IS_ERR(pol)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                return 0;
        }

        if (!pol) {
                /* No policy: only plain (all-transport) secpaths pass. */
                if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
                        xfrm_secpath_reject(xerr_idx, skb, &fl);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
                        return 0;
                }
                return 1;
        }

        pol->curlft.use_time = get_seconds();

        pols[0] = pol;
        npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
                                                    &fl, family,
                                                    XFRM_POLICY_IN);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                                return 0;
                        }
                        pols[1]->curlft.use_time = get_seconds();
                        npols++;
                }
        }
#endif

        if (pol->action == XFRM_POLICY_ALLOW) {
                struct sec_path *sp;
                static struct sec_path dummy;
                struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
                struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
                struct xfrm_tmpl **tpp = tp;
                int ti = 0;
                int i, k;

                if ((sp = skb->sp) == NULL)
                        sp = &dummy;

                /* Collect the templates of every applicable policy. */
                for (pi = 0; pi < npols; pi++) {
                        if (pols[pi] != pol &&
                            pols[pi]->action != XFRM_POLICY_ALLOW) {
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
                                goto reject;
                        }
                        if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
                                goto reject_error;
                        }
                        for (i = 0; i < pols[pi]->xfrm_nr; i++)
                                tpp[ti++] = &pols[pi]->xfrm_vec[i];
                }
                xfrm_nr = ti;
                if (npols > 1) {
                        xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
                        tpp = stp;
                }

                /* For each tunnel xfrm, find the first matching tmpl.
                 * For each tmpl before that, find corresponding xfrm.
                 * Order is _important_. Later we will implement
                 * some barriers, but at the moment barriers
                 * are implied between each two transformations.
                 */
                for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
                        k = xfrm_policy_ok(tpp[i], sp, k, family);
                        if (k < 0) {
                                if (k < -1)
                                        /* "-2 - errored_index" returned */
                                        xerr_idx = -(2+k);
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
                                goto reject;
                        }
                }

                /* Leftover non-transport states are not covered by any
                 * template: reject. */
                if (secpath_has_nontransport(sp, k, &xerr_idx)) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
                        goto reject;
                }

                xfrm_pols_put(pols, npols);
                return 1;
        }
        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
        xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
        xfrm_pols_put(pols, npols);
        return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
2580
/* Re-route a forwarded packet through any applicable xfrm bundle.
 *
 * Decodes the flow from @skb, then asks xfrm_lookup() (in queueing
 * mode, so packets may be held while states resolve) for a transformed
 * route and installs it as the skb's dst.
 *
 * Returns 1 on success, 0 on failure; on failure the skb's dst is
 * cleared so the caller drops the packet.
 */
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct flowi fl;
	struct dst_entry *dst;
	int res = 1;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
		return 0;
	}

	/* Take a real reference on the current dst before handing it off. */
	skb_dst_force(skb);

	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
	if (IS_ERR(dst)) {
		res = 0;
		dst = NULL;
	}
	skb_dst_set(skb, dst);
	return res;
}
EXPORT_SYMBOL(__xfrm_route_forward);
2604
/* Optimize later using cookies and generation ids. */

/* dst_ops->check hook for XFRM dsts: revalidate the whole bundle on
 * every use.  Returns @dst if still usable, NULL to force the caller
 * into a fresh route lookup.
 */
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
	 * get validated by dst_ops->check on every use.  We do this
	 * because when a normal route referenced by an XFRM dst is
	 * obsoleted we do not go looking around for all parent
	 * referencing XFRM dsts so that we can invalidate them.  It
	 * is just too much work.  Instead we make the checks here on
	 * every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes it's ->obsolete field to be set to
	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
	 * this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}
2635
/* A bundle is stale exactly when xfrm_bundle_ok() rejects it. */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(xdst) ? 0 : 1;
}
2640
/* A device used by a bundle is going away: walk the child chain and
 * repoint every dst still referencing @dev at the netns loopback
 * device, so the bundle stays usable until garbage collected.
 */
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev_net(dev)->loopback_dev;
		/* Grab the loopback reference before releasing ours on @dev. */
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);
2650
/* dst_ops->link_failure hook: intentionally a no-op. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
}
2655
2656 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2657 {
2658         if (dst) {
2659                 if (dst->obsolete) {
2660                         dst_release(dst);
2661                         dst = NULL;
2662                 }
2663         }
2664         return dst;
2665 }
2666
/* Synchronously flush the per-netns flow cache, releasing cached
 * references to xfrm bundles.
 */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2672
/* Deferred variant of xfrm_garbage_collect(): schedules the flow cache
 * flush instead of performing it inline (safe from atomic context).
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}
2677
2678 static void xfrm_init_pmtu(struct dst_entry *dst)
2679 {
2680         do {
2681                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2682                 u32 pmtu, route_mtu_cached;
2683
2684                 pmtu = dst_mtu(dst->child);
2685                 xdst->child_mtu_cached = pmtu;
2686
2687                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2688
2689                 route_mtu_cached = dst_mtu(xdst->route);
2690                 xdst->route_mtu_cached = route_mtu_cached;
2691
2692                 if (pmtu > route_mtu_cached)
2693                         pmtu = route_mtu_cached;
2694
2695                 dst_metric_set(dst, RTAX_MTU, pmtu);
2696         } while ((dst = dst->next));
2697 }
2698
/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	/* The whole bundle is dead if the underlying path route got
	 * invalidated or its device is no longer running.
	 */
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	/* Queueing dsts are placeholders with no states to validate. */
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	/* Pass 1: walk from the outermost xfrm_dst inwards, rejecting on
	 * any invalid state, stale genid or stale inner route, while
	 * refreshing the cached child/route MTUs.  @last ends up as the
	 * innermost level whose cached MTU changed (NULL if none did).
	 */
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	/* Nothing changed: the bundle is valid as-is. */
	if (likely(!last))
		return 1;

	/* Pass 2: propagate the new MTU from @last back towards @first
	 * (via ->next), applying each state's overhead and clamping to
	 * that level's route MTU on the way.
	 */
	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}
2767
2768 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2769 {
2770         return dst_metric_advmss(dst->path);
2771 }
2772
2773 static unsigned int xfrm_mtu(const struct dst_entry *dst)
2774 {
2775         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2776
2777         return mtu ? : dst_mtu(dst->path);
2778 }
2779
2780 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
2781                                            struct sk_buff *skb,
2782                                            const void *daddr)
2783 {
2784         return dst->path->ops->neigh_lookup(dst, skb, daddr);
2785 }
2786
/* Register per-address-family policy operations.
 *
 * Any dst_ops hook the caller left NULL is filled in with the generic
 * xfrm implementation, then the afinfo is published for RCU readers.
 *
 * Returns 0 on success, -EINVAL on a NULL argument, -EAFNOSUPPORT for
 * an out-of-range family, or -EEXIST if the family is already
 * registered.
 */
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		/* Publish only after all hooks are in place. */
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2822
2823 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2824 {
2825         int err = 0;
2826         if (unlikely(afinfo == NULL))
2827                 return -EINVAL;
2828         if (unlikely(afinfo->family >= NPROTO))
2829                 return -EAFNOSUPPORT;
2830         spin_lock(&xfrm_policy_afinfo_lock);
2831         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2832                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2833                         err = -EINVAL;
2834                 else
2835                         RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
2836                                          NULL);
2837         }
2838         spin_unlock(&xfrm_policy_afinfo_lock);
2839         if (!err) {
2840                 struct dst_ops *dst_ops = afinfo->dst_ops;
2841
2842                 synchronize_rcu();
2843
2844                 dst_ops->kmem_cachep = NULL;
2845                 dst_ops->check = NULL;
2846                 dst_ops->negative_advice = NULL;
2847                 dst_ops->link_failure = NULL;
2848                 afinfo->garbage_collect = NULL;
2849         }
2850         return err;
2851 }
2852 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2853
2854 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2855 {
2856         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2857
2858         switch (event) {
2859         case NETDEV_DOWN:
2860                 xfrm_garbage_collect(dev_net(dev));
2861         }
2862         return NOTIFY_DONE;
2863 }
2864
/* Registered once for init_net in xfrm_policy_init(). */
static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};
2868
#ifdef CONFIG_XFRM_STATISTICS
/* Allocate the per-cpu xfrm MIB counters and create the /proc entry.
 * On xfrm_proc_init() failure the counters are freed again and the
 * negative error code is propagated.
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;
	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
	if (!net->mib.xfrm_statistics)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		free_percpu(net->mib.xfrm_statistics);
	return rv;
}

/* Tear down in reverse order of xfrm_statistics_init(). */
static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	free_percpu(net->mib.xfrm_statistics);
}
#else
/* Statistics support compiled out: provide no-op stubs. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}

static void xfrm_statistics_fini(struct net *net)
{
}
#endif
2897
/* Per-netns policy engine initialisation: allocate the by-index and
 * per-direction by-destination hash tables, seed the hash thresholds
 * and set up the resize/rebuild work items.  The global dst slab cache
 * and the netdevice notifier are created only for init_net.
 *
 * Returns 0 on success, -ENOMEM after unwinding partial allocations.
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	/* Initial hash size: 8 buckets; may be grown by the resize work. */
	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
		/* Full-prefix hashing by default (no wildcard thresholds). */
		htab->dbits4 = 32;
		htab->sbits4 = 32;
		htab->dbits6 = 128;
		htab->sbits6 = 128;
	}
	net->xfrm.policy_hthresh.lbits4 = 32;
	net->xfrm.policy_hthresh.rbits4 = 32;
	net->xfrm.policy_hthresh.lbits6 = 128;
	net->xfrm.policy_hthresh.rbits6 = 128;

	seqlock_init(&net->xfrm.policy_hthresh.lock);

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
	if (net_eq(net, &init_net))
		/* NOTE(review): return value ignored here — matches the
		 * historical behavior; failure would leave the notifier
		 * unregistered.
		 */
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	/* Free the per-direction tables allocated so far, then by-index. */
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}
2959
/* Per-netns policy engine teardown: flush remaining policies, then free
 * the hash tables, warning if anything is still linked.
 */
static void xfrm_policy_fini(struct net *net)
{
	unsigned int sz;
	int dir;

	/* Make sure no resize work is still touching the tables. */
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}
2988
/* Per-netns xfrm bring-up: locks, statistics, state, policy, sysctl and
 * flow cache, in that order.  On failure each already-initialised layer
 * is torn down via the goto ladder, in reverse order.
 */
static int __net_init xfrm_net_init(struct net *net)
{
	int rv;

	/* Initialize the per-net locks here */
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	rwlock_init(&net->xfrm.xfrm_policy_lock);
	mutex_init(&net->xfrm.xfrm_cfg_mutex);

	rv = xfrm_statistics_init(net);
	if (rv < 0)
		goto out_statistics;
	rv = xfrm_state_init(net);
	if (rv < 0)
		goto out_state;
	rv = xfrm_policy_init(net);
	if (rv < 0)
		goto out_policy;
	rv = xfrm_sysctl_init(net);
	if (rv < 0)
		goto out_sysctl;
	rv = flow_cache_init(net);
	if (rv < 0)
		goto out;

	return 0;

out:
	xfrm_sysctl_fini(net);
out_sysctl:
	xfrm_policy_fini(net);
out_policy:
	xfrm_state_fini(net);
out_state:
	xfrm_statistics_fini(net);
out_statistics:
	return rv;
}
3027
/* Per-netns xfrm teardown, strictly in reverse order of xfrm_net_init(). */
static void __net_exit xfrm_net_exit(struct net *net)
{
	flow_cache_fini(net);
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}
3036
/* Pernet hooks wiring xfrm into network namespace creation/destruction. */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};
3041
/* Boot-time entry point: register the pernet subsystem and initialise
 * the xfrm input path.
 */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}
3047
3048 #ifdef CONFIG_AUDITSYSCALL
/* Append the security context (if any) and the policy selector's
 * source/destination addresses to @audit_buf.  Prefix lengths are only
 * logged when they are narrower than a full host match.
 */
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}
3082
3083 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
3084 {
3085         struct audit_buffer *audit_buf;
3086
3087         audit_buf = xfrm_audit_start("SPD-add");
3088         if (audit_buf == NULL)
3089                 return;
3090         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3091         audit_log_format(audit_buf, " res=%u", result);
3092         xfrm_audit_common_policyinfo(xp, audit_buf);
3093         audit_log_end(audit_buf);
3094 }
3095 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
3096
3097 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3098                               bool task_valid)
3099 {
3100         struct audit_buffer *audit_buf;
3101
3102         audit_buf = xfrm_audit_start("SPD-delete");
3103         if (audit_buf == NULL)
3104                 return;
3105         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3106         audit_log_format(audit_buf, " res=%u", result);
3107         xfrm_audit_common_policyinfo(xp, audit_buf);
3108         audit_log_end(audit_buf);
3109 }
3110 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
3111 #endif
3112
3113 #ifdef CONFIG_XFRM_MIGRATE
3114 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
3115                                         const struct xfrm_selector *sel_tgt)
3116 {
3117         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
3118                 if (sel_tgt->family == sel_cmp->family &&
3119                     xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
3120                                     sel_cmp->family) &&
3121                     xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
3122                                     sel_cmp->family) &&
3123                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
3124                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3125                         return true;
3126                 }
3127         } else {
3128                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3129                         return true;
3130                 }
3131         }
3132         return false;
3133 }
3134
/* Find the policy to migrate: first probe the exact-match hash chain,
 * then scan the inexact list for a better (lower numeric priority)
 * candidate of the same type.  Returns the policy with a reference
 * held, or NULL if none matches.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;
	u32 priority = ~0U;

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		/* The inexact list is priority-ordered; stop once nothing
		 * can beat the hashed match found above.
		 */
		if ((pol->priority >= priority) && ret)
			break;

		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			break;
		}
	}

	xfrm_pol_hold(ret);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}
3170
3171 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3172 {
3173         int match = 0;
3174
3175         if (t->mode == m->mode && t->id.proto == m->proto &&
3176             (m->reqid == 0 || t->reqid == m->reqid)) {
3177                 switch (t->mode) {
3178                 case XFRM_MODE_TUNNEL:
3179                 case XFRM_MODE_BEET:
3180                         if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
3181                                             m->old_family) &&
3182                             xfrm_addr_equal(&t->saddr, &m->old_saddr,
3183                                             m->old_family)) {
3184                                 match = 1;
3185                         }
3186                         break;
3187                 case XFRM_MODE_TRANSPORT:
3188                         /* in case of transport mode, template does not store
3189                            any IP addresses, hence we just compare mode and
3190                            protocol */
3191                         match = 1;
3192                         break;
3193                 default:
3194                         break;
3195                 }
3196         }
3197         return match;
3198 }
3199
/* update endpoint address(es) of template(s) */

/* Rewrite the endpoints of every template in @pol matched by one of the
 * @num_migrate entries in @m, under the policy lock.  Each rewritten
 * tunnel/BEET template bumps the policy genid so cached bundles are
 * invalidated.  Returns 0 on success, -ENOENT if the policy died
 * underneath us, -ENODATA if nothing matched.
 */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			/* Only tunnel-ish templates carry addresses. */
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}
3240
/* Sanity-check a user-supplied migration list: the count must be within
 * 1..XFRM_MAX_DEPTH, every entry must actually change its endpoints,
 * new addresses must not be wildcards, and no two entries may target
 * the same old (daddr, saddr, proto, mode, reqid, family) tuple.
 * Returns 0 if valid, -EINVAL otherwise.
 */
static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
{
	int i, j;

	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
		return -EINVAL;

	for (i = 0; i < num_migrate; i++) {
		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
				    m[i].old_family) &&
		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
				    m[i].old_family))
			return -EINVAL;
		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
			return -EINVAL;

		/* check if there is any duplicated entry */
		for (j = i + 1; j < num_migrate; j++) {
			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
				    sizeof(m[i].old_daddr)) &&
			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
				    sizeof(m[i].old_saddr)) &&
			    m[i].proto == m[j].proto &&
			    m[i].mode == m[j].mode &&
			    m[i].reqid == m[j].reqid &&
			    m[i].old_family == m[j].old_family)
				return -EINVAL;
		}
	}

	return 0;
}
3274
/* Migrate an IPsec flow to new endpoints (RFC 4555-style MOBIKE
 * support).  Proceeds in stages: validate the request, find the target
 * policy, clone each matching state with the new addresses, update the
 * policy templates, delete the superseded states, and finally announce
 * the migration to key managers.  On mid-way failure the freshly
 * created states are deleted and held references dropped.
 *
 * Returns 0 on success or a negative errno.
 */
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k, struct net *net)
{
	int i, err, nx_cur = 0, nx_new = 0;
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
	struct xfrm_migrate *mp;

	/* Stage 0 - sanity checks */
	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	if (dir >= XFRM_POLICY_MAX) {
		err = -EINVAL;
		goto out;
	}

	/* Stage 1 - find policy */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s) */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp, net))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* Undo: drop the policy ref, release old states, kill new clones. */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
3346 #endif