net/core/neighbour.c
/*
 *      Generic address resolution entity
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
 *      Harald Welte            Add neighbour cache statistics like rtstat
 */

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

#define PNEIGH_HASHMASK         0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     That would result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity
   state. However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simplistic and must not make
   callbacks into neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
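
/*
 * A minimal sketch of the discipline above (editor's illustration, not
 * part of the original file; tbl and n stand for some table and entry):
 * take the table lock only long enough to find and pin an entry, then
 * drop the lock before doing any real work.
 */
#if 0
        read_lock_bh(&tbl->lock);
        /* ... find struct neighbour *n in a hash bucket ... */
        neigh_hold(n);                  /* pin the entry ...            */
        read_unlock_bh(&tbl->lock);     /* ... before any heavy lifting */

        /* now safe to call into drivers/protocol backends */
        neigh_release(n);
#endif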

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);

        __neigh_notify(neigh, RTM_DELNEIGH, 0);
        neigh_release(neigh);
}

/*
 * This is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
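
/*
 * Worked example (editor's sketch, not part of the original file):
 * net_random() % base is uniform in [0, base) and base >> 1 adds base/2,
 * so e.g. with base = 30 * HZ the result is uniform in
 * [15*HZ, 45*HZ) -- the (1/2)*base...(3/2)*base interval above.
 */
#if 0
        unsigned long t = neigh_rand_reach_time(30 * HZ); /* in [15*HZ, 45*HZ) */
#endif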

static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i <= nht->hash_mask; i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* A neighbour record may be discarded if:
                         * - nobody refers to it, and
                         * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
        neigh_hold(n);
        if (unlikely(mod_timer(&n->timer, when))) {
                printk("NEIGH: BUG, double timer add, state is %x\n",
                       n->nud_state);
                dump_stack();
        }
}

static int neigh_del_timer(struct neighbour *n)
{
        if ((n->nud_state & NUD_IN_TIMER) &&
            del_timer(&n->timer)) {
                neigh_release(n);
                return 1;
        }
        return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(list)) != NULL) {
                dev_put(skb->dev);
                kfree_skb(skb);
        }
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i <= nht->hash_mask; i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation:
                                   we must destroy the neighbour entry,
                                   but someone still uses it.

                                   Destruction will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   the entry to a safe state.
                                 */
                                skb_queue_purge(&n->arp_queue);
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);

        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        entries = atomic_inc_return(&tbl->entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3)
                        goto out_entries;
        }

        n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
        if (!n)
                goto out_entries;

        skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        n->parms          = neigh_parms_clone(&tbl->parms);
        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;
out:
        return n;

out_entries:
        atomic_dec(&tbl->entries);
        goto out;
}
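
/*
 * Editor's sketch of the GC thresholds consulted above (made-up numbers,
 * not part of the original file): with gc_thresh2 = 512 and
 * gc_thresh3 = 1024, allocation runs a synchronous neigh_forced_gc()
 * once the table holds >= 512 entries and has not been flushed within
 * the last 5 seconds, and fails (-> out_entries) at >= 1024 entries
 * when forced GC cannot reclaim anything.
 */
#if 0
        tbl->gc_thresh2 = 512;  /* soft limit: GC if last flush > 5 s ago */
        tbl->gc_thresh3 = 1024; /* hard limit: allocations may fail here  */
#endif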

static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
        size_t size = entries * sizeof(struct neighbour *);
        struct neigh_hash_table *ret;
        struct neighbour __rcu **buckets;

        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (!ret)
                return NULL;
        if (size <= PAGE_SIZE)
                buckets = kzalloc(size, GFP_ATOMIC);
        else
                buckets = (struct neighbour __rcu **)
                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                           get_order(size));
        if (!buckets) {
                kfree(ret);
                return NULL;
        }
        ret->hash_buckets = buckets;
        ret->hash_mask = entries - 1;
        get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
        return ret;
}
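
/*
 * Editor's note as a sketch (not part of the original file): entries is
 * always a power of two (see the BUG_ON in neigh_hash_grow below), so
 * the bucket mask replaces a modulo. With entries = 8, hash_mask = 7:
 */
#if 0
        bucket = hash_val & nht->hash_mask;     /* == hash_val % 8 here */
#endif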

static void neigh_hash_free_rcu(struct rcu_head *head)
{
        struct neigh_hash_table *nht = container_of(head,
                                                    struct neigh_hash_table,
                                                    rcu);
        size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
        struct neighbour __rcu **buckets = nht->hash_buckets;

        if (size <= PAGE_SIZE)
                kfree(buckets);
        else
                free_pages((unsigned long)buckets, get_order(size));
        kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_entries)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        BUG_ON(!is_power_of_2(new_entries));
        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_entries);
        if (!new_nht)
                return old_nht;

        for (i = 0; i <= old_nht->hash_mask; i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        hash &= new_nht->hash_mask;
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
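
/*
 * Minimal usage sketch (editor's illustration, assuming an IPv4/ARP
 * context with the global arp_tbl and some struct net_device *dev;
 * not part of the original file). neigh_lookup() returns a referenced
 * entry or NULL, so a successful lookup must be paired with
 * neigh_release().
 */
#if 0
        __be32 ip = htonl(0xc0a80001);  /* 192.168.0.1, made-up address */
        struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);

        if (n) {
                /* ... inspect n->nud_state, n->ha, ... */
                neigh_release(n);
        }
#endif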

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
                nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);

        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        neigh_hold(n);
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(neigh_create);

static u32 pneigh_hash(const void *pkey, int key_len)
{
        u32 hash_val = *(u32 *)(pkey + key_len - 4);
        hash_val ^= (hash_val >> 16);
        hash_val ^= hash_val >> 8;
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;
        return hash_val;
}
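
/*
 * Editor's worked example of the folding above (made-up key value, not
 * part of the original file): the last four key bytes are xor-folded
 * down to the 4-bit bucket index.
 */
#if 0
        u32 h = 0xc0a80001;     /* last 4 bytes of the key */
        h ^= h >> 16;           /* fold the high half in   */
        h ^= h >> 8;
        h ^= h >> 4;
        h &= PNEIGH_HASHMASK;   /* one of 16 buckets       */
#endif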

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
                                              struct net *net,
                                              const void *pkey,
                                              int key_len,
                                              struct net_device *dev)
{
        while (n) {
                if (!memcmp(n->key, pkey, key_len) &&
                    net_eq(pneigh_net(n), net) &&
                    (n->dev == dev || !n->dev))
                        return n;
                n = n->next;
        }
        return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, hold_net(net));
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                if (dev)
                        dev_put(dev);
                release_net(net);
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        release_net(pneigh_net(n));
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
                                if (n->dev)
                                        dev_put(n->dev);
                                release_net(pneigh_net(n));
                                kfree(n);
                                continue;
                        }
                        np = &n->next;
                }
        }
        return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (atomic_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}

static void neigh_destroy_rcu(struct rcu_head *head)
{
        struct neighbour *neigh = container_of(head, struct neighbour, rcu);

        kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/*
 *      The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct hh_cache *hh;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                printk(KERN_WARNING
                       "Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        if (neigh_del_timer(neigh))
                printk(KERN_WARNING "Impossible event.\n");

        while ((hh = neigh->hh) != NULL) {
                neigh->hh = hh->hh_next;
                hh->hh_next = NULL;

                write_seqlock_bh(&hh->hh_lock);
                hh->hh_output = neigh_blackhole;
                write_sequnlock_bh(&hh->hh_lock);
                hh_cache_put(hh);
        }

        skb_queue_purge(&neigh->arp_queue);

        dev_put(neigh->dev);
        neigh_parms_put(neigh->parms);

        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        struct hh_cache *hh;

        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

        neigh->output = neigh->ops->output;

        for (hh = neigh->hh; hh; hh = hh->hh_next)
                hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
        struct hh_cache *hh;

        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

        neigh->output = neigh->ops->connected_output;

        for (hh = neigh->hh; hh; hh = hh->hh_next)
                hh->hh_output = neigh->ops->hh_output;
}
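
/*
 * Editor's sketch (not part of the original file): these two helpers
 * flip the per-entry and per-hh output hooks between the verifying
 * slow path and the fast path. For IPv4/ARP in this kernel era, the
 * ops typically resolve to:
 */
#if 0
        /* suspect:   every packet re-validates the entry        */
        neigh->output = neigh_resolve_output;
        /* connected: header is prebuilt, transmit directly      */
        neigh->output = neigh_connected_output;
        hh->hh_output = dev_queue_xmit;
#endif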

static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from the random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                for (p = &tbl->parms; p; p = p->next)
                        p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
        }

        for (i = 0; i <= nht->hash_mask; i++) {
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        if (atomic_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + n->parms->gc_staletime))) {
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release the lock here, even if the hash
                 * table grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
         * base_reachable_time.
         */
        schedule_delayed_work(&tbl->gc_work,
                              tbl->parms.base_reachable_time >> 1);
        write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
        struct neigh_parms *p = n->parms;
        return (n->nud_state & NUD_PROBE) ?
                p->ucast_probes :
                p->ucast_probes + p->app_probes + p->mcast_probes;
}

static void neigh_invalidate(struct neighbour *neigh)
        __releases(neigh->lock)
        __acquires(neigh->lock)
{
        struct sk_buff *skb;

        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
        neigh->updated = jiffies;

        /* This is a very thin place. report_unreachable is a very
           complicated routine. In particular, it can hit the same
           neighbour entry!

           So we try to be accurate and avoid a dead loop. --ANK
         */
        while (neigh->nud_state == NUD_FAILED &&
               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                write_unlock(&neigh->lock);
                neigh->ops->error_report(neigh, skb);
                write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
                printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
                goto out;
        }

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                struct sk_buff *skb = skb_peek(&neigh->arp_queue);
                /* keep skb alive even if arp_queue overflows */
                if (skb)
                        skb = skb_copy(skb, GFP_ATOMIC);
                write_unlock(&neigh->lock);
                neigh->ops->solicit(neigh, skb);
                atomic_inc(&neigh->probes);
                kfree_skb(skb);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);
}
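
/*
 * Editor's summary sketch of the timer-driven NUD transitions above
 * (not part of the original file):
 *
 *      REACHABLE --(reachable_time up, recently used)--> DELAY
 *      REACHABLE --(reachable_time up, idle)-----------> STALE
 *      DELAY -----(confirmed in delay_probe_time)------> REACHABLE
 *      DELAY -----(no confirmation)--------------------> PROBE
 *      PROBE / INCOMPLETE --(max probes exhausted)-----> FAILED
 */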

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        unsigned long now;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        now = jiffies;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = jiffies;
                        neigh_add_timer(neigh, now + 1);
                } else {
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        if (skb_queue_len(&neigh->arp_queue) >=
                            neigh->parms->queue_len) {
                                struct sk_buff *buff;
                                buff = __skb_dequeue(&neigh->arp_queue);
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                }
                rc = 1;
        }
out_unlock_bh:
        write_unlock_bh(&neigh->lock);
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
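
/*
 * Caller-contract sketch (editor's illustration, not part of the
 * original file): a return of 0 means the entry is usable and the
 * packet may be transmitted now; 1 means the skb was consumed (queued
 * on arp_queue or dropped) while resolution proceeds.
 */
#if 0
        if (!neigh_event_send(neigh, skb)) {
                /* resolved: build the hardware header and transmit */
        }
        /* else: skb is queued/freed; the timer handler will solicit */
#endif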

static void neigh_update_hhs(const struct neighbour *neigh)
{
        struct hh_cache *hh;
        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
                = NULL;

        if (neigh->dev->header_ops)
                update = neigh->dev->header_ops->cache_update;

        if (update) {
                for (hh = neigh->hh; hh; hh = hh->hh_next) {
                        write_seqlock_bh(&hh->hh_lock);
                        update(hh, neigh->dev, neigh->ha);
                        write_sequnlock_bh(&hh->hh_lock);
                }
        }
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
                                lladdr instead of overriding it
                                if it is different.
                                It also allows retaining the current state
                                if lladdr is unchanged.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
                                as a router.

   The caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare the new lladdr with the cached one */
        if (!dev->addr_len) {
                /* First case: the device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check the override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If the entry was valid and the address has not changed,
           do not change the entry state if the new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid a dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
                                n1 = n2;
                        n1->output(skb);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
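
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * roughly how a protocol confirms a resolved address, mirroring what
 * IPv4's ARP receive path does on a reply. An administrative change
 * would pass NEIGH_UPDATE_F_ADMIN instead, so NUD_PERMANENT/NUD_NOARP
 * entries may also be modified.
 */
#if 0
        neigh_update(neigh, lladdr, NUD_REACHABLE,
                     NEIGH_UPDATE_F_OVERRIDE);
#endif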

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
                                 u8 *lladdr, void *saddr,
                                 struct net_device *dev)
{
        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
                                                 lladdr || !dev->addr_len);
        if (neigh)
                neigh_update(neigh, lladdr, NUD_STALE,
                             NEIGH_UPDATE_F_OVERRIDE);
        return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
                                   __be16 protocol)
{
        struct hh_cache *hh;

        smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
        for (hh = n->hh; hh; hh = hh->hh_next) {
                if (hh->hh_type == protocol) {
                        atomic_inc(&hh->hh_refcnt);
                        if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
                                hh_cache_put(hh);
                        return true;
                }
        }
        return false;
}

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                          __be16 protocol)
{
        struct hh_cache *hh;
        struct net_device *dev = dst->dev;

        if (likely(neigh_hh_lookup(n, dst, protocol)))
                return;

        /* slow path */
        hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
        if (!hh)
                return;

        seqlock_init(&hh->hh_lock);
        hh->hh_type = protocol;
        atomic_set(&hh->hh_refcnt, 2);

        if (dev->header_ops->cache(n, hh)) {
                kfree(hh);
                return;
        }

        write_lock_bh(&n->lock);

        /* must check if another thread already did the insert */
        if (neigh_hh_lookup(n, dst, protocol)) {
                kfree(hh);
                goto end;
        }

        if (n->nud_state & NUD_CONNECTED)
                hh->hh_output = n->ops->hh_output;
        else
                hh->hh_output = n->ops->output;

        hh->hh_next = n->hh;
        smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
        n->hh       = hh;

        if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
                hh_cache_put(hh);
end:
        write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old
 * dev_queue_xmit worked, e.g. if you want to override the normal
 * output path (eql, shaper) but resolution has not been made yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;

        __skb_pull(skb, skb_network_offset(skb));

        if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
                            skb->len) < 0 &&
            dev->header_ops->rebuild(skb))
                return 0;

        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct neighbour *neigh = NULL;
        int rc = 0;

        /* check dst before dereferencing it to fetch the neighbour */
        if (!dst || (neigh = dst_get_neighbour(dst)) == NULL)
                goto discard;

        __skb_pull(skb, skb_network_offset(skb));

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                if (dev->header_ops->cache &&
                    !dst->hh &&
                    !(dst->flags & DST_NOCACHE))
                        neigh_hh_init(neigh, dst, dst->ops->protocol);

                do {
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = neigh->ops->queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                      dst, neigh);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
        int err;
        struct dst_entry *dst = skb_dst(skb);
        struct neighbour *neigh = dst_get_neighbour(dst);
        struct net_device *dev = neigh->dev;
        unsigned int seq;

        __skb_pull(skb, skb_network_offset(skb));

        do {
                seq = read_seqbegin(&neigh->ha_lock);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      neigh->ha, NULL, skb->len);
        } while (read_seqretry(&neigh->ha_lock, seq));

        if (err >= 0)
                err = neigh->ops->queue_xmit(skb);
        else {
                err = -EINVAL;
                kfree_skb(skb);
        }
        return err;
}
EXPORT_SYMBOL(neigh_connected_output);

static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
                    struct sk_buff *skb)
{
        unsigned long now = jiffies;
        unsigned long sched_next = now + (net_random() % p->proxy_delay);

        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
                kfree_skb(skb);
                return;
        }

        NEIGH_CB(skb)->sched_next = sched_next;
        NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

        spin_lock(&tbl->proxy_queue.lock);
        if (del_timer(&tbl->proxy_timer)) {
                if (time_before(tbl->proxy_timer.expires, sched_next))
                        sched_next = tbl->proxy_timer.expires;
        }
        skb_dst_drop(skb);
        dev_hold(skb->dev);
        __skb_queue_tail(&tbl->proxy_queue, skb);
        mod_timer(&tbl->proxy_timer, sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1439
1440 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1441                                                       struct net *net, int ifindex)
1442 {
1443         struct neigh_parms *p;
1444
1445         for (p = &tbl->parms; p; p = p->next) {
1446                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1447                     (!p->dev && !ifindex))
1448                         return p;
1449         }
1450
1451         return NULL;
1452 }
1453
1454 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1455                                       struct neigh_table *tbl)
1456 {
1457         struct neigh_parms *p, *ref;
1458         struct net *net = dev_net(dev);
1459         const struct net_device_ops *ops = dev->netdev_ops;
1460
1461         ref = lookup_neigh_parms(tbl, net, 0);
1462         if (!ref)
1463                 return NULL;
1464
1465         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1466         if (p) {
1467                 p->tbl            = tbl;
1468                 atomic_set(&p->refcnt, 1);
1469                 p->reachable_time =
1470                                 neigh_rand_reach_time(p->base_reachable_time);
1471
1472                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1473                         kfree(p);
1474                         return NULL;
1475                 }
1476
1477                 dev_hold(dev);
1478                 p->dev = dev;
1479                 write_pnet(&p->net, hold_net(net));
1480                 p->sysctl_table = NULL;
1481                 write_lock_bh(&tbl->lock);
1482                 p->next         = tbl->parms.next;
1483                 tbl->parms.next = p;
1484                 write_unlock_bh(&tbl->lock);
1485         }
1486         return p;
1487 }
1488 EXPORT_SYMBOL(neigh_parms_alloc);
1489
1490 static void neigh_rcu_free_parms(struct rcu_head *head)
1491 {
1492         struct neigh_parms *parms =
1493                 container_of(head, struct neigh_parms, rcu_head);
1494
1495         neigh_parms_put(parms);
1496 }
1497
1498 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1499 {
1500         struct neigh_parms **p;
1501
1502         if (!parms || parms == &tbl->parms)
1503                 return;
1504         write_lock_bh(&tbl->lock);
1505         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1506                 if (*p == parms) {
1507                         *p = parms->next;
1508                         parms->dead = 1;
1509                         write_unlock_bh(&tbl->lock);
1510                         if (parms->dev)
1511                                 dev_put(parms->dev);
1512                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1513                         return;
1514                 }
1515         }
1516         write_unlock_bh(&tbl->lock);
1517         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1518 }
1519 EXPORT_SYMBOL(neigh_parms_release);
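/*
 * Editorial note: teardown is two-phase.  The entry is unlinked and
 * marked dead under tbl->lock, but the final neigh_parms_put() is
 * deferred by a full RCU grace period via call_rcu(), so code still
 * dereferencing the parms without holding its own reference finishes
 * safely before neigh_parms_destroy() frees the memory.
 */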
1520
1521 static void neigh_parms_destroy(struct neigh_parms *parms)
1522 {
1523         release_net(neigh_parms_net(parms));
1524         kfree(parms);
1525 }
1526
1527 static struct lock_class_key neigh_table_proxy_queue_class;
1528
1529 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1530 {
1531         unsigned long now = jiffies;
1532         unsigned long phsize;
1533
1534         write_pnet(&tbl->parms.net, &init_net);
1535         atomic_set(&tbl->parms.refcnt, 1);
1536         tbl->parms.reachable_time =
1537                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1538
1539         if (!tbl->kmem_cachep)
1540                 tbl->kmem_cachep =
1541                         kmem_cache_create(tbl->id, tbl->entry_size, 0,
1542                                           SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1543                                           NULL);
1544         tbl->stats = alloc_percpu(struct neigh_statistics);
1545         if (!tbl->stats)
1546                 panic("cannot create neighbour cache statistics");
1547
1548 #ifdef CONFIG_PROC_FS
1549         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1550                               &neigh_stat_seq_fops, tbl))
1551                 panic("cannot create neighbour proc dir entry");
1552 #endif
1553
1554         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
1555
1556         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1557         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1558
1559         if (!tbl->nht || !tbl->phash_buckets)
1560                 panic("cannot allocate neighbour cache hashes");
1561
1562         rwlock_init(&tbl->lock);
1563         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1564         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1565         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1566         skb_queue_head_init_class(&tbl->proxy_queue,
1567                         &neigh_table_proxy_queue_class);
1568
1569         tbl->last_flush = now;
1570         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1571 }
1572 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1573
1574 void neigh_table_init(struct neigh_table *tbl)
1575 {
1576         struct neigh_table *tmp;
1577
1578         neigh_table_init_no_netlink(tbl);
1579         write_lock(&neigh_tbl_lock);
1580         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1581                 if (tmp->family == tbl->family)
1582                         break;
1583         }
1584         tbl->next       = neigh_tables;
1585         neigh_tables    = tbl;
1586         write_unlock(&neigh_tbl_lock);
1587
1588         if (unlikely(tmp)) {
1589                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1590                        "family %d\n", tbl->family);
1591                 dump_stack();
1592         }
1593 }
1594 EXPORT_SYMBOL(neigh_table_init);
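/*
 * Editorial note: a protocol registers its cache by filling in a
 * static struct neigh_table and calling neigh_table_init() from its
 * own init path.  A trimmed sketch with placeholder values follows;
 * every "example_" name is hypothetical (arp_tbl in net/ipv4/arp.c
 * and nd_tbl in net/ipv6/ndisc.c are the real instances).
 */
#if 0 /* illustrative sketch only */
static struct neigh_table example_tbl = {
	.family		= AF_INET,
	.entry_size	= sizeof(struct neighbour) + 4,
	.key_len	= 4,			/* IPv4 address */
	.hash		= example_hash,
	.constructor	= example_constructor,
	.id		= "example_cache",
	.parms		= {
		.tbl			= &example_tbl,
		.base_reachable_time	= 30 * HZ,
		.retrans_time		= 1 * HZ,
		.gc_staletime		= 60 * HZ,
		.reachable_time		= 30 * HZ,
		.delay_probe_time	= 5 * HZ,
		.queue_len		= 3,
	},
	.gc_interval	= 30 * HZ,
	.gc_thresh1	= 128,
	.gc_thresh2	= 512,
	.gc_thresh3	= 1024,
};

static int __init example_proto_init(void)
{
	neigh_table_init(&example_tbl);
	return 0;
}
#endif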
1595
1596 int neigh_table_clear(struct neigh_table *tbl)
1597 {
1598         struct neigh_table **tp;
1599
1600         /* This is not clean... fix it so the IPv6 module can be unloaded safely */
1601         cancel_delayed_work_sync(&tbl->gc_work);
1602         del_timer_sync(&tbl->proxy_timer);
1603         pneigh_queue_purge(&tbl->proxy_queue);
1604         neigh_ifdown(tbl, NULL);
1605         if (atomic_read(&tbl->entries))
1606                 printk(KERN_CRIT "neighbour leakage\n");
1607         write_lock(&neigh_tbl_lock);
1608         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1609                 if (*tp == tbl) {
1610                         *tp = tbl->next;
1611                         break;
1612                 }
1613         }
1614         write_unlock(&neigh_tbl_lock);
1615
1616         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1617                  neigh_hash_free_rcu);
1618         tbl->nht = NULL;
1619
1620         kfree(tbl->phash_buckets);
1621         tbl->phash_buckets = NULL;
1622
1623         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1624
1625         free_percpu(tbl->stats);
1626         tbl->stats = NULL;
1627
1628         kmem_cache_destroy(tbl->kmem_cachep);
1629         tbl->kmem_cachep = NULL;
1630
1631         return 0;
1632 }
1633 EXPORT_SYMBOL(neigh_table_clear);
1634
1635 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1636 {
1637         struct net *net = sock_net(skb->sk);
1638         struct ndmsg *ndm;
1639         struct nlattr *dst_attr;
1640         struct neigh_table *tbl;
1641         struct net_device *dev = NULL;
1642         int err = -EINVAL;
1643
1644         ASSERT_RTNL();
1645         if (nlmsg_len(nlh) < sizeof(*ndm))
1646                 goto out;
1647
1648         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1649         if (dst_attr == NULL)
1650                 goto out;
1651
1652         ndm = nlmsg_data(nlh);
1653         if (ndm->ndm_ifindex) {
1654                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1655                 if (dev == NULL) {
1656                         err = -ENODEV;
1657                         goto out;
1658                 }
1659         }
1660
1661         read_lock(&neigh_tbl_lock);
1662         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1663                 struct neighbour *neigh;
1664
1665                 if (tbl->family != ndm->ndm_family)
1666                         continue;
1667                 read_unlock(&neigh_tbl_lock);
1668
1669                 if (nla_len(dst_attr) < tbl->key_len)
1670                         goto out;
1671
1672                 if (ndm->ndm_flags & NTF_PROXY) {
1673                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1674                         goto out;
1675                 }
1676
1677                 if (dev == NULL)
1678                         goto out;
1679
1680                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1681                 if (neigh == NULL) {
1682                         err = -ENOENT;
1683                         goto out;
1684                 }
1685
1686                 err = neigh_update(neigh, NULL, NUD_FAILED,
1687                                    NEIGH_UPDATE_F_OVERRIDE |
1688                                    NEIGH_UPDATE_F_ADMIN);
1689                 neigh_release(neigh);
1690                 goto out;
1691         }
1692         read_unlock(&neigh_tbl_lock);
1693         err = -EAFNOSUPPORT;
1694
1695 out:
1696         return err;
1697 }
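/*
 * Editorial note: neigh_delete() above services RTM_DELNEIGH, the
 * message behind "ip neigh del <addr> dev <if>".  A userspace sketch
 * of the request it parses (error handling elided; macros come from
 * <linux/netlink.h> and <linux/rtnetlink.h>):
 */
#if 0 /* illustrative sketch only */
static int example_del_neigh(int nlsk, int ifindex, __be32 addr)
{
	struct {
		struct nlmsghdr n;
		struct ndmsg    ndm;
		char            buf[64];
	} req;
	struct rtattr *rta;

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len     = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.n.nlmsg_type    = RTM_DELNEIGH;
	req.n.nlmsg_flags   = NLM_F_REQUEST | NLM_F_ACK;
	req.ndm.ndm_family  = AF_INET;
	req.ndm.ndm_ifindex = ifindex;

	/* append NDA_DST carrying the 4-byte IPv4 key */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.n.nlmsg_len));
	rta->rta_type = NDA_DST;
	rta->rta_len  = RTA_LENGTH(sizeof(addr));
	memcpy(RTA_DATA(rta), &addr, sizeof(addr));
	req.n.nlmsg_len = NLMSG_ALIGN(req.n.nlmsg_len) +
			  RTA_ALIGN(rta->rta_len);

	/* nlsk is a connected NETLINK_ROUTE socket */
	return send(nlsk, &req, req.n.nlmsg_len, 0) < 0 ? -1 : 0;
}
#endif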
1698
1699 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1700 {
1701         struct net *net = sock_net(skb->sk);
1702         struct ndmsg *ndm;
1703         struct nlattr *tb[NDA_MAX+1];
1704         struct neigh_table *tbl;
1705         struct net_device *dev = NULL;
1706         int err;
1707
1708         ASSERT_RTNL();
1709         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1710         if (err < 0)
1711                 goto out;
1712
1713         err = -EINVAL;
1714         if (tb[NDA_DST] == NULL)
1715                 goto out;
1716
1717         ndm = nlmsg_data(nlh);
1718         if (ndm->ndm_ifindex) {
1719                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1720                 if (dev == NULL) {
1721                         err = -ENODEV;
1722                         goto out;
1723                 }
1724
1725                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1726                         goto out;
1727         }
1728
1729         read_lock(&neigh_tbl_lock);
1730         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1731                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1732                 struct neighbour *neigh;
1733                 void *dst, *lladdr;
1734
1735                 if (tbl->family != ndm->ndm_family)
1736                         continue;
1737                 read_unlock(&neigh_tbl_lock);
1738
1739                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1740                         goto out;
1741                 dst = nla_data(tb[NDA_DST]);
1742                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1743
1744                 if (ndm->ndm_flags & NTF_PROXY) {
1745                         struct pneigh_entry *pn;
1746
1747                         err = -ENOBUFS;
1748                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1749                         if (pn) {
1750                                 pn->flags = ndm->ndm_flags;
1751                                 err = 0;
1752                         }
1753                         goto out;
1754                 }
1755
1756                 if (dev == NULL)
1757                         goto out;
1758
1759                 neigh = neigh_lookup(tbl, dst, dev);
1760                 if (neigh == NULL) {
1761                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1762                                 err = -ENOENT;
1763                                 goto out;
1764                         }
1765
1766                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1767                         if (IS_ERR(neigh)) {
1768                                 err = PTR_ERR(neigh);
1769                                 goto out;
1770                         }
1771                 } else {
1772                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1773                                 err = -EEXIST;
1774                                 neigh_release(neigh);
1775                                 goto out;
1776                         }
1777
1778                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1779                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1780                 }
1781
1782                 if (ndm->ndm_flags & NTF_USE) {
1783                         neigh_event_send(neigh, NULL);
1784                         err = 0;
1785                 } else
1786                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1787                 neigh_release(neigh);
1788                 goto out;
1789         }
1790
1791         read_unlock(&neigh_tbl_lock);
1792         err = -EAFNOSUPPORT;
1793 out:
1794         return err;
1795 }
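/*
 * Editorial note: the nlmsg_flags handling above gives RTM_NEWNEIGH
 * the usual add/change/replace semantics.  With iproute2's flags:
 * "ip neigh add" sends NLM_F_CREATE|NLM_F_EXCL and fails with -EEXIST
 * on a live entry, "ip neigh change" sends only NLM_F_REPLACE and
 * fails with -ENOENT if nothing exists, and "ip neigh replace" sends
 * NLM_F_CREATE|NLM_F_REPLACE.  Without NLM_F_REPLACE the
 * NEIGH_UPDATE_F_OVERRIDE flag is dropped, so an existing lladdr is
 * not forcibly rewritten; NTF_USE skips the update entirely and just
 * kicks resolution via neigh_event_send().
 */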
1796
1797 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1798 {
1799         struct nlattr *nest;
1800
1801         nest = nla_nest_start(skb, NDTA_PARMS);
1802         if (nest == NULL)
1803                 return -ENOBUFS;
1804
1805         if (parms->dev)
1806                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1807
1808         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1809         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1810         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1811         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1812         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1813         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1814         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1815         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1816                       parms->base_reachable_time);
1817         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1818         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1819         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1820         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1821         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1822         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1823
1824         return nla_nest_end(skb, nest);
1825
1826 nla_put_failure:
1827         nla_nest_cancel(skb, nest);
1828         return -EMSGSIZE;
1829 }
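/*
 * Editorial note: the NLA_PUT* macros above expand to the matching
 * nla_put_*() call plus "goto nla_put_failure" when the skb runs out
 * of tailroom, which is why every user defines that label;
 * nla_nest_cancel() then trims the half-written nest so the dump can
 * be retried with a larger buffer.
 */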
1830
1831 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1832                               u32 pid, u32 seq, int type, int flags)
1833 {
1834         struct nlmsghdr *nlh;
1835         struct ndtmsg *ndtmsg;
1836
1837         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1838         if (nlh == NULL)
1839                 return -EMSGSIZE;
1840
1841         ndtmsg = nlmsg_data(nlh);
1842
1843         read_lock_bh(&tbl->lock);
1844         ndtmsg->ndtm_family = tbl->family;
1845         ndtmsg->ndtm_pad1   = 0;
1846         ndtmsg->ndtm_pad2   = 0;
1847
1848         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1849         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1850         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1851         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1852         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1853
1854         {
1855                 unsigned long now = jiffies;
1856                 unsigned int flush_delta = now - tbl->last_flush;
1857                 unsigned int rand_delta = now - tbl->last_rand;
1858                 struct neigh_hash_table *nht;
1859                 struct ndt_config ndc = {
1860                         .ndtc_key_len           = tbl->key_len,
1861                         .ndtc_entry_size        = tbl->entry_size,
1862                         .ndtc_entries           = atomic_read(&tbl->entries),
1863                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1864                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1865                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1866                 };
1867
1868                 rcu_read_lock_bh();
1869                 nht = rcu_dereference_bh(tbl->nht);
1870                 ndc.ndtc_hash_rnd = nht->hash_rnd;
1871                 ndc.ndtc_hash_mask = nht->hash_mask;
1872                 rcu_read_unlock_bh();
1873
1874                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1875         }
1876
1877         {
1878                 int cpu;
1879                 struct ndt_stats ndst;
1880
1881                 memset(&ndst, 0, sizeof(ndst));
1882
1883                 for_each_possible_cpu(cpu) {
1884                         struct neigh_statistics *st;
1885
1886                         st = per_cpu_ptr(tbl->stats, cpu);
1887                         ndst.ndts_allocs                += st->allocs;
1888                         ndst.ndts_destroys              += st->destroys;
1889                         ndst.ndts_hash_grows            += st->hash_grows;
1890                         ndst.ndts_res_failed            += st->res_failed;
1891                         ndst.ndts_lookups               += st->lookups;
1892                         ndst.ndts_hits                  += st->hits;
1893                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1894                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1895                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1896                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1897                 }
1898
1899                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1900         }
1901
1902         BUG_ON(tbl->parms.dev);
1903         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1904                 goto nla_put_failure;
1905
1906         read_unlock_bh(&tbl->lock);
1907         return nlmsg_end(skb, nlh);
1908
1909 nla_put_failure:
1910         read_unlock_bh(&tbl->lock);
1911         nlmsg_cancel(skb, nlh);
1912         return -EMSGSIZE;
1913 }
1914
1915 static int neightbl_fill_param_info(struct sk_buff *skb,
1916                                     struct neigh_table *tbl,
1917                                     struct neigh_parms *parms,
1918                                     u32 pid, u32 seq, int type,
1919                                     unsigned int flags)
1920 {
1921         struct ndtmsg *ndtmsg;
1922         struct nlmsghdr *nlh;
1923
1924         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1925         if (nlh == NULL)
1926                 return -EMSGSIZE;
1927
1928         ndtmsg = nlmsg_data(nlh);
1929
1930         read_lock_bh(&tbl->lock);
1931         ndtmsg->ndtm_family = tbl->family;
1932         ndtmsg->ndtm_pad1   = 0;
1933         ndtmsg->ndtm_pad2   = 0;
1934
1935         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1936             neightbl_fill_parms(skb, parms) < 0)
1937                 goto errout;
1938
1939         read_unlock_bh(&tbl->lock);
1940         return nlmsg_end(skb, nlh);
1941 errout:
1942         read_unlock_bh(&tbl->lock);
1943         nlmsg_cancel(skb, nlh);
1944         return -EMSGSIZE;
1945 }
1946
1947 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1948         [NDTA_NAME]             = { .type = NLA_STRING },
1949         [NDTA_THRESH1]          = { .type = NLA_U32 },
1950         [NDTA_THRESH2]          = { .type = NLA_U32 },
1951         [NDTA_THRESH3]          = { .type = NLA_U32 },
1952         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1953         [NDTA_PARMS]            = { .type = NLA_NESTED },
1954 };
1955
1956 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1957         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1958         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1959         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1960         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1961         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1962         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1963         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1964         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1965         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1966         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1967         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1968         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1969         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1970 };
1971
1972 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1973 {
1974         struct net *net = sock_net(skb->sk);
1975         struct neigh_table *tbl;
1976         struct ndtmsg *ndtmsg;
1977         struct nlattr *tb[NDTA_MAX+1];
1978         int err;
1979
1980         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1981                           nl_neightbl_policy);
1982         if (err < 0)
1983                 goto errout;
1984
1985         if (tb[NDTA_NAME] == NULL) {
1986                 err = -EINVAL;
1987                 goto errout;
1988         }
1989
1990         ndtmsg = nlmsg_data(nlh);
1991         read_lock(&neigh_tbl_lock);
1992         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1993                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1994                         continue;
1995
1996                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1997                         break;
1998         }
1999
2000         if (tbl == NULL) {
2001                 err = -ENOENT;
2002                 goto errout_locked;
2003         }
2004
2005         /*
2006          * We acquire tbl->lock to be nice to the periodic timers and
2007          * make sure they always see a consistent set of values.
2008          */
2009         write_lock_bh(&tbl->lock);
2010
2011         if (tb[NDTA_PARMS]) {
2012                 struct nlattr *tbp[NDTPA_MAX+1];
2013                 struct neigh_parms *p;
2014                 int i, ifindex = 0;
2015
2016                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2017                                        nl_ntbl_parm_policy);
2018                 if (err < 0)
2019                         goto errout_tbl_lock;
2020
2021                 if (tbp[NDTPA_IFINDEX])
2022                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2023
2024                 p = lookup_neigh_parms(tbl, net, ifindex);
2025                 if (p == NULL) {
2026                         err = -ENOENT;
2027                         goto errout_tbl_lock;
2028                 }
2029
2030                 for (i = 1; i <= NDTPA_MAX; i++) {
2031                         if (tbp[i] == NULL)
2032                                 continue;
2033
2034                         switch (i) {
2035                         case NDTPA_QUEUE_LEN:
2036                                 p->queue_len = nla_get_u32(tbp[i]);
2037                                 break;
2038                         case NDTPA_PROXY_QLEN:
2039                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2040                                 break;
2041                         case NDTPA_APP_PROBES:
2042                                 p->app_probes = nla_get_u32(tbp[i]);
2043                                 break;
2044                         case NDTPA_UCAST_PROBES:
2045                                 p->ucast_probes = nla_get_u32(tbp[i]);
2046                                 break;
2047                         case NDTPA_MCAST_PROBES:
2048                                 p->mcast_probes = nla_get_u32(tbp[i]);
2049                                 break;
2050                         case NDTPA_BASE_REACHABLE_TIME:
2051                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2052                                 break;
2053                         case NDTPA_GC_STALETIME:
2054                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2055                                 break;
2056                         case NDTPA_DELAY_PROBE_TIME:
2057                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2058                                 break;
2059                         case NDTPA_RETRANS_TIME:
2060                                 p->retrans_time = nla_get_msecs(tbp[i]);
2061                                 break;
2062                         case NDTPA_ANYCAST_DELAY:
2063                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2064                                 break;
2065                         case NDTPA_PROXY_DELAY:
2066                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2067                                 break;
2068                         case NDTPA_LOCKTIME:
2069                                 p->locktime = nla_get_msecs(tbp[i]);
2070                                 break;
2071                         }
2072                 }
2073         }
2074
2075         if (tb[NDTA_THRESH1])
2076                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2077
2078         if (tb[NDTA_THRESH2])
2079                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2080
2081         if (tb[NDTA_THRESH3])
2082                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2083
2084         if (tb[NDTA_GC_INTERVAL])
2085                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2086
2087         err = 0;
2088
2089 errout_tbl_lock:
2090         write_unlock_bh(&tbl->lock);
2091 errout_locked:
2092         read_unlock(&neigh_tbl_lock);
2093 errout:
2094         return err;
2095 }
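/*
 * Editorial note: this is the RTM_SETNEIGHTBL handler behind
 * "ip ntable change".  Table-global knobs (gc_thresh*, gc_interval)
 * and nested per-parms values arrive in one message and are applied
 * under write_lock_bh(&tbl->lock), so the periodic GC work and the
 * timers never observe a half-updated set.
 */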
2096
2097 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2098 {
2099         struct net *net = sock_net(skb->sk);
2100         int family, tidx, nidx = 0;
2101         int tbl_skip = cb->args[0];
2102         int neigh_skip = cb->args[1];
2103         struct neigh_table *tbl;
2104
2105         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2106
2107         read_lock(&neigh_tbl_lock);
2108         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2109                 struct neigh_parms *p;
2110
2111                 if (tidx < tbl_skip || (family && tbl->family != family))
2112                         continue;
2113
2114                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2115                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2116                                        NLM_F_MULTI) <= 0)
2117                         break;
2118
2119                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2120                         if (!net_eq(neigh_parms_net(p), net))
2121                                 continue;
2122
2123                         if (nidx < neigh_skip)
2124                                 goto next;
2125
2126                         if (neightbl_fill_param_info(skb, tbl, p,
2127                                                      NETLINK_CB(cb->skb).pid,
2128                                                      cb->nlh->nlmsg_seq,
2129                                                      RTM_NEWNEIGHTBL,
2130                                                      NLM_F_MULTI) <= 0)
2131                                 goto out;
2132                 next:
2133                         nidx++;
2134                 }
2135
2136                 neigh_skip = 0;
2137         }
2138 out:
2139         read_unlock(&neigh_tbl_lock);
2140         cb->args[0] = tidx;
2141         cb->args[1] = nidx;
2142
2143         return skb->len;
2144 }
2145
2146 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2147                            u32 pid, u32 seq, int type, unsigned int flags)
2148 {
2149         unsigned long now = jiffies;
2150         struct nda_cacheinfo ci;
2151         struct nlmsghdr *nlh;
2152         struct ndmsg *ndm;
2153
2154         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2155         if (nlh == NULL)
2156                 return -EMSGSIZE;
2157
2158         ndm = nlmsg_data(nlh);
2159         ndm->ndm_family  = neigh->ops->family;
2160         ndm->ndm_pad1    = 0;
2161         ndm->ndm_pad2    = 0;
2162         ndm->ndm_flags   = neigh->flags;
2163         ndm->ndm_type    = neigh->type;
2164         ndm->ndm_ifindex = neigh->dev->ifindex;
2165
2166         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2167
2168         read_lock_bh(&neigh->lock);
2169         ndm->ndm_state   = neigh->nud_state;
2170         if (neigh->nud_state & NUD_VALID) {
2171                 char haddr[MAX_ADDR_LEN];
2172
2173                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2174                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2175                         read_unlock_bh(&neigh->lock);
2176                         goto nla_put_failure;
2177                 }
2178         }
2179
2180         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2181         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2182         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2183         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2184         read_unlock_bh(&neigh->lock);
2185
2186         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2187         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2188
2189         return nlmsg_end(skb, nlh);
2190
2191 nla_put_failure:
2192         nlmsg_cancel(skb, nlh);
2193         return -EMSGSIZE;
2194 }
2195
2196 static void neigh_update_notify(struct neighbour *neigh)
2197 {
2198         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2199         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2200 }
2201
2202 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2203                             struct netlink_callback *cb)
2204 {
2205         struct net *net = sock_net(skb->sk);
2206         struct neighbour *n;
2207         int rc, h, s_h = cb->args[1];
2208         int idx, s_idx = idx = cb->args[2];
2209         struct neigh_hash_table *nht;
2210
2211         rcu_read_lock_bh();
2212         nht = rcu_dereference_bh(tbl->nht);
2213
2214         for (h = 0; h <= nht->hash_mask; h++) {
2215                 if (h < s_h)
2216                         continue;
2217                 if (h > s_h)
2218                         s_idx = 0;
2219                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2220                      n != NULL;
2221                      n = rcu_dereference_bh(n->next)) {
2222                         if (!net_eq(dev_net(n->dev), net))
2223                                 continue;
2224                         if (idx < s_idx)
2225                                 goto next;
2226                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2227                                             cb->nlh->nlmsg_seq,
2228                                             RTM_NEWNEIGH,
2229                                             NLM_F_MULTI) <= 0) {
2230                                 rc = -1;
2231                                 goto out;
2232                         }
2233 next:
2234                         idx++;
2235                 }
2236         }
2237         rc = skb->len;
2238 out:
2239         rcu_read_unlock_bh();
2240         cb->args[1] = h;
2241         cb->args[2] = idx;
2242         return rc;
2243 }
2244
2245 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2246 {
2247         struct neigh_table *tbl;
2248         int t, family, s_t;
2249
2250         read_lock(&neigh_tbl_lock);
2251         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2252         s_t = cb->args[0];
2253
2254         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2255                 if (t < s_t || (family && tbl->family != family))
2256                         continue;
2257                 if (t > s_t)
2258                         memset(&cb->args[1], 0, sizeof(cb->args) -
2259                                                 sizeof(cb->args[0]));
2260                 if (neigh_dump_table(tbl, skb, cb) < 0)
2261                         break;
2262         }
2263         read_unlock(&neigh_tbl_lock);
2264
2265         cb->args[0] = t;
2266         return skb->len;
2267 }
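/*
 * Editorial note: dumps are restartable.  When the skb fills up,
 * netlink invokes the callback again and the position is recovered
 * from the cookies saved above: cb->args[0] is the table index,
 * cb->args[1] the hash bucket and cb->args[2] the position within
 * the bucket's chain, which is why neigh_dump_info() zeroes
 * args[1] onwards whenever it advances to the next table.
 */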
2268
2269 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2270 {
2271         int chain;
2272         struct neigh_hash_table *nht;
2273
2274         rcu_read_lock_bh();
2275         nht = rcu_dereference_bh(tbl->nht);
2276
2277         read_lock(&tbl->lock); /* avoid resizes */
2278         for (chain = 0; chain <= nht->hash_mask; chain++) {
2279                 struct neighbour *n;
2280
2281                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2282                      n != NULL;
2283                      n = rcu_dereference_bh(n->next))
2284                         cb(n, cookie);
2285         }
2286         read_unlock(&tbl->lock);
2287         rcu_read_unlock_bh();
2288 }
2289 EXPORT_SYMBOL(neigh_for_each);
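/*
 * Editorial note: neigh_for_each() is the supported way for outside
 * code to visit every entry; the read lock only pins the hash table
 * against resizes while the callbacks run, so callbacks must stay
 * short and must not sleep.  A minimal sketch (names hypothetical):
 */
#if 0 /* illustrative sketch only */
static void example_count_cb(struct neighbour *n, void *cookie)
{
	int *count = cookie;

	if (n->nud_state & NUD_VALID)
		(*count)++;
}

static int example_count_valid(struct neigh_table *tbl)
{
	int count = 0;

	neigh_for_each(tbl, example_count_cb, &count);
	return count;
}
#endif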
2290
2291 /* The tbl->lock must be held as a writer and BH disabled. */
2292 void __neigh_for_each_release(struct neigh_table *tbl,
2293                               int (*cb)(struct neighbour *))
2294 {
2295         int chain;
2296         struct neigh_hash_table *nht;
2297
2298         nht = rcu_dereference_protected(tbl->nht,
2299                                         lockdep_is_held(&tbl->lock));
2300         for (chain = 0; chain <= nht->hash_mask; chain++) {
2301                 struct neighbour *n;
2302                 struct neighbour __rcu **np;
2303
2304                 np = &nht->hash_buckets[chain];
2305                 while ((n = rcu_dereference_protected(*np,
2306                                         lockdep_is_held(&tbl->lock))) != NULL) {
2307                         int release;
2308
2309                         write_lock(&n->lock);
2310                         release = cb(n);
2311                         if (release) {
2312                                 rcu_assign_pointer(*np,
2313                                         rcu_dereference_protected(n->next,
2314                                                 lockdep_is_held(&tbl->lock)));
2315                                 n->dead = 1;
2316                         } else
2317                                 np = &n->next;
2318                         write_unlock(&n->lock);
2319                         if (release)
2320                                 neigh_cleanup_and_release(n);
2321                 }
2322         }
2323 }
2324 EXPORT_SYMBOL(__neigh_for_each_release);
2325
2326 #ifdef CONFIG_PROC_FS
2327
2328 static struct neighbour *neigh_get_first(struct seq_file *seq)
2329 {
2330         struct neigh_seq_state *state = seq->private;
2331         struct net *net = seq_file_net(seq);
2332         struct neigh_hash_table *nht = state->nht;
2333         struct neighbour *n = NULL;
2334         int bucket = state->bucket;
2335
2336         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2337         for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2338                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2339
2340                 while (n) {
2341                         if (!net_eq(dev_net(n->dev), net))
2342                                 goto next;
2343                         if (state->neigh_sub_iter) {
2344                                 loff_t fakep = 0;
2345                                 void *v;
2346
2347                                 v = state->neigh_sub_iter(state, n, &fakep);
2348                                 if (!v)
2349                                         goto next;
2350                         }
2351                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2352                                 break;
2353                         if (n->nud_state & ~NUD_NOARP)
2354                                 break;
2355 next:
2356                         n = rcu_dereference_bh(n->next);
2357                 }
2358
2359                 if (n)
2360                         break;
2361         }
2362         state->bucket = bucket;
2363
2364         return n;
2365 }
2366
2367 static struct neighbour *neigh_get_next(struct seq_file *seq,
2368                                         struct neighbour *n,
2369                                         loff_t *pos)
2370 {
2371         struct neigh_seq_state *state = seq->private;
2372         struct net *net = seq_file_net(seq);
2373         struct neigh_hash_table *nht = state->nht;
2374
2375         if (state->neigh_sub_iter) {
2376                 void *v = state->neigh_sub_iter(state, n, pos);
2377                 if (v)
2378                         return n;
2379         }
2380         n = rcu_dereference_bh(n->next);
2381
2382         while (1) {
2383                 while (n) {
2384                         if (!net_eq(dev_net(n->dev), net))
2385                                 goto next;
2386                         if (state->neigh_sub_iter) {
2387                                 void *v = state->neigh_sub_iter(state, n, pos);
2388                                 if (v)
2389                                         return n;
2390                                 goto next;
2391                         }
2392                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2393                                 break;
2394
2395                         if (n->nud_state & ~NUD_NOARP)
2396                                 break;
2397 next:
2398                         n = rcu_dereference_bh(n->next);
2399                 }
2400
2401                 if (n)
2402                         break;
2403
2404                 if (++state->bucket > nht->hash_mask)
2405                         break;
2406
2407                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2408         }
2409
2410         if (n && pos)
2411                 --(*pos);
2412         return n;
2413 }
2414
2415 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2416 {
2417         struct neighbour *n = neigh_get_first(seq);
2418
2419         if (n) {
2420                 --(*pos);
2421                 while (*pos) {
2422                         n = neigh_get_next(seq, n, pos);
2423                         if (!n)
2424                                 break;
2425                 }
2426         }
2427         return *pos ? NULL : n;
2428 }
2429
2430 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2431 {
2432         struct neigh_seq_state *state = seq->private;
2433         struct net *net = seq_file_net(seq);
2434         struct neigh_table *tbl = state->tbl;
2435         struct pneigh_entry *pn = NULL;
2436         int bucket = state->bucket;
2437
2438         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2439         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2440                 pn = tbl->phash_buckets[bucket];
2441                 while (pn && !net_eq(pneigh_net(pn), net))
2442                         pn = pn->next;
2443                 if (pn)
2444                         break;
2445         }
2446         state->bucket = bucket;
2447
2448         return pn;
2449 }
2450
2451 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2452                                             struct pneigh_entry *pn,
2453                                             loff_t *pos)
2454 {
2455         struct neigh_seq_state *state = seq->private;
2456         struct net *net = seq_file_net(seq);
2457         struct neigh_table *tbl = state->tbl;
2458
2459         pn = pn->next;
2460         while (!pn) {
2461                 if (++state->bucket > PNEIGH_HASHMASK)
2462                         break;
2463                 pn = tbl->phash_buckets[state->bucket];
2464                 while (pn && !net_eq(pneigh_net(pn), net))
2465                         pn = pn->next;
2466                 if (pn)
2467                         break;
2468         }
2469
2470         if (pn && pos)
2471                 --(*pos);
2472
2473         return pn;
2474 }
2475
2476 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2477 {
2478         struct pneigh_entry *pn = pneigh_get_first(seq);
2479
2480         if (pn) {
2481                 --(*pos);
2482                 while (*pos) {
2483                         pn = pneigh_get_next(seq, pn, pos);
2484                         if (!pn)
2485                                 break;
2486                 }
2487         }
2488         return *pos ? NULL : pn;
2489 }
2490
2491 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2492 {
2493         struct neigh_seq_state *state = seq->private;
2494         void *rc;
2495         loff_t idxpos = *pos;
2496
2497         rc = neigh_get_idx(seq, &idxpos);
2498         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2499                 rc = pneigh_get_idx(seq, &idxpos);
2500
2501         return rc;
2502 }
2503
2504 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2505         __acquires(rcu_bh)
2506 {
2507         struct neigh_seq_state *state = seq->private;
2508
2509         state->tbl = tbl;
2510         state->bucket = 0;
2511         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2512
2513         rcu_read_lock_bh();
2514         state->nht = rcu_dereference_bh(tbl->nht);
2515
2516         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2517 }
2518 EXPORT_SYMBOL(neigh_seq_start);
2519
2520 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2521 {
2522         struct neigh_seq_state *state;
2523         void *rc;
2524
2525         if (v == SEQ_START_TOKEN) {
2526                 rc = neigh_get_first(seq);
2527                 goto out;
2528         }
2529
2530         state = seq->private;
2531         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2532                 rc = neigh_get_next(seq, v, NULL);
2533                 if (rc)
2534                         goto out;
2535                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2536                         rc = pneigh_get_first(seq);
2537         } else {
2538                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2539                 rc = pneigh_get_next(seq, v, NULL);
2540         }
2541 out:
2542         ++(*pos);
2543         return rc;
2544 }
2545 EXPORT_SYMBOL(neigh_seq_next);
2546
2547 void neigh_seq_stop(struct seq_file *seq, void *v)
2548         __releases(rcu_bh)
2549 {
2550         rcu_read_unlock_bh();
2551 }
2552 EXPORT_SYMBOL(neigh_seq_stop);
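/*
 * Editorial note: protocols reuse this iterator for their /proc
 * tables by handing their neigh_table and flags to neigh_seq_start()
 * from their own seq_file ops.  Roughly what net/ipv4/arp.c does for
 * /proc/net/arp:
 */
#if 0 /* illustrative sketch only */
static void *example_arp_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* skip NUD_NOARP entries so "arp -a" only sees real peers */
	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}
#endif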
2553
2554 /* statistics via seq_file */
2555
2556 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2557 {
2558         struct neigh_table *tbl = seq->private;
2559         int cpu;
2560
2561         if (*pos == 0)
2562                 return SEQ_START_TOKEN;
2563
2564         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2565                 if (!cpu_possible(cpu))
2566                         continue;
2567                 *pos = cpu+1;
2568                 return per_cpu_ptr(tbl->stats, cpu);
2569         }
2570         return NULL;
2571 }
2572
2573 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2574 {
2575         struct neigh_table *tbl = seq->private;
2576         int cpu;
2577
2578         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2579                 if (!cpu_possible(cpu))
2580                         continue;
2581                 *pos = cpu+1;
2582                 return per_cpu_ptr(tbl->stats, cpu);
2583         }
2584         return NULL;
2585 }
2586
2587 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2588 {
2589
2590 }
2591
2592 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2593 {
2594         struct neigh_table *tbl = seq->private;
2595         struct neigh_statistics *st = v;
2596
2597         if (v == SEQ_START_TOKEN) {
2598                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2599                 return 0;
2600         }
2601
2602         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2603                         "%08lx %08lx  %08lx %08lx %08lx\n",
2604                    atomic_read(&tbl->entries),
2605
2606                    st->allocs,
2607                    st->destroys,
2608                    st->hash_grows,
2609
2610                    st->lookups,
2611                    st->hits,
2612
2613                    st->res_failed,
2614
2615                    st->rcv_probes_mcast,
2616                    st->rcv_probes_ucast,
2617
2618                    st->periodic_gc_runs,
2619                    st->forced_gc_runs,
2620                    st->unres_discards
2621                    );
2622
2623         return 0;
2624 }
2625
2626 static const struct seq_operations neigh_stat_seq_ops = {
2627         .start  = neigh_stat_seq_start,
2628         .next   = neigh_stat_seq_next,
2629         .stop   = neigh_stat_seq_stop,
2630         .show   = neigh_stat_seq_show,
2631 };
2632
2633 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2634 {
2635         int ret = seq_open(file, &neigh_stat_seq_ops);
2636
2637         if (!ret) {
2638                 struct seq_file *sf = file->private_data;
2639                 sf->private = PDE(inode)->data;
2640         }
2641         return ret;
2642 }
2643
2644 static const struct file_operations neigh_stat_seq_fops = {
2645         .owner   = THIS_MODULE,
2646         .open    = neigh_stat_seq_open,
2647         .read    = seq_read,
2648         .llseek  = seq_lseek,
2649         .release = seq_release,
2650 };
2651
2652 #endif /* CONFIG_PROC_FS */
2653
2654 static inline size_t neigh_nlmsg_size(void)
2655 {
2656         return NLMSG_ALIGN(sizeof(struct ndmsg))
2657                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2658                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2659                + nla_total_size(sizeof(struct nda_cacheinfo))
2660                + nla_total_size(4); /* NDA_PROBES */
2661 }
2662
2663 static void __neigh_notify(struct neighbour *n, int type, int flags)
2664 {
2665         struct net *net = dev_net(n->dev);
2666         struct sk_buff *skb;
2667         int err = -ENOBUFS;
2668
2669         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2670         if (skb == NULL)
2671                 goto errout;
2672
2673         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2674         if (err < 0) {
2675                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2676                 WARN_ON(err == -EMSGSIZE);
2677                 kfree_skb(skb);
2678                 goto errout;
2679         }
2680         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2681         return;
2682 errout:
2683         if (err < 0)
2684                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2685 }
2686
2687 #ifdef CONFIG_ARPD
2688 void neigh_app_ns(struct neighbour *n)
2689 {
2690         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2691 }
2692 EXPORT_SYMBOL(neigh_app_ns);
2693 #endif /* CONFIG_ARPD */
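/*
 * Editorial note: with CONFIG_ARPD and app_probes > 0, the core hands
 * unresolved addresses to userspace: neigh_app_ns() multicasts an
 * RTM_GETNEIGH request on RTNLGRP_NEIGH so an arpd-style daemon can
 * resolve the address and answer back with RTM_NEWNEIGH.
 */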
2694
2695 #ifdef CONFIG_SYSCTL
2696
2697 #define NEIGH_VARS_MAX 19
2698
2699 static struct neigh_sysctl_table {
2700         struct ctl_table_header *sysctl_header;
2701         struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2702         char *dev_name;
2703 } neigh_sysctl_template __read_mostly = {
2704         .neigh_vars = {
2705                 {
2706                         .procname       = "mcast_solicit",
2707                         .maxlen         = sizeof(int),
2708                         .mode           = 0644,
2709                         .proc_handler   = proc_dointvec,
2710                 },
2711                 {
2712                         .procname       = "ucast_solicit",
2713                         .maxlen         = sizeof(int),
2714                         .mode           = 0644,
2715                         .proc_handler   = proc_dointvec,
2716                 },
2717                 {
2718                         .procname       = "app_solicit",
2719                         .maxlen         = sizeof(int),
2720                         .mode           = 0644,
2721                         .proc_handler   = proc_dointvec,
2722                 },
2723                 {
2724                         .procname       = "retrans_time",
2725                         .maxlen         = sizeof(int),
2726                         .mode           = 0644,
2727                         .proc_handler   = proc_dointvec_userhz_jiffies,
2728                 },
2729                 {
2730                         .procname       = "base_reachable_time",
2731                         .maxlen         = sizeof(int),
2732                         .mode           = 0644,
2733                         .proc_handler   = proc_dointvec_jiffies,
2734                 },
2735                 {
2736                         .procname       = "delay_first_probe_time",
2737                         .maxlen         = sizeof(int),
2738                         .mode           = 0644,
2739                         .proc_handler   = proc_dointvec_jiffies,
2740                 },
2741                 {
2742                         .procname       = "gc_stale_time",
2743                         .maxlen         = sizeof(int),
2744                         .mode           = 0644,
2745                         .proc_handler   = proc_dointvec_jiffies,
2746                 },
2747                 {
2748                         .procname       = "unres_qlen",
2749                         .maxlen         = sizeof(int),
2750                         .mode           = 0644,
2751                         .proc_handler   = proc_dointvec,
2752                 },
2753                 {
2754                         .procname       = "proxy_qlen",
2755                         .maxlen         = sizeof(int),
2756                         .mode           = 0644,
2757                         .proc_handler   = proc_dointvec,
2758                 },
2759                 {
2760                         .procname       = "anycast_delay",
2761                         .maxlen         = sizeof(int),
2762                         .mode           = 0644,
2763                         .proc_handler   = proc_dointvec_userhz_jiffies,
2764                 },
2765                 {
2766                         .procname       = "proxy_delay",
2767                         .maxlen         = sizeof(int),
2768                         .mode           = 0644,
2769                         .proc_handler   = proc_dointvec_userhz_jiffies,
2770                 },
2771                 {
2772                         .procname       = "locktime",
2773                         .maxlen         = sizeof(int),
2774                         .mode           = 0644,
2775                         .proc_handler   = proc_dointvec_userhz_jiffies,
2776                 },
2777                 {
2778                         .procname       = "retrans_time_ms",
2779                         .maxlen         = sizeof(int),
2780                         .mode           = 0644,
2781                         .proc_handler   = proc_dointvec_ms_jiffies,
2782                 },
2783                 {
2784                         .procname       = "base_reachable_time_ms",
2785                         .maxlen         = sizeof(int),
2786                         .mode           = 0644,
2787                         .proc_handler   = proc_dointvec_ms_jiffies,
2788                 },
2789                 {
2790                         .procname       = "gc_interval",
2791                         .maxlen         = sizeof(int),
2792                         .mode           = 0644,
2793                         .proc_handler   = proc_dointvec_jiffies,
2794                 },
2795                 {
2796                         .procname       = "gc_thresh1",
2797                         .maxlen         = sizeof(int),
2798                         .mode           = 0644,
2799                         .proc_handler   = proc_dointvec,
2800                 },
2801                 {
2802                         .procname       = "gc_thresh2",
2803                         .maxlen         = sizeof(int),
2804                         .mode           = 0644,
2805                         .proc_handler   = proc_dointvec,
2806                 },
2807                 {
2808                         .procname       = "gc_thresh3",
2809                         .maxlen         = sizeof(int),
2810                         .mode           = 0644,
2811                         .proc_handler   = proc_dointvec,
2812                 },
2813                 {},
2814         },
2815 };
2816
2817 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2818                           char *p_name, proc_handler *handler)
2819 {
2820         struct neigh_sysctl_table *t;
2821         const char *dev_name_source = NULL;
2822
2823 #define NEIGH_CTL_PATH_ROOT     0
2824 #define NEIGH_CTL_PATH_PROTO    1
2825 #define NEIGH_CTL_PATH_NEIGH    2
2826 #define NEIGH_CTL_PATH_DEV      3
2827
2828         struct ctl_path neigh_path[] = {
2829                 { .procname = "net",     },
2830                 { .procname = "proto",   },
2831                 { .procname = "neigh",   },
2832                 { .procname = "default", },
2833                 { },
2834         };
2835
2836         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2837         if (!t)
2838                 goto err;
2839
2840         t->neigh_vars[0].data  = &p->mcast_probes;
2841         t->neigh_vars[1].data  = &p->ucast_probes;
2842         t->neigh_vars[2].data  = &p->app_probes;
2843         t->neigh_vars[3].data  = &p->retrans_time;
2844         t->neigh_vars[4].data  = &p->base_reachable_time;
2845         t->neigh_vars[5].data  = &p->delay_probe_time;
2846         t->neigh_vars[6].data  = &p->gc_staletime;
2847         t->neigh_vars[7].data  = &p->queue_len;
2848         t->neigh_vars[8].data  = &p->proxy_qlen;
2849         t->neigh_vars[9].data  = &p->anycast_delay;
2850         t->neigh_vars[10].data = &p->proxy_delay;
2851         t->neigh_vars[11].data = &p->locktime;
2852         t->neigh_vars[12].data  = &p->retrans_time;
2853         t->neigh_vars[13].data  = &p->base_reachable_time;
2854
2855         if (dev) {
2856                 dev_name_source = dev->name;
2857                 /* Terminate the table early */
2858                 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2859         } else {
2860                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
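                /* Editorial note: struct neigh_table deliberately lays
                 * out gc_interval and gc_thresh1..3 right after the
                 * embedded default parms, so the (p + 1) arithmetic
                 * below reaches the table-global knobs for the
                 * "default" directory. */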
2861                 t->neigh_vars[14].data = (int *)(p + 1);
2862                 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2863                 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2864                 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2865         }
2866
2867
2868         if (handler) {
2869                 /* RetransTime */
2870                 t->neigh_vars[3].proc_handler = handler;
2871                 t->neigh_vars[3].extra1 = dev;
2872                 /* ReachableTime */
2873                 t->neigh_vars[4].proc_handler = handler;
2874                 t->neigh_vars[4].extra1 = dev;
2875                 /* RetransTime (in milliseconds)*/
2876                 t->neigh_vars[12].proc_handler = handler;
2877                 t->neigh_vars[12].extra1 = dev;
2878                 /* ReachableTime (in milliseconds) */
2879                 t->neigh_vars[13].proc_handler = handler;
2880                 t->neigh_vars[13].extra1 = dev;
2881         }
2882
2883         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2884         if (!t->dev_name)
2885                 goto free;
2886
2887         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2888         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2889
2890         t->sysctl_header =
2891                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2892         if (!t->sysctl_header)
2893                 goto free_procname;
2894
2895         p->sysctl_table = t;
2896         return 0;
2897
2898 free_procname:
2899         kfree(t->dev_name);
2900 free:
2901         kfree(t);
2902 err:
2903         return -ENOBUFS;
2904 }
2905 EXPORT_SYMBOL(neigh_sysctl_register);
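/*
 * Editorial note: this builds the net.<proto>.neigh.<dev> sysctl
 * directory (plus "default" for the table-wide parms).  Roughly how
 * IPv4 hooks up a device's ARP parms (see net/ipv4/devinet.c):
 *
 *	neigh_sysctl_register(dev, in_dev->arp_parms, "ipv4", NULL);
 *
 * A non-NULL handler lets the protocol intercept writes to the
 * retrans/reachable time knobs, as IPv6's addrconf does to keep its
 * per-interface values in sync.
 */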
2906
2907 void neigh_sysctl_unregister(struct neigh_parms *p)
2908 {
2909         if (p->sysctl_table) {
2910                 struct neigh_sysctl_table *t = p->sysctl_table;
2911                 p->sysctl_table = NULL;
2912                 unregister_sysctl_table(t->sysctl_header);
2913                 kfree(t->dev_name);
2914                 kfree(t);
2915         }
2916 }
2917 EXPORT_SYMBOL(neigh_sysctl_unregister);
2918
2919 #endif  /* CONFIG_SYSCTL */
2920
2921 static int __init neigh_init(void)
2922 {
2923         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2924         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2925         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2926
2927         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2928         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2929
2930         return 0;
2931 }
2932
2933 subsys_initcall(neigh_init);
2934
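/*
 * Editorial note: registering at subsys_initcall time should ensure
 * the rtnetlink handlers above are in place before the protocol
 * initcalls (IPv4, IPv6, DECnet) run later in boot and call
 * neigh_table_init() for their caches.
 */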