OSDN Git Service

input: touchscreen: Fix uninitialized variable usage in Atmel driver
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/miscdevice.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/xt_qtaguid.h>
23 #include <linux/ratelimit.h>
24 #include <linux/seq_file.h>
25 #include <linux/skbuff.h>
26 #include <linux/workqueue.h>
27 #include <net/addrconf.h>
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/udp.h>
31
32 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
33 #include <linux/netfilter_ipv6/ip6_tables.h>
34 #endif
35
36 #include <linux/netfilter/xt_socket.h>
37 #include "xt_qtaguid_internal.h"
38 #include "xt_qtaguid_print.h"
39 #include "../../fs/proc/internal.h"
40
41 /*
42  * We only use the xt_socket funcs within a similar context to avoid unexpected
43  * return values.
44  */
45 #define XT_SOCKET_SUPPORTED_HOOKS \
46         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
47
48
49 static const char *module_procdirname = "xt_qtaguid";
50 static struct proc_dir_entry *xt_qtaguid_procdir;
51
52 static unsigned int proc_iface_perms = S_IRUGO;
53 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
54
55 static struct proc_dir_entry *xt_qtaguid_stats_file;
56 static unsigned int proc_stats_perms = S_IRUGO;
57 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
58
59 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
60
61 /* Everybody can write. But proc_ctrl_write_limited is true by default which
62  * limits what can be controlled. See the can_*() functions.
63  */
64 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
65 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
66
67 /* Limited by default, so the gid of the ctrl and stats proc entries
68  * will limit what can be done. See the can_*() functions.
69  */
70 static bool proc_stats_readall_limited = true;
71 static bool proc_ctrl_write_limited = true;
72
73 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
74                    S_IRUGO | S_IWUSR);
75 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
76                    S_IRUGO | S_IWUSR);
77
78 /*
79  * Limit the number of active tags (via socket tags) for a given UID.
80  * Multiple processes could share the UID.
81  */
82 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
83 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
84
85 /*
 * After the kernel has initialized this module, it is still possible
87  * to make it passive.
88  * Setting passive to Y:
89  *  - the iface stats handling will not act on notifications.
90  *  - iptables matches will never match.
91  *  - ctrl commands silently succeed.
92  *  - stats are always empty.
 * This is mostly useful when a bug is suspected.
94  */
95 static bool module_passive;
96 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
97
98 /*
99  * Control how qtaguid data is tracked per proc/uid.
100  * Setting tag_tracking_passive to Y:
101  *  - don't create proc specific structs to track tags
102  *  - don't check that active tag stats exceed some limits.
103  *  - don't clean up socket tags on process exits.
 * This is mostly useful when a bug is suspected.
105  */
106 static bool qtu_proc_handling_passive;
107 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
108                    S_IRUGO | S_IWUSR);
109
110 #define QTU_DEV_NAME "xt_qtaguid"
111
112 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
113 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
114
115 /*---------------------------------------------------------------------------*/
116 static const char *iface_stat_procdirname = "iface_stat";
117 static struct proc_dir_entry *iface_stat_procdir;
118 /*
 * The iface_stat_all* will go away once userspace gets used to the new fields
120  * that have a format line.
121  */
122 static const char *iface_stat_all_procfilename = "iface_stat_all";
123 static struct proc_dir_entry *iface_stat_all_procfile;
124 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
125 static struct proc_dir_entry *iface_stat_fmt_procfile;
126
127
128 static LIST_HEAD(iface_stat_list);
129 static DEFINE_SPINLOCK(iface_stat_list_lock);
130
131 static struct rb_root sock_tag_tree = RB_ROOT;
132 static DEFINE_SPINLOCK(sock_tag_list_lock);
133
134 static struct rb_root tag_counter_set_tree = RB_ROOT;
135 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
136
137 static struct rb_root uid_tag_data_tree = RB_ROOT;
138 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
139
140 static struct rb_root proc_qtu_data_tree = RB_ROOT;
141 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
142
143 static struct qtaguid_event_counts qtu_events;
144 /*----------------------------------------------*/
145 static bool can_manipulate_uids(void)
146 {
147         /* root pwnd */
148         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
149                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
150                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
151 }
152
153 static bool can_impersonate_uid(kuid_t uid)
154 {
155         return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
156 }
157
158 static bool can_read_other_uid_stats(kuid_t uid)
159 {
160         /* root pwnd */
161         return in_egroup_p(xt_qtaguid_stats_file->gid)
162                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
163                 || unlikely(!proc_stats_readall_limited)
164                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
165 }
166
/*
 * Add @bytes/@packets to the counter bucket selected by (set, direction,
 * protocol) inside @counters.
 */
static inline void dc_add_byte_packets(struct data_counters *counters, int set,
				  enum ifs_tx_rx direction,
				  enum ifs_proto ifs_proto,
				  int bytes,
				  int packets)
{
	counters->bpc[set][direction][ifs_proto].bytes += bytes;
	counters->bpc[set][direction][ifs_proto].packets += packets;
}
176
177 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
178 {
179         struct rb_node *node = root->rb_node;
180
181         while (node) {
182                 struct tag_node *data = rb_entry(node, struct tag_node, node);
183                 int result;
184                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
185                          " node=%p data=%p\n", tag, node, data);
186                 result = tag_compare(tag, data->tag);
187                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
188                          " data.tag=0x%llx (uid=%u) res=%d\n",
189                          tag, data->tag, get_uid_from_tag(data->tag), result);
190                 if (result < 0)
191                         node = node->rb_left;
192                 else if (result > 0)
193                         node = node->rb_right;
194                 else
195                         return data;
196         }
197         return NULL;
198 }
199
200 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
201 {
202         struct rb_node **new = &(root->rb_node), *parent = NULL;
203
204         /* Figure out where to put new node */
205         while (*new) {
206                 struct tag_node *this = rb_entry(*new, struct tag_node,
207                                                  node);
208                 int result = tag_compare(data->tag, this->tag);
209                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
210                          " (uid=%u)\n", __func__,
211                          this->tag,
212                          get_uid_from_tag(this->tag));
213                 parent = *new;
214                 if (result < 0)
215                         new = &((*new)->rb_left);
216                 else if (result > 0)
217                         new = &((*new)->rb_right);
218                 else
219                         BUG();
220         }
221
222         /* Add new node and rebalance tree. */
223         rb_link_node(&data->node, parent, new);
224         rb_insert_color(&data->node, root);
225 }
226
/* Insert @data into @root, keyed by its embedded tag_node (data->tn.tag). */
static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
231
232 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
233 {
234         struct tag_node *node = tag_node_tree_search(root, tag);
235         if (!node)
236                 return NULL;
237         return rb_entry(&node->node, struct tag_stat, tn.node);
238 }
239
/* Insert @data into @root, keyed by its embedded tag_node (data->tn.tag). */
static void tag_counter_set_tree_insert(struct tag_counter_set *data,
					struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
245
246 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
247                                                            tag_t tag)
248 {
249         struct tag_node *node = tag_node_tree_search(root, tag);
250         if (!node)
251                 return NULL;
252         return rb_entry(&node->node, struct tag_counter_set, tn.node);
253
254 }
255
/* Insert @data into @root, keyed by its embedded tag_node (data->tn.tag). */
static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
260
261 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
262 {
263         struct tag_node *node = tag_node_tree_search(root, tag);
264         if (!node)
265                 return NULL;
266         return rb_entry(&node->node, struct tag_ref, tn.node);
267 }
268
269 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
270                                              const struct sock *sk)
271 {
272         struct rb_node *node = root->rb_node;
273
274         while (node) {
275                 struct sock_tag *data = rb_entry(node, struct sock_tag,
276                                                  sock_node);
277                 if (sk < data->sk)
278                         node = node->rb_left;
279                 else if (sk > data->sk)
280                         node = node->rb_right;
281                 else
282                         return data;
283         }
284         return NULL;
285 }
286
287 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
288 {
289         struct rb_node **new = &(root->rb_node), *parent = NULL;
290
291         /* Figure out where to put new node */
292         while (*new) {
293                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
294                                                  sock_node);
295                 parent = *new;
296                 if (data->sk < this->sk)
297                         new = &((*new)->rb_left);
298                 else if (data->sk > this->sk)
299                         new = &((*new)->rb_right);
300                 else
301                         BUG();
302         }
303
304         /* Add new node and rebalance tree. */
305         rb_link_node(&data->sock_node, parent, new);
306         rb_insert_color(&data->sock_node, root);
307 }
308
309 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
310 {
311         struct rb_node *node;
312         struct sock_tag *st_entry;
313
314         node = rb_first(st_to_free_tree);
315         while (node) {
316                 st_entry = rb_entry(node, struct sock_tag, sock_node);
317                 node = rb_next(node);
318                 CT_DEBUG("qtaguid: %s(): "
319                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
320                          st_entry->sk,
321                          st_entry->tag,
322                          get_uid_from_tag(st_entry->tag));
323                 rb_erase(&st_entry->sock_node, st_to_free_tree);
324                 sock_put(st_entry->sk);
325                 kfree(st_entry);
326         }
327 }
328
329 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
330                                                        const pid_t pid)
331 {
332         struct rb_node *node = root->rb_node;
333
334         while (node) {
335                 struct proc_qtu_data *data = rb_entry(node,
336                                                       struct proc_qtu_data,
337                                                       node);
338                 if (pid < data->pid)
339                         node = node->rb_left;
340                 else if (pid > data->pid)
341                         node = node->rb_right;
342                 else
343                         return data;
344         }
345         return NULL;
346 }
347
348 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
349                                       struct rb_root *root)
350 {
351         struct rb_node **new = &(root->rb_node), *parent = NULL;
352
353         /* Figure out where to put new node */
354         while (*new) {
355                 struct proc_qtu_data *this = rb_entry(*new,
356                                                       struct proc_qtu_data,
357                                                       node);
358                 parent = *new;
359                 if (data->pid < this->pid)
360                         new = &((*new)->rb_left);
361                 else if (data->pid > this->pid)
362                         new = &((*new)->rb_right);
363                 else
364                         BUG();
365         }
366
367         /* Add new node and rebalance tree. */
368         rb_link_node(&data->node, parent, new);
369         rb_insert_color(&data->node, root);
370 }
371
372 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
373                                      struct rb_root *root)
374 {
375         struct rb_node **new = &(root->rb_node), *parent = NULL;
376
377         /* Figure out where to put new node */
378         while (*new) {
379                 struct uid_tag_data *this = rb_entry(*new,
380                                                      struct uid_tag_data,
381                                                      node);
382                 parent = *new;
383                 if (data->uid < this->uid)
384                         new = &((*new)->rb_left);
385                 else if (data->uid > this->uid)
386                         new = &((*new)->rb_right);
387                 else
388                         BUG();
389         }
390
391         /* Add new node and rebalance tree. */
392         rb_link_node(&data->node, parent, new);
393         rb_insert_color(&data->node, root);
394 }
395
396 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
397                                                      uid_t uid)
398 {
399         struct rb_node *node = root->rb_node;
400
401         while (node) {
402                 struct uid_tag_data *data = rb_entry(node,
403                                                      struct uid_tag_data,
404                                                      node);
405                 if (uid < data->uid)
406                         node = node->rb_left;
407                 else if (uid > data->uid)
408                         node = node->rb_right;
409                 else
410                         return data;
411         }
412         return NULL;
413 }
414
415 /*
416  * Allocates a new uid_tag_data struct if needed.
417  * Returns a pointer to the found or allocated uid_tag_data.
418  * Returns a PTR_ERR on failures, and lock is not held.
419  * If found is not NULL:
420  *   sets *found to true if not allocated.
421  *   sets *found to false if allocated.
422  */
423 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
424 {
425         struct uid_tag_data *utd_entry;
426
427         /* Look for top level uid_tag_data for the UID */
428         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
429         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
430
431         if (found_res)
432                 *found_res = utd_entry;
433         if (utd_entry)
434                 return utd_entry;
435
436         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
437         if (!utd_entry) {
438                 pr_err("qtaguid: get_uid_data(%u): "
439                        "tag data alloc failed\n", uid);
440                 return ERR_PTR(-ENOMEM);
441         }
442
443         utd_entry->uid = uid;
444         utd_entry->tag_ref_tree = RB_ROOT;
445         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
446         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
447         return utd_entry;
448 }
449
450 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
451 static struct tag_ref *new_tag_ref(tag_t new_tag,
452                                    struct uid_tag_data *utd_entry)
453 {
454         struct tag_ref *tr_entry;
455         int res;
456
457         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
458                 pr_info("qtaguid: new_tag_ref(0x%llx): "
459                         "tag ref alloc quota exceeded. max=%d\n",
460                         new_tag, max_sock_tags);
461                 res = -EMFILE;
462                 goto err_res;
463
464         }
465
466         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
467         if (!tr_entry) {
468                 pr_err("qtaguid: new_tag_ref(0x%llx): "
469                        "tag ref alloc failed\n",
470                        new_tag);
471                 res = -ENOMEM;
472                 goto err_res;
473         }
474         tr_entry->tn.tag = new_tag;
475         /* tr_entry->num_sock_tags  handled by caller */
476         utd_entry->num_active_tags++;
477         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
478         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
479                  " inserted new tag ref %p\n",
480                  new_tag, tr_entry);
481         return tr_entry;
482
483 err_res:
484         return ERR_PTR(res);
485 }
486
487 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
488                                       struct uid_tag_data **utd_res)
489 {
490         struct uid_tag_data *utd_entry;
491         struct tag_ref *tr_entry;
492         bool found_utd;
493         uid_t uid = get_uid_from_tag(full_tag);
494
495         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
496                  full_tag, uid);
497
498         utd_entry = get_uid_data(uid, &found_utd);
499         if (IS_ERR_OR_NULL(utd_entry)) {
500                 if (utd_res)
501                         *utd_res = utd_entry;
502                 return NULL;
503         }
504
505         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
506         if (utd_res)
507                 *utd_res = utd_entry;
508         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
509                  full_tag, utd_entry, tr_entry);
510         return tr_entry;
511 }
512
513 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
514 static struct tag_ref *get_tag_ref(tag_t full_tag,
515                                    struct uid_tag_data **utd_res)
516 {
517         struct uid_tag_data *utd_entry;
518         struct tag_ref *tr_entry;
519
520         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
521                  full_tag);
522         spin_lock_bh(&uid_tag_data_tree_lock);
523         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
524         BUG_ON(IS_ERR_OR_NULL(utd_entry));
525         if (!tr_entry)
526                 tr_entry = new_tag_ref(full_tag, utd_entry);
527
528         spin_unlock_bh(&uid_tag_data_tree_lock);
529         if (utd_res)
530                 *utd_res = utd_entry;
531         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
532                  full_tag, utd_entry, tr_entry);
533         return tr_entry;
534 }
535
536 /* Checks and maybe frees the UID Tag Data entry */
537 static void put_utd_entry(struct uid_tag_data *utd_entry)
538 {
539         /* Are we done with the UID tag data entry? */
540         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
541                 !utd_entry->num_pqd) {
542                 DR_DEBUG("qtaguid: %s(): "
543                          "erase utd_entry=%p uid=%u "
544                          "by pid=%u tgid=%u uid=%u\n", __func__,
545                          utd_entry, utd_entry->uid,
546                          current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
547                 BUG_ON(utd_entry->num_active_tags);
548                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
549                 kfree(utd_entry);
550         } else {
551                 DR_DEBUG("qtaguid: %s(): "
552                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
553                          __func__, utd_entry, utd_entry->num_active_tags,
554                          utd_entry->num_pqd);
555                 BUG_ON(!(utd_entry->num_active_tags ||
556                          utd_entry->num_pqd));
557         }
558 }
559
560 /*
561  * If no sock_tags are using this tag_ref,
562  * decrements refcount of utd_entry, removes tr_entry
563  * from utd_entry->tag_ref_tree and frees.
564  */
565 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
566                                         struct uid_tag_data *utd_entry)
567 {
568         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
569                  tr_entry, tr_entry->tn.tag,
570                  get_uid_from_tag(tr_entry->tn.tag));
571         if (!tr_entry->num_sock_tags) {
572                 BUG_ON(!utd_entry->num_active_tags);
573                 utd_entry->num_active_tags--;
574                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
575                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
576                 kfree(tr_entry);
577         }
578 }
579
580 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
581 {
582         struct rb_node *node;
583         struct tag_ref *tr_entry;
584         tag_t acct_tag;
585
586         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
587                  full_tag, get_uid_from_tag(full_tag));
588         acct_tag = get_atag_from_tag(full_tag);
589         node = rb_first(&utd_entry->tag_ref_tree);
590         while (node) {
591                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
592                 node = rb_next(node);
593                 if (!acct_tag || tr_entry->tn.tag == full_tag)
594                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
595         }
596 }
597
598 static ssize_t read_proc_u64(struct file *file, char __user *buf,
599                          size_t size, loff_t *ppos)
600 {
601         uint64_t *valuep = PDE_DATA(file_inode(file));
602         char tmp[24];
603         size_t tmp_size;
604
605         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
606         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
607 }
608
609 static ssize_t read_proc_bool(struct file *file, char __user *buf,
610                           size_t size, loff_t *ppos)
611 {
612         bool *valuep = PDE_DATA(file_inode(file));
613         char tmp[24];
614         size_t tmp_size;
615
616         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
617         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
618 }
619
620 static int get_active_counter_set(tag_t tag)
621 {
622         int active_set = 0;
623         struct tag_counter_set *tcs;
624
625         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
626                  " (uid=%u)\n",
627                  tag, get_uid_from_tag(tag));
628         /* For now we only handle UID tags for active sets */
629         tag = get_utag_from_tag(tag);
630         spin_lock_bh(&tag_counter_set_list_lock);
631         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
632         if (tcs)
633                 active_set = tcs->active_set;
634         spin_unlock_bh(&tag_counter_set_list_lock);
635         return active_set;
636 }
637
638 /*
639  * Find the entry for tracking the specified interface.
640  * Caller must hold iface_stat_list_lock
641  */
642 static struct iface_stat *get_iface_entry(const char *ifname)
643 {
644         struct iface_stat *iface_entry;
645
646         /* Find the entry for tracking the specified tag within the interface */
647         if (ifname == NULL) {
648                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
649                 return NULL;
650         }
651
652         /* Iterate over interfaces */
653         list_for_each_entry(iface_entry, &iface_stat_list, list) {
654                 if (!strcmp(ifname, iface_entry->ifname))
655                         goto done;
656         }
657         iface_entry = NULL;
658 done:
659         return iface_entry;
660 }
661
/* This is for fmt2 only */
/*
 * Emit the column-name header line for the fmt=2 output so userspace
 * parsers can discover the field order (must match pp_iface_stat_line()).
 */
static void pp_iface_stat_header(struct seq_file *m)
{
	seq_puts(m,
		 "ifname "
		 "total_skb_rx_bytes total_skb_rx_packets "
		 "total_skb_tx_bytes total_skb_tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n"
	);
}
677
/*
 * Print one fmt=2 data line for @iface_entry from its per-skb totals
 * (totals_via_skb), broken out by direction and protocol. The field
 * order must stay in sync with pp_iface_stat_header().
 */
static void pp_iface_stat_line(struct seq_file *m,
			       struct iface_stat *iface_entry)
{
	struct data_counters *cnts;
	int cnt_set = 0;   /* We only use one set for the device */
	cnts = &iface_entry->totals_via_skb;
	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
		   iface_entry->ifname,
		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
		   dc_sum_packets(cnts, cnt_set, IFS_RX),
		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
		   dc_sum_packets(cnts, cnt_set, IFS_TX),
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
}
704
/* Per-open private state for the iface_stat_all/iface_stat_fmt seq files. */
struct proc_iface_stat_fmt_info {
	int fmt;	/* output format version: 1 or 2 (see ..._proc_show()) */
};
708
/*
 * seq_file ->start: take the list lock and position the iterator.
 * The lock is released in iface_stat_fmt_proc_stop(); the seq_file core
 * calls ->stop even when ->start returns NULL, so the early returns
 * below do not leak the lock.
 */
static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	loff_t n = *pos;

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);

	/* Passive mode: report nothing. */
	if (unlikely(module_passive))
		return NULL;

	/* Print the header once, at position 0, for fmt=2 only. */
	if (!n && p->fmt == 2)
		pp_iface_stat_header(m);

	return seq_list_start(&iface_stat_list, n);
}
728
/* seq_file ->next: advance to the next tracked interface. */
static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &iface_stat_list, pos);
}
733
/* seq_file ->stop: drop the lock taken in iface_stat_fmt_proc_start(). */
static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
{
	spin_unlock_bh(&iface_stat_list_lock);
}
738
739 static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
740 {
741         struct proc_iface_stat_fmt_info *p = m->private;
742         struct iface_stat *iface_entry;
743         struct rtnl_link_stats64 dev_stats, *stats;
744         struct rtnl_link_stats64 no_dev_stats = {0};
745
746
747         CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
748                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
749
750         iface_entry = list_entry(v, struct iface_stat, list);
751
752         if (iface_entry->active) {
753                 stats = dev_get_stats(iface_entry->net_dev,
754                                       &dev_stats);
755         } else {
756                 stats = &no_dev_stats;
757         }
758         /*
759          * If the meaning of the data changes, then update the fmtX
760          * string.
761          */
762         if (p->fmt == 1) {
763                 seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
764                            iface_entry->ifname,
765                            iface_entry->active,
766                            iface_entry->totals_via_dev[IFS_RX].bytes,
767                            iface_entry->totals_via_dev[IFS_RX].packets,
768                            iface_entry->totals_via_dev[IFS_TX].bytes,
769                            iface_entry->totals_via_dev[IFS_TX].packets,
770                            stats->rx_bytes, stats->rx_packets,
771                            stats->tx_bytes, stats->tx_packets
772                            );
773         } else {
774                 pp_iface_stat_line(m, iface_entry);
775         }
776         return 0;
777 }
778
/* Read-only proc fops: dumps the uint64_t passed as proc_create_data data. */
static const struct file_operations read_u64_fops = {
	.read           = read_proc_u64,
	.llseek         = default_llseek,
};
783
/* Read-only proc fops: dumps the bool passed as proc_create_data data. */
static const struct file_operations read_bool_fops = {
	.read           = read_proc_bool,
	.llseek         = default_llseek,
};
788
/*
 * Workqueue handler: create /proc/.../iface_stat/<ifname>/ and its
 * per-interface counter files (tx/rx bytes/packets, active), wiring each
 * file to the corresponding field of the iface_stat entry via
 * proc_create_data(). Frees the work item before returning.
 * NOTE(review): presumably deferred to a workqueue because the queuing
 * context cannot create proc entries directly -- confirm at the caller.
 */
static void iface_create_proc_worker(struct work_struct *work)
{
	struct proc_dir_entry *proc_entry;
	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
						   iface_work);
	struct iface_stat *new_iface  = isw->iface_entry;

	/* iface_entries are not deleted, so safe to manipulate. */
	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
	if (IS_ERR_OR_NULL(proc_entry)) {
		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
		kfree(isw);
		return;
	}

	new_iface->proc_ptr = proc_entry;

	/* Each counter file reads the field given as its PDE data. */
	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].bytes);
	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].bytes);
	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].packets);
	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].packets);
	proc_create_data("active", proc_iface_perms, proc_entry,
			 &read_bool_fops, &new_iface->active);

	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
	kfree(isw);
}
825
826 /*
827  * Will set the entry's active state, and
828  * update the net_dev accordingly also.
829  */
830 static void _iface_stat_set_active(struct iface_stat *entry,
831                                    struct net_device *net_dev,
832                                    bool activate)
833 {
834         if (activate) {
835                 entry->net_dev = net_dev;
836                 entry->active = true;
837                 IF_DEBUG("qtaguid: %s(%s): "
838                          "enable tracking. rfcnt=%d\n", __func__,
839                          entry->ifname,
840                          __this_cpu_read(*net_dev->pcpu_refcnt));
841         } else {
842                 entry->active = false;
843                 entry->net_dev = NULL;
844                 IF_DEBUG("qtaguid: %s(%s): "
845                          "disable tracking. rfcnt=%d\n", __func__,
846                          entry->ifname,
847                          __this_cpu_read(*net_dev->pcpu_refcnt));
848
849         }
850 }
851
852 /* Caller must hold iface_stat_list_lock */
853 static struct iface_stat *iface_alloc(struct net_device *net_dev)
854 {
855         struct iface_stat *new_iface;
856         struct iface_stat_work *isw;
857
858         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
859         if (new_iface == NULL) {
860                 pr_err("qtaguid: iface_stat: create(%s): "
861                        "iface_stat alloc failed\n", net_dev->name);
862                 return NULL;
863         }
864         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
865         if (new_iface->ifname == NULL) {
866                 pr_err("qtaguid: iface_stat: create(%s): "
867                        "ifname alloc failed\n", net_dev->name);
868                 kfree(new_iface);
869                 return NULL;
870         }
871         spin_lock_init(&new_iface->tag_stat_list_lock);
872         new_iface->tag_stat_tree = RB_ROOT;
873         _iface_stat_set_active(new_iface, net_dev, true);
874
875         /*
876          * ipv6 notifier chains are atomic :(
877          * No create_proc_read_entry() for you!
878          */
879         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
880         if (!isw) {
881                 pr_err("qtaguid: iface_stat: create(%s): "
882                        "work alloc failed\n", new_iface->ifname);
883                 _iface_stat_set_active(new_iface, net_dev, false);
884                 kfree(new_iface->ifname);
885                 kfree(new_iface);
886                 return NULL;
887         }
888         isw->iface_entry = new_iface;
889         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
890         schedule_work(&isw->iface_work);
891         list_add(&new_iface->list, &iface_stat_list);
892         return new_iface;
893 }
894
/*
 * Detect a device whose dev_get_stats() counters went backwards since we
 * last stashed them (a "rewind", i.e. the device reset its stats while
 * still tracked).  In that case fold the stashed last_known[] values into
 * totals_via_dev[] so accumulated totals do not lose pre-reset traffic.
 * last_known[] is stashed by iface_stat_update(stash_only=true) on
 * NETDEV_DOWN, so only an active entry with last_known_valid is adjusted.
 */
static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
					       struct iface_stat *iface)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	bool stats_rewound;

	stats = dev_get_stats(net_dev, &dev_stats);
	/* No empty packets */
	/* Rewound == byte counters are below the last stashed snapshot. */
	stats_rewound =
		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);

	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
		 "bytes rx/tx=%llu/%llu "
		 "active=%d last_known=%d "
		 "stats_rewound=%d\n", __func__,
		 net_dev ? net_dev->name : "?",
		 iface, net_dev,
		 stats->rx_bytes, stats->tx_bytes,
		 iface->active, iface->last_known_valid, stats_rewound);

	if (iface->active && iface->last_known_valid && stats_rewound) {
		pr_warn_once("qtaguid: iface_stat: %s(%s): "
			     "iface reset its stats unexpectedly\n", __func__,
			     net_dev->name);

		/* Credit the traffic seen before the reset back into totals. */
		iface->totals_via_dev[IFS_TX].bytes +=
			iface->last_known[IFS_TX].bytes;
		iface->totals_via_dev[IFS_TX].packets +=
			iface->last_known[IFS_TX].packets;
		iface->totals_via_dev[IFS_RX].bytes +=
			iface->last_known[IFS_RX].bytes;
		iface->totals_via_dev[IFS_RX].packets +=
			iface->last_known[IFS_RX].packets;
		iface->last_known_valid = false;
		IF_DEBUG("qtaguid: %s(%s): iface=%p "
			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
			 iface->last_known[IFS_TX].bytes);
	}
}
936
937 /*
938  * Create a new entry for tracking the specified interface.
939  * Do nothing if the entry already exists.
940  * Called when an interface is configured with a valid IP address.
941  */
942 static void iface_stat_create(struct net_device *net_dev,
943                               struct in_ifaddr *ifa)
944 {
945         struct in_device *in_dev = NULL;
946         const char *ifname;
947         struct iface_stat *entry;
948         __be32 ipaddr = 0;
949         struct iface_stat *new_iface;
950
951         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
952                  net_dev ? net_dev->name : "?",
953                  ifa, net_dev);
954         if (!net_dev) {
955                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
956                 return;
957         }
958
959         ifname = net_dev->name;
960         if (!ifa) {
961                 in_dev = in_dev_get(net_dev);
962                 if (!in_dev) {
963                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
964                                ifname);
965                         return;
966                 }
967                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
968                          ifname, in_dev);
969                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
970                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
971                                  "ifa=%p ifa_label=%s\n",
972                                  ifname, ifa,
973                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
974                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
975                                 break;
976                 }
977         }
978
979         if (!ifa) {
980                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
981                          ifname);
982                 goto done_put;
983         }
984         ipaddr = ifa->ifa_local;
985
986         spin_lock_bh(&iface_stat_list_lock);
987         entry = get_iface_entry(ifname);
988         if (entry != NULL) {
989                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
990                          ifname, entry);
991                 iface_check_stats_reset_and_adjust(net_dev, entry);
992                 _iface_stat_set_active(entry, net_dev, true);
993                 IF_DEBUG("qtaguid: %s(%s): "
994                          "tracking now %d on ip=%pI4\n", __func__,
995                          entry->ifname, true, &ipaddr);
996                 goto done_unlock_put;
997         }
998
999         new_iface = iface_alloc(net_dev);
1000         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1001                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1002 done_unlock_put:
1003         spin_unlock_bh(&iface_stat_list_lock);
1004 done_put:
1005         if (in_dev)
1006                 in_dev_put(in_dev);
1007 }
1008
1009 static void iface_stat_create_ipv6(struct net_device *net_dev,
1010                                    struct inet6_ifaddr *ifa)
1011 {
1012         struct in_device *in_dev;
1013         const char *ifname;
1014         struct iface_stat *entry;
1015         struct iface_stat *new_iface;
1016         int addr_type;
1017
1018         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1019                  ifa, net_dev, net_dev ? net_dev->name : "");
1020         if (!net_dev) {
1021                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1022                 return;
1023         }
1024         ifname = net_dev->name;
1025
1026         in_dev = in_dev_get(net_dev);
1027         if (!in_dev) {
1028                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1029                        ifname);
1030                 return;
1031         }
1032
1033         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1034                  ifname, in_dev);
1035
1036         if (!ifa) {
1037                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1038                          ifname);
1039                 goto done_put;
1040         }
1041         addr_type = ipv6_addr_type(&ifa->addr);
1042
1043         spin_lock_bh(&iface_stat_list_lock);
1044         entry = get_iface_entry(ifname);
1045         if (entry != NULL) {
1046                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1047                          ifname, entry);
1048                 iface_check_stats_reset_and_adjust(net_dev, entry);
1049                 _iface_stat_set_active(entry, net_dev, true);
1050                 IF_DEBUG("qtaguid: %s(%s): "
1051                          "tracking now %d on ip=%pI6c\n", __func__,
1052                          entry->ifname, true, &ifa->addr);
1053                 goto done_unlock_put;
1054         }
1055
1056         new_iface = iface_alloc(net_dev);
1057         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1058                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1059
1060 done_unlock_put:
1061         spin_unlock_bh(&iface_stat_list_lock);
1062 done_put:
1063         in_dev_put(in_dev);
1064 }
1065
/*
 * Look up the sock_tag entry for @sk in the global sock_tag_tree.
 * "_nl" = no locking here: caller must hold sock_tag_list_lock
 * (see get_sock_stat() for the locked wrapper).
 */
static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
{
	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
	return sock_tag_tree_search(&sock_tag_tree, sk);
}
1071
1072 static struct sock_tag *get_sock_stat(const struct sock *sk)
1073 {
1074         struct sock_tag *sock_tag_entry;
1075         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1076         if (!sk)
1077                 return NULL;
1078         spin_lock_bh(&sock_tag_list_lock);
1079         sock_tag_entry = get_sock_stat_nl(sk);
1080         spin_unlock_bh(&sock_tag_list_lock);
1081         return sock_tag_entry;
1082 }
1083
1084 static int ipx_proto(const struct sk_buff *skb,
1085                      struct xt_action_param *par)
1086 {
1087         int thoff = 0, tproto;
1088
1089         switch (par->family) {
1090         case NFPROTO_IPV6:
1091                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1092                 if (tproto < 0)
1093                         MT_DEBUG("%s(): transport header not found in ipv6"
1094                                  " skb=%p\n", __func__, skb);
1095                 break;
1096         case NFPROTO_IPV4:
1097                 tproto = ip_hdr(skb)->protocol;
1098                 break;
1099         default:
1100                 tproto = IPPROTO_RAW;
1101         }
1102         return tproto;
1103 }
1104
1105 static void
1106 data_counters_update(struct data_counters *dc, int set,
1107                      enum ifs_tx_rx direction, int proto, int bytes)
1108 {
1109         switch (proto) {
1110         case IPPROTO_TCP:
1111                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1112                 break;
1113         case IPPROTO_UDP:
1114                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1115                 break;
1116         case IPPROTO_IP:
1117         default:
1118                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1119                                     1);
1120                 break;
1121         }
1122 }
1123
1124 /*
1125  * Update stats for the specified interface. Do nothing if the entry
1126  * does not exist (when a device was never configured with an IP address).
1127  * Called when an device is being unregistered.
1128  */
1129 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1130 {
1131         struct rtnl_link_stats64 dev_stats, *stats;
1132         struct iface_stat *entry;
1133
1134         stats = dev_get_stats(net_dev, &dev_stats);
1135         spin_lock_bh(&iface_stat_list_lock);
1136         entry = get_iface_entry(net_dev->name);
1137         if (entry == NULL) {
1138                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1139                          net_dev->name);
1140                 spin_unlock_bh(&iface_stat_list_lock);
1141                 return;
1142         }
1143
1144         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1145                  net_dev->name, entry);
1146         if (!entry->active) {
1147                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1148                          net_dev->name);
1149                 spin_unlock_bh(&iface_stat_list_lock);
1150                 return;
1151         }
1152
1153         if (stash_only) {
1154                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1155                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1156                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1157                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1158                 entry->last_known_valid = true;
1159                 IF_DEBUG("qtaguid: %s(%s): "
1160                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1161                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1162                 spin_unlock_bh(&iface_stat_list_lock);
1163                 return;
1164         }
1165         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1166         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1167         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1168         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1169         /* We don't need the last_known[] anymore */
1170         entry->last_known_valid = false;
1171         _iface_stat_set_active(entry, net_dev, false);
1172         IF_DEBUG("qtaguid: %s(%s): "
1173                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1174                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1175         spin_unlock_bh(&iface_stat_list_lock);
1176 }
1177
/* Guaranteed to return a net_device that has a name */
static void get_dev_and_dir(const struct sk_buff *skb,
			    struct xt_action_param *par,
			    enum ifs_tx_rx *direction,
			    const struct net_device **el_dev)
{
	BUG_ON(!direction || !el_dev);

	/* par->in set => receive path (RX); par->out => transmit path (TX). */
	if (par->in) {
		*el_dev = par->in;
		*direction = IFS_RX;
	} else if (par->out) {
		*el_dev = par->out;
		*direction = IFS_TX;
	} else {
		pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
		       par->hooknum, __func__);
		BUG();
	}
	if (unlikely(!(*el_dev)->name)) {
		pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
		       par->hooknum, __func__);
		BUG();
	}
	/* skb->dev may legitimately differ from par->in/out; only log it. */
	if (skb->dev && *el_dev != skb->dev) {
		MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n",
			 par->hooknum, skb->dev, skb->dev->name,
			 *direction == IFS_RX ? "in" : "out",  *el_dev,
			 (*el_dev)->name);
	}
}
1209
1210 /*
1211  * Update stats for the specified interface from the skb.
1212  * Do nothing if the entry
1213  * does not exist (when a device was never configured with an IP address).
1214  * Called on each sk.
1215  */
1216 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1217                                        struct xt_action_param *par)
1218 {
1219         struct iface_stat *entry;
1220         const struct net_device *el_dev;
1221         enum ifs_tx_rx direction;
1222         int bytes = skb->len;
1223         int proto;
1224
1225         get_dev_and_dir(skb, par, &direction, &el_dev);
1226         proto = ipx_proto(skb, par);
1227         MT_DEBUG("qtaguid[%d]: iface_stat: %s(%s): "
1228                  "type=%d fam=%d proto=%d dir=%d\n",
1229                  par->hooknum, __func__, el_dev->name, el_dev->type,
1230                  par->family, proto, direction);
1231
1232         spin_lock_bh(&iface_stat_list_lock);
1233         entry = get_iface_entry(el_dev->name);
1234         if (entry == NULL) {
1235                 IF_DEBUG("qtaguid[%d]: iface_stat: %s(%s): not tracked\n",
1236                          par->hooknum, __func__, el_dev->name);
1237                 spin_unlock_bh(&iface_stat_list_lock);
1238                 return;
1239         }
1240
1241         IF_DEBUG("qtaguid[%d]: %s(%s): entry=%p\n", par->hooknum,  __func__,
1242                  el_dev->name, entry);
1243
1244         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1245                              bytes);
1246         spin_unlock_bh(&iface_stat_list_lock);
1247 }
1248
1249 static void tag_stat_update(struct tag_stat *tag_entry,
1250                         enum ifs_tx_rx direction, int proto, int bytes)
1251 {
1252         int active_set;
1253         active_set = get_active_counter_set(tag_entry->tn.tag);
1254         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1255                  "dir=%d proto=%d bytes=%d)\n",
1256                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1257                  active_set, direction, proto, bytes);
1258         data_counters_update(&tag_entry->counters, active_set, direction,
1259                              proto, bytes);
1260         if (tag_entry->parent_counters)
1261                 data_counters_update(tag_entry->parent_counters, active_set,
1262                                      direction, proto, bytes);
1263 }
1264
1265 /*
1266  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1267  * the interface.
1268  * iface_entry->tag_stat_list_lock should be held.
1269  */
1270 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1271                                            tag_t tag)
1272 {
1273         struct tag_stat *new_tag_stat_entry = NULL;
1274         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1275                  " (uid=%u)\n", __func__,
1276                  iface_entry, tag, get_uid_from_tag(tag));
1277         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1278         if (!new_tag_stat_entry) {
1279                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1280                 goto done;
1281         }
1282         new_tag_stat_entry->tn.tag = tag;
1283         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1284 done:
1285         return new_tag_stat_entry;
1286 }
1287
/*
 * Account @bytes of @proto traffic in @direction on @ifname against the
 * tag attached to @sk (if any), or against the plain uid tag otherwise.
 * Creates the {0, uid_tag} parent and {acct_tag, uid_tag} child tag_stat
 * entries on demand, linking the child's parent_counters to the parent.
 * Lock order: iface_stat_list_lock -> sock_tag_list_lock (inside
 * get_sock_stat()) and iface_stat_list_lock -> tag_stat_list_lock.
 */
static void if_tag_stat_update(const char *ifname, uid_t uid,
			       const struct sock *sk, enum ifs_tx_rx direction,
			       int proto, int bytes)
{
	struct tag_stat *tag_stat_entry;
	tag_t tag, acct_tag;
	tag_t uid_tag;
	struct data_counters *uid_tag_counters;
	struct sock_tag *sock_tag_entry;
	struct iface_stat *iface_entry;
	struct tag_stat *new_tag_stat = NULL;
	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
		"uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
		 ifname, uid, sk, direction, proto, bytes);

	spin_lock_bh(&iface_stat_list_lock);
	iface_entry = get_iface_entry(ifname);
	if (!iface_entry) {
		pr_err_ratelimited("qtaguid: tag_stat: stat_update() "
				   "%s not found\n", ifname);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	/* It is ok to process data when an iface_entry is inactive */

	MT_DEBUG("qtaguid: tag_stat: stat_update() dev=%s entry=%p\n",
		 ifname, iface_entry);

	/*
	 * Look for a tagged sock.
	 * It will have an acct_uid.
	 */
	sock_tag_entry = get_sock_stat(sk);
	if (sock_tag_entry) {
		tag = sock_tag_entry->tag;
		acct_tag = get_atag_from_tag(tag);
		uid_tag = get_utag_from_tag(tag);
	} else {
		/* Untagged sock: account against the bare uid (acct_tag 0). */
		acct_tag = make_atag_from_value(0);
		tag = combine_atag_with_uid(acct_tag, uid);
		uid_tag = make_tag_from_uid(uid);
	}
	MT_DEBUG("qtaguid: tag_stat: stat_update(): "
		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
		 tag, get_uid_from_tag(tag), iface_entry);
	/* Loop over tag list under this interface for {acct_tag,uid_tag} */
	spin_lock_bh(&iface_entry->tag_stat_list_lock);

	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      tag);
	if (tag_stat_entry) {
		/*
		 * Updating the {acct_tag, uid_tag} entry handles both stats:
		 * {0, uid_tag} will also get updated.
		 */
		tag_stat_update(tag_stat_entry, direction, proto, bytes);
		goto unlock;
	}

	/* Loop over tag list under this interface for {0,uid_tag} */
	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      uid_tag);
	if (!tag_stat_entry) {
		/* Here: the base uid_tag did not exist */
		/*
		 * No parent counters. So
		 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
		 */
		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
		if (!new_tag_stat)
			goto unlock;
		uid_tag_counters = &new_tag_stat->counters;
	} else {
		uid_tag_counters = &tag_stat_entry->counters;
	}

	if (acct_tag) {
		/* Create the child {acct_tag, uid_tag} and hook up parent. */
		new_tag_stat = create_if_tag_stat(iface_entry, tag);
		if (!new_tag_stat)
			goto unlock;
		new_tag_stat->parent_counters = uid_tag_counters;
	} else {
		/*
		 * For new_tag_stat to be still NULL here would require:
		 *  {0, uid_tag} exists
		 *  and {acct_tag, uid_tag} doesn't exist
		 *  AND acct_tag == 0.
		 * Impossible. This reassures us that new_tag_stat
		 * below will always be assigned.
		 */
		BUG_ON(!new_tag_stat);
	}
	tag_stat_update(new_tag_stat, direction, proto, bytes);
unlock:
	spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	spin_unlock_bh(&iface_stat_list_lock);
}
1386
1387 static int iface_netdev_event_handler(struct notifier_block *nb,
1388                                       unsigned long event, void *ptr) {
1389         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1390
1391         if (unlikely(module_passive))
1392                 return NOTIFY_DONE;
1393
1394         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1395                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1396                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1397
1398         switch (event) {
1399         case NETDEV_UP:
1400                 iface_stat_create(dev, NULL);
1401                 atomic64_inc(&qtu_events.iface_events);
1402                 break;
1403         case NETDEV_DOWN:
1404         case NETDEV_UNREGISTER:
1405                 iface_stat_update(dev, event == NETDEV_DOWN);
1406                 atomic64_inc(&qtu_events.iface_events);
1407                 break;
1408         }
1409         return NOTIFY_DONE;
1410 }
1411
1412 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1413                                          unsigned long event, void *ptr)
1414 {
1415         struct inet6_ifaddr *ifa = ptr;
1416         struct net_device *dev;
1417
1418         if (unlikely(module_passive))
1419                 return NOTIFY_DONE;
1420
1421         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1422                  "ev=0x%lx/%s ifa=%p\n",
1423                  event, netdev_evt_str(event), ifa);
1424
1425         switch (event) {
1426         case NETDEV_UP:
1427                 BUG_ON(!ifa || !ifa->idev);
1428                 dev = (struct net_device *)ifa->idev->dev;
1429                 iface_stat_create_ipv6(dev, ifa);
1430                 atomic64_inc(&qtu_events.iface_events);
1431                 break;
1432         case NETDEV_DOWN:
1433         case NETDEV_UNREGISTER:
1434                 BUG_ON(!ifa || !ifa->idev);
1435                 dev = (struct net_device *)ifa->idev->dev;
1436                 iface_stat_update(dev, event == NETDEV_DOWN);
1437                 atomic64_inc(&qtu_events.iface_events);
1438                 break;
1439         }
1440         return NOTIFY_DONE;
1441 }
1442
1443 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1444                                         unsigned long event, void *ptr)
1445 {
1446         struct in_ifaddr *ifa = ptr;
1447         struct net_device *dev;
1448
1449         if (unlikely(module_passive))
1450                 return NOTIFY_DONE;
1451
1452         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1453                  "ev=0x%lx/%s ifa=%p\n",
1454                  event, netdev_evt_str(event), ifa);
1455
1456         switch (event) {
1457         case NETDEV_UP:
1458                 BUG_ON(!ifa || !ifa->ifa_dev);
1459                 dev = ifa->ifa_dev->dev;
1460                 iface_stat_create(dev, ifa);
1461                 atomic64_inc(&qtu_events.iface_events);
1462                 break;
1463         case NETDEV_DOWN:
1464         case NETDEV_UNREGISTER:
1465                 BUG_ON(!ifa || !ifa->ifa_dev);
1466                 dev = ifa->ifa_dev->dev;
1467                 iface_stat_update(dev, event == NETDEV_DOWN);
1468                 atomic64_inc(&qtu_events.iface_events);
1469                 break;
1470         }
1471         return NOTIFY_DONE;
1472 }
1473
/* Registered in iface_stat_init() for netdev up/down/unregister events. */
static struct notifier_block iface_netdev_notifier_blk = {
	.notifier_call = iface_netdev_event_handler,
};
1477
/* Registered in iface_stat_init() for IPv4 address events. */
static struct notifier_block iface_inetaddr_notifier_blk = {
	.notifier_call = iface_inetaddr_event_handler,
};
1481
/* Registered in iface_stat_init() for IPv6 address events. */
static struct notifier_block iface_inet6addr_notifier_blk = {
	.notifier_call = iface_inet6addr_event_handler,
};
1485
/*
 * seq_file ops shared by both /proc iface-stat files; the output format
 * (fmt 1 vs 2) is selected per-open via PDE_DATA in
 * proc_iface_stat_fmt_open().
 */
static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
	.start	= iface_stat_fmt_proc_start,
	.next	= iface_stat_fmt_proc_next,
	.stop	= iface_stat_fmt_proc_stop,
	.show	= iface_stat_fmt_proc_show,
};
1492
1493 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1494 {
1495         struct proc_iface_stat_fmt_info *s;
1496
1497         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1498                         sizeof(struct proc_iface_stat_fmt_info));
1499         if (!s)
1500                 return -ENOMEM;
1501
1502         s->fmt = (uintptr_t)PDE_DATA(inode);
1503         return 0;
1504 }
1505
/* File ops for the iface_stat_all / iface_stat_fmt proc files. */
static const struct file_operations proc_iface_stat_fmt_fops = {
	.open		= proc_iface_stat_fmt_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
1512
1513 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1514 {
1515         int err;
1516
1517         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1518         if (!iface_stat_procdir) {
1519                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1520                 err = -1;
1521                 goto err;
1522         }
1523
1524         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1525                                                    proc_iface_perms,
1526                                                    parent_procdir,
1527                                                    &proc_iface_stat_fmt_fops,
1528                                                    (void *)1 /* fmt1 */);
1529         if (!iface_stat_all_procfile) {
1530                 pr_err("qtaguid: iface_stat: init "
1531                        " failed to create stat_old proc entry\n");
1532                 err = -1;
1533                 goto err_zap_entry;
1534         }
1535
1536         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1537                                                    proc_iface_perms,
1538                                                    parent_procdir,
1539                                                    &proc_iface_stat_fmt_fops,
1540                                                    (void *)2 /* fmt2 */);
1541         if (!iface_stat_fmt_procfile) {
1542                 pr_err("qtaguid: iface_stat: init "
1543                        " failed to create stat_all proc entry\n");
1544                 err = -1;
1545                 goto err_zap_all_stats_entry;
1546         }
1547
1548
1549         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1550         if (err) {
1551                 pr_err("qtaguid: iface_stat: init "
1552                        "failed to register dev event handler\n");
1553                 goto err_zap_all_stats_entries;
1554         }
1555         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1556         if (err) {
1557                 pr_err("qtaguid: iface_stat: init "
1558                        "failed to register ipv4 dev event handler\n");
1559                 goto err_unreg_nd;
1560         }
1561
1562         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1563         if (err) {
1564                 pr_err("qtaguid: iface_stat: init "
1565                        "failed to register ipv6 dev event handler\n");
1566                 goto err_unreg_ip4_addr;
1567         }
1568         return 0;
1569
1570 err_unreg_ip4_addr:
1571         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1572 err_unreg_nd:
1573         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1574 err_zap_all_stats_entries:
1575         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1576 err_zap_all_stats_entry:
1577         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1578 err_zap_entry:
1579         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1580 err:
1581         return err;
1582 }
1583
/*
 * Look up the struct sock that owns @skb via the xt_socket slow-path
 * helpers.  Only hooks in XT_SOCKET_SUPPORTED_HOOKS (PRE_ROUTING and
 * LOCAL_IN) are handled; for any other hook or family NULL is returned.
 *
 * On success the returned sock carries a reference taken by the lookup
 * helper: the caller must release it with sock_gen_put().  Non-full
 * socks (e.g. time-wait minisocks) are released here and NULL is
 * returned instead, since they lack the fields the accounting needs.
 */
static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
				    struct xt_action_param *par)
{
	struct sock *sk;
	unsigned int hook_mask = (1 << par->hooknum);

	MT_DEBUG("qtaguid[%d]: find_sk(skb=%p) family=%d\n",
		 par->hooknum, skb, par->family);

	/*
	 * Let's not abuse the xt_socket_get*_sk(), or else it will
	 * return garbage SKs.
	 */
	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
		return NULL;

	switch (par->family) {
	case NFPROTO_IPV6:
		sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in);
		break;
	case NFPROTO_IPV4:
		sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in);
		break;
	default:
		return NULL;
	}

	if (sk) {
		MT_DEBUG("qtaguid[%d]: %p->sk_proto=%u->sk_state=%d\n",
			 par->hooknum, sk, sk->sk_protocol, sk->sk_state);
		/*
		 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
		 * "struct inet_timewait_sock" which is missing fields.
		 */
		if (!sk_fullsock(sk) || sk->sk_state  == TCP_TIME_WAIT) {
			sock_gen_put(sk);
			sk = NULL;
		}
	}
	return sk;
}
1625
1626 static void account_for_uid(const struct sk_buff *skb,
1627                             const struct sock *alternate_sk, uid_t uid,
1628                             struct xt_action_param *par)
1629 {
1630         const struct net_device *el_dev;
1631         enum ifs_tx_rx direction;
1632         int proto;
1633
1634         get_dev_and_dir(skb, par, &direction, &el_dev);
1635         proto = ipx_proto(skb, par);
1636         MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d dir=%d\n",
1637                  par->hooknum, el_dev->name, el_dev->type,
1638                  par->family, proto, direction);
1639
1640         if_tag_stat_update(el_dev->name, uid,
1641                            skb->sk ? skb->sk : alternate_sk,
1642                            direction,
1643                            proto, skb->len);
1644 }
1645
/*
 * The xt_qtaguid match function.  As a side effect it updates the
 * per-interface stats (in the pre/post routing hooks) or the per-uid/tag
 * stats (in the other hooks), then evaluates the XT_QTAGUID_SOCKET/UID/GID
 * conditions requested via info->match and info->invert.
 * Returns whether the packet matched the rule.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;	/* true when sk came from qtaguid_find_sk() */
	struct sock *sk;
	kuid_t sock_uid;
	bool res;
	bool set_sk_callback_lock = false;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do tag stats when the uid-owner is not requested
	 */
	bool do_tag_stat = !(info->match & XT_QTAGUID_UID);

	/* In passive mode only "no condition requested" rules match. */
	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);
	if (skb == NULL) {
		res = (info->match ^ info->invert) == 0;
		goto ret_res;
	}

	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * further later.
		 */
		res = (info->match ^ info->invert);
		goto ret_res;
		break;
	/* default: Fall through and do UID related work */
	}

	sk = skb_to_full_sk(skb);
	/*
	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
	 * "struct inet_timewait_sock" which is missing fields.
	 * So we ignore it.
	 */
	if (sk && sk->sk_state == TCP_TIME_WAIT)
		sk = NULL;
	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
		 * and the matching socket is already closed and gone.
		 */
		sk = qtaguid_find_sk(skb, par);
		/*
		 * If we got the socket from the find_sk(), we will need to put
		 * it back, as nf_tproxy_get_sock_v4() got it.
		 */
		got_sock = sk;
		if (sk)
			atomic64_inc(&qtu_events.match_found_sk_in_ct);
		else
			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	} else {
		atomic64_inc(&qtu_events.match_found_sk);
	}
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));

	if (!sk) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		if (do_tag_stat)
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk=NULL)\n", par->hooknum);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		/* Socket exists but the rule wants "no socket": no match. */
		res = false;
		goto put_sock_ret_res;
	}
	sock_uid = sk->sk_uid;
	if (do_tag_stat)
		account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid),
				par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id     in range AND    inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID) {
		kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
		kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);

		if ((uid_gte(sock_uid, uid_min) &&
		     uid_lte(sock_uid, uid_max)) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	}
	if (info->match & XT_QTAGUID_GID) {
		kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
		kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
		/* sk_callback_lock protects sk->sk_socket (and its file). */
		set_sk_callback_lock = true;
		read_lock_bh(&sk->sk_callback_lock);
		MT_DEBUG("qtaguid[%d]: sk=%pK->sk_socket=%pK->file=%pK\n",
			 par->hooknum, sk, sk->sk_socket,
			 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
		if (!filp) {
			res = ((info->match ^ info->invert) &
			       XT_QTAGUID_GID) == 0;
			atomic64_inc(&qtu_events.match_no_sk_gid);
			goto put_sock_ret_res;
		}
		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
			 par->hooknum, filp ?
			 from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
				gid_lte(filp->f_cred->fsgid, gid_max)) ^
			!(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
				par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	}
	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	res = true;

put_sock_ret_res:
	/* Drop the lookup reference / callback lock acquired above. */
	if (got_sock)
		sock_gen_put(sk);
	if (set_sk_callback_lock)
		read_unlock_bh(&sk->sk_callback_lock);
ret_res:
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
	return res;
}
1796
#ifdef DDEBUG
/*
 * Dump the full module state (sock tags, uid tag data, proc qtu data,
 * iface stats) to the kernel log when DDEBUG_MASK is enabled.
 * This function is not in xt_qtaguid_print.c because of locks visibility.
 * The lock of sock_tag_list must be acquired before calling this function;
 * the uid_tag_data and iface_stat locks are taken here as needed.
 */
static void prdebug_full_state_locked(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	/* Build "qtaguid: <func>(): <fmt> {" then expand the varargs into it. */
	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);

	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* No-op stub when dump-debugging is compiled out. */
static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {}
#endif
1839
/*
 * seq_file iteration cursor over sock_tag_tree for the ctrl proc file:
 * remembers which socket the last read stopped at so a later read can
 * resume from the same position.
 */
struct proc_ctrl_print_info {
	struct sock *sk; /* socket found by reading to sk_pos */
	loff_t sk_pos;   /* the *pos value that sk corresponds to */
};
1844
1845 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1846 {
1847         struct proc_ctrl_print_info *pcpi = m->private;
1848         struct sock_tag *sock_tag_entry = v;
1849         struct rb_node *node;
1850
1851         (*pos)++;
1852
1853         if (!v || v  == SEQ_START_TOKEN)
1854                 return NULL;
1855
1856         node = rb_next(&sock_tag_entry->sock_node);
1857         if (!node) {
1858                 pcpi->sk = NULL;
1859                 sock_tag_entry = SEQ_START_TOKEN;
1860         } else {
1861                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1862                 pcpi->sk = sock_tag_entry->sk;
1863         }
1864         pcpi->sk_pos = *pos;
1865         return sock_tag_entry;
1866 }
1867
/*
 * seq_file .start: take sock_tag_list_lock for the whole read pass
 * (released in qtaguid_ctrl_proc_stop(), which seq_file calls even when
 * we return NULL here) and position the iterator: at *pos == 0 start
 * from the first tree entry, otherwise resume from the socket cached in
 * m->private by the previous next/start call.
 */
static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_ctrl_print_info *pcpi = m->private;
	struct sock_tag *sock_tag_entry;
	struct rb_node *node;

	spin_lock_bh(&sock_tag_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (*pos == 0) {
		pcpi->sk_pos = 0;
		node = rb_first(&sock_tag_tree);
		if (!node) {
			pcpi->sk = NULL;
			return SEQ_START_TOKEN;
		}
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		pcpi->sk = sock_tag_entry->sk;
	} else {
		/*
		 * Re-find the cached socket's entry; if it is gone (e.g.
		 * untagged since the last read) fall back to the summary
		 * token.
		 */
		sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
						NULL) ?: SEQ_START_TOKEN;
		if (*pos != pcpi->sk_pos) {
			/* seq_read skipped a next call */
			*pos = pcpi->sk_pos;
			return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
		}
	}
	return sock_tag_entry;
}
1899
/* seq_file .stop: drop the lock taken in qtaguid_ctrl_proc_start(). */
static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
	spin_unlock_bh(&sock_tag_list_lock);
}
1904
1905 /*
1906  * Procfs reader to get all active socket tags using style "1)" as described in
1907  * fs/proc/generic.c
1908  */
1909 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1910 {
1911         struct sock_tag *sock_tag_entry = v;
1912         uid_t uid;
1913
1914         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1915                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
1916
1917         if (sock_tag_entry != SEQ_START_TOKEN) {
1918                 int sk_ref_count;
1919                 uid = get_uid_from_tag(sock_tag_entry->tag);
1920                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1921                          "pid=%u\n",
1922                          sock_tag_entry->sk,
1923                          sock_tag_entry->tag,
1924                          uid,
1925                          sock_tag_entry->pid
1926                         );
1927                 sk_ref_count = atomic_read(
1928                         &sock_tag_entry->sk->sk_refcnt);
1929                 seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
1930                            "f_count=%d\n",
1931                            sock_tag_entry->sk,
1932                            sock_tag_entry->tag, uid,
1933                            sock_tag_entry->pid, sk_ref_count);
1934         } else {
1935                 seq_printf(m, "events: sockets_tagged=%llu "
1936                            "sockets_untagged=%llu "
1937                            "counter_set_changes=%llu "
1938                            "delete_cmds=%llu "
1939                            "iface_events=%llu "
1940                            "match_calls=%llu "
1941                            "match_calls_prepost=%llu "
1942                            "match_found_sk=%llu "
1943                            "match_found_sk_in_ct=%llu "
1944                            "match_found_no_sk_in_ct=%llu "
1945                            "match_no_sk=%llu "
1946                            "match_no_sk_gid=%llu\n",
1947                            (u64)atomic64_read(&qtu_events.sockets_tagged),
1948                            (u64)atomic64_read(&qtu_events.sockets_untagged),
1949                            (u64)atomic64_read(&qtu_events.counter_set_changes),
1950                            (u64)atomic64_read(&qtu_events.delete_cmds),
1951                            (u64)atomic64_read(&qtu_events.iface_events),
1952                            (u64)atomic64_read(&qtu_events.match_calls),
1953                            (u64)atomic64_read(&qtu_events.match_calls_prepost),
1954                            (u64)atomic64_read(&qtu_events.match_found_sk),
1955                            (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1956                            (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1957                            (u64)atomic64_read(&qtu_events.match_no_sk),
1958                            (u64)atomic64_read(&qtu_events.match_no_sk_gid));
1959
1960                 /* Count the following as part of the last item_index. No need
1961                  * to lock the sock_tag_list here since it is already locked when
1962                  * starting the seq_file operation
1963                  */
1964                 prdebug_full_state_locked(0, "proc ctrl");
1965         }
1966
1967         return 0;
1968 }
1969
1970 /*
1971  * Delete socket tags, and stat tags associated with a given
1972  * accouting tag and uid.
1973  */
1974 static int ctrl_cmd_delete(const char *input)
1975 {
1976         char cmd;
1977         int uid_int;
1978         kuid_t uid;
1979         uid_t entry_uid;
1980         tag_t acct_tag;
1981         tag_t tag;
1982         int res, argc;
1983         struct iface_stat *iface_entry;
1984         struct rb_node *node;
1985         struct sock_tag *st_entry;
1986         struct rb_root st_to_free_tree = RB_ROOT;
1987         struct tag_stat *ts_entry;
1988         struct tag_counter_set *tcs_entry;
1989         struct tag_ref *tr_entry;
1990         struct uid_tag_data *utd_entry;
1991
1992         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
1993         uid = make_kuid(&init_user_ns, uid_int);
1994         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1995                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1996                  acct_tag, uid_int);
1997         if (argc < 2) {
1998                 res = -EINVAL;
1999                 goto err;
2000         }
2001         if (!valid_atag(acct_tag)) {
2002                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2003                 res = -EINVAL;
2004                 goto err;
2005         }
2006         if (argc < 3) {
2007                 uid = current_fsuid();
2008                 uid_int = from_kuid(&init_user_ns, uid);
2009         } else if (!can_impersonate_uid(uid)) {
2010                 pr_info("qtaguid: ctrl_delete(%s): "
2011                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2012                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2013                 res = -EPERM;
2014                 goto err;
2015         }
2016
2017         tag = combine_atag_with_uid(acct_tag, uid_int);
2018         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2019                  "looking for tag=0x%llx (uid=%u)\n",
2020                  input, tag, uid_int);
2021
2022         /* Delete socket tags */
2023         spin_lock_bh(&sock_tag_list_lock);
2024         node = rb_first(&sock_tag_tree);
2025         while (node) {
2026                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2027                 entry_uid = get_uid_from_tag(st_entry->tag);
2028                 node = rb_next(node);
2029                 if (entry_uid != uid_int)
2030                         continue;
2031
2032                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2033                          input, st_entry->tag, entry_uid);
2034
2035                 if (!acct_tag || st_entry->tag == tag) {
2036                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2037                         /* Can't sockfd_put() within spinlock, do it later. */
2038                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2039                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2040                         BUG_ON(tr_entry->num_sock_tags <= 0);
2041                         tr_entry->num_sock_tags--;
2042                         /*
2043                          * TODO: remove if, and start failing.
2044                          * This is a hack to work around the fact that in some
2045                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2046                          * and are trying to work around apps
2047                          * that didn't open the /dev/xt_qtaguid.
2048                          */
2049                         if (st_entry->list.next && st_entry->list.prev)
2050                                 list_del(&st_entry->list);
2051                 }
2052         }
2053         spin_unlock_bh(&sock_tag_list_lock);
2054
2055         sock_tag_tree_erase(&st_to_free_tree);
2056
2057         /* Delete tag counter-sets */
2058         spin_lock_bh(&tag_counter_set_list_lock);
2059         /* Counter sets are only on the uid tag, not full tag */
2060         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2061         if (tcs_entry) {
2062                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2063                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2064                          input,
2065                          tcs_entry->tn.tag,
2066                          get_uid_from_tag(tcs_entry->tn.tag),
2067                          tcs_entry->active_set);
2068                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2069                 kfree(tcs_entry);
2070         }
2071         spin_unlock_bh(&tag_counter_set_list_lock);
2072
2073         /*
2074          * If acct_tag is 0, then all entries belonging to uid are
2075          * erased.
2076          */
2077         spin_lock_bh(&iface_stat_list_lock);
2078         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2079                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2080                 node = rb_first(&iface_entry->tag_stat_tree);
2081                 while (node) {
2082                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2083                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2084                         node = rb_next(node);
2085
2086                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2087                                  "ts tag=0x%llx (uid=%u)\n",
2088                                  input, ts_entry->tn.tag, entry_uid);
2089
2090                         if (entry_uid != uid_int)
2091                                 continue;
2092                         if (!acct_tag || ts_entry->tn.tag == tag) {
2093                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2094                                          "erase ts: %s 0x%llx %u\n",
2095                                          input, iface_entry->ifname,
2096                                          get_atag_from_tag(ts_entry->tn.tag),
2097                                          entry_uid);
2098                                 rb_erase(&ts_entry->tn.node,
2099                                          &iface_entry->tag_stat_tree);
2100                                 kfree(ts_entry);
2101                         }
2102                 }
2103                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2104         }
2105         spin_unlock_bh(&iface_stat_list_lock);
2106
2107         /* Cleanup the uid_tag_data */
2108         spin_lock_bh(&uid_tag_data_tree_lock);
2109         node = rb_first(&uid_tag_data_tree);
2110         while (node) {
2111                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2112                 entry_uid = utd_entry->uid;
2113                 node = rb_next(node);
2114
2115                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2116                          "utd uid=%u\n",
2117                          input, entry_uid);
2118
2119                 if (entry_uid != uid_int)
2120                         continue;
2121                 /*
2122                  * Go over the tag_refs, and those that don't have
2123                  * sock_tags using them are freed.
2124                  */
2125                 put_tag_ref_tree(tag, utd_entry);
2126                 put_utd_entry(utd_entry);
2127         }
2128         spin_unlock_bh(&uid_tag_data_tree_lock);
2129
2130         atomic64_inc(&qtu_events.delete_cmds);
2131         res = 0;
2132
2133 err:
2134         return res;
2135 }
2136
2137 static int ctrl_cmd_counter_set(const char *input)
2138 {
2139         char cmd;
2140         uid_t uid = 0;
2141         tag_t tag;
2142         int res, argc;
2143         struct tag_counter_set *tcs;
2144         int counter_set;
2145
2146         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2147         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2148                  "set=%d uid=%u\n", input, argc, cmd,
2149                  counter_set, uid);
2150         if (argc != 3) {
2151                 res = -EINVAL;
2152                 goto err;
2153         }
2154         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2155                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2156                         input);
2157                 res = -EINVAL;
2158                 goto err;
2159         }
2160         if (!can_manipulate_uids()) {
2161                 pr_info("qtaguid: ctrl_counterset(%s): "
2162                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2163                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2164                 res = -EPERM;
2165                 goto err;
2166         }
2167
2168         tag = make_tag_from_uid(uid);
2169         spin_lock_bh(&tag_counter_set_list_lock);
2170         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2171         if (!tcs) {
2172                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2173                 if (!tcs) {
2174                         spin_unlock_bh(&tag_counter_set_list_lock);
2175                         pr_err("qtaguid: ctrl_counterset(%s): "
2176                                "failed to alloc counter set\n",
2177                                input);
2178                         res = -ENOMEM;
2179                         goto err;
2180                 }
2181                 tcs->tn.tag = tag;
2182                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2183                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2184                          "(uid=%u) set=%d\n",
2185                          input, tag, get_uid_from_tag(tag), counter_set);
2186         }
2187         tcs->active_set = counter_set;
2188         spin_unlock_bh(&tag_counter_set_list_lock);
2189         atomic64_inc(&qtu_events.counter_set_changes);
2190         res = 0;
2191
2192 err:
2193         return res;
2194 }
2195
2196 static int ctrl_cmd_tag(const char *input)
2197 {
2198         char cmd;
2199         int sock_fd = 0;
2200         kuid_t uid;
2201         unsigned int uid_int = 0;
2202         tag_t acct_tag = make_atag_from_value(0);
2203         tag_t full_tag;
2204         struct socket *el_socket;
2205         int res, argc;
2206         struct sock_tag *sock_tag_entry;
2207         struct tag_ref *tag_ref_entry;
2208         struct uid_tag_data *uid_tag_data_entry;
2209         struct proc_qtu_data *pqd_entry;
2210
2211         /* Unassigned args will get defaulted later. */
2212         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
2213         uid = make_kuid(&init_user_ns, uid_int);
2214         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2215                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2216                  acct_tag, uid_int);
2217         if (argc < 2) {
2218                 res = -EINVAL;
2219                 goto err;
2220         }
2221         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2222         if (!el_socket) {
2223                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2224                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2225                         input, sock_fd, res, current->pid, current->tgid,
2226                         from_kuid(&init_user_ns, current_fsuid()));
2227                 goto err;
2228         }
2229         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->sk_refcnt=%d ->sk=%p\n",
2230                  input, atomic_read(&el_socket->sk->sk_refcnt),
2231                  el_socket->sk);
2232         if (argc < 3) {
2233                 acct_tag = make_atag_from_value(0);
2234         } else if (!valid_atag(acct_tag)) {
2235                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2236                 res = -EINVAL;
2237                 goto err_put;
2238         }
2239         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2240                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2241                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2242                  input, current->pid, current->tgid,
2243                  from_kuid(&init_user_ns, current_uid()),
2244                  from_kuid(&init_user_ns, current_euid()),
2245                  from_kuid(&init_user_ns, current_fsuid()),
2246                  from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
2247                  in_group_p(xt_qtaguid_ctrl_file->gid),
2248                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2249         if (argc < 4) {
2250                 uid = current_fsuid();
2251                 uid_int = from_kuid(&init_user_ns, uid);
2252         } else if (!can_impersonate_uid(uid)) {
2253                 pr_info("qtaguid: ctrl_tag(%s): "
2254                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2255                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2256                 res = -EPERM;
2257                 goto err_put;
2258         }
2259         full_tag = combine_atag_with_uid(acct_tag, uid_int);
2260
2261         spin_lock_bh(&sock_tag_list_lock);
2262         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2263         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2264         if (IS_ERR(tag_ref_entry)) {
2265                 res = PTR_ERR(tag_ref_entry);
2266                 spin_unlock_bh(&sock_tag_list_lock);
2267                 goto err_put;
2268         }
2269         tag_ref_entry->num_sock_tags++;
2270         if (sock_tag_entry) {
2271                 struct tag_ref *prev_tag_ref_entry;
2272
2273                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2274                          "st@%p ...->sk_refcnt=%d\n",
2275                          input, el_socket->sk, sock_tag_entry,
2276                          atomic_read(&el_socket->sk->sk_refcnt));
2277                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2278                                                     &uid_tag_data_entry);
2279                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2280                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2281                 prev_tag_ref_entry->num_sock_tags--;
2282                 sock_tag_entry->tag = full_tag;
2283         } else {
2284                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2285                          input, el_socket->sk);
2286                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2287                                          GFP_ATOMIC);
2288                 if (!sock_tag_entry) {
2289                         pr_err("qtaguid: ctrl_tag(%s): "
2290                                "socket tag alloc failed\n",
2291                                input);
2292                         spin_unlock_bh(&sock_tag_list_lock);
2293                         res = -ENOMEM;
2294                         goto err_tag_unref_put;
2295                 }
2296                 /*
2297                  * Hold the sk refcount here to make sure the sk pointer cannot
2298                  * be freed and reused
2299                  */
2300                 sock_hold(el_socket->sk);
2301                 sock_tag_entry->sk = el_socket->sk;
2302                 sock_tag_entry->pid = current->tgid;
2303                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
2304                 spin_lock_bh(&uid_tag_data_tree_lock);
2305                 pqd_entry = proc_qtu_data_tree_search(
2306                         &proc_qtu_data_tree, current->tgid);
2307                 /*
2308                  * TODO: remove if, and start failing.
2309                  * At first, we want to catch user-space code that is not
2310                  * opening the /dev/xt_qtaguid.
2311                  */
2312                 if (IS_ERR_OR_NULL(pqd_entry))
2313                         pr_warn_once(
2314                                 "qtaguid: %s(): "
2315                                 "User space forgot to open /dev/xt_qtaguid? "
2316                                 "pid=%u tgid=%u uid=%u\n", __func__,
2317                                 current->pid, current->tgid,
2318                                 from_kuid(&init_user_ns, current_fsuid()));
2319                 else
2320                         list_add(&sock_tag_entry->list,
2321                                  &pqd_entry->sock_tag_list);
2322                 spin_unlock_bh(&uid_tag_data_tree_lock);
2323
2324                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2325                 atomic64_inc(&qtu_events.sockets_tagged);
2326         }
2327         spin_unlock_bh(&sock_tag_list_lock);
2328         /* We keep the ref to the sk until it is untagged */
2329         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
2330                  input, sock_tag_entry,
2331                  atomic_read(&el_socket->sk->sk_refcnt));
2332         sockfd_put(el_socket);
2333         return 0;
2334
2335 err_tag_unref_put:
2336         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2337         tag_ref_entry->num_sock_tags--;
2338         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2339 err_put:
2340         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
2341                  input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
2342         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2343         sockfd_put(el_socket);
2344         return res;
2345
2346 err:
2347         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2348         return res;
2349 }
2350
2351 static int ctrl_cmd_untag(const char *input)
2352 {
2353         char cmd;
2354         int sock_fd = 0;
2355         struct socket *el_socket;
2356         int res, argc;
2357
2358         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2359         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2360                  input, argc, cmd, sock_fd);
2361         if (argc < 2) {
2362                 res = -EINVAL;
2363                 return res;
2364         }
2365         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2366         if (!el_socket) {
2367                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2368                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2369                         input, sock_fd, res, current->pid, current->tgid,
2370                         from_kuid(&init_user_ns, current_fsuid()));
2371                 return res;
2372         }
2373         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2374                  input, atomic_long_read(&el_socket->file->f_count),
2375                  el_socket->sk);
2376         res = qtaguid_untag(el_socket, false);
2377         sockfd_put(el_socket);
2378         return res;
2379 }
2380
/*
 * Remove the tag from @el_socket and drop the sk reference that was
 * taken when the socket was tagged.  @kernel selects whose pid owns the
 * proc_qtu_data entry to clean up: when true, the pid recorded in the
 * sock_tag entry at tag time (kernel-initiated cleanup); when false,
 * the caller's tgid (userspace "u" command).
 * Returns 0 on success, -EINVAL if the socket was not tagged.
 */
int qtaguid_untag(struct socket *el_socket, bool kernel)
{
	int res;
	pid_t pid;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		spin_unlock_bh(&sock_tag_list_lock);
		res = -EINVAL;
		return res;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	spin_lock_bh(&uid_tag_data_tree_lock);
	/* Pick the pid whose proc_qtu_data list holds this sock_tag. */
	if (kernel)
		pid = sock_tag_entry->pid;
	else
		pid = current->tgid;
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, pid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry) || !sock_tag_entry->list.next) {
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u sk_pid=%u, uid=%u\n", __func__,
			     current->pid, current->tgid, sock_tag_entry->pid,
			     from_kuid(&init_user_ns, current_fsuid()));
	} else {
		list_del(&sock_tag_entry->list);
	}
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time.
	 */
	sock_put(sock_tag_entry->sk);
	CT_DEBUG("qtaguid: done. st@%p ...->sk_refcnt=%d\n",
		 sock_tag_entry,
		 atomic_read(&el_socket->sk->sk_refcnt));

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;
}
2447
2448 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2449 {
2450         char cmd;
2451         ssize_t res;
2452
2453         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2454                  input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2455
2456         cmd = input[0];
2457         /* Collect params for commands */
2458         switch (cmd) {
2459         case 'd':
2460                 res = ctrl_cmd_delete(input);
2461                 break;
2462
2463         case 's':
2464                 res = ctrl_cmd_counter_set(input);
2465                 break;
2466
2467         case 't':
2468                 res = ctrl_cmd_tag(input);
2469                 break;
2470
2471         case 'u':
2472                 res = ctrl_cmd_untag(input);
2473                 break;
2474
2475         default:
2476                 res = -EINVAL;
2477                 goto err;
2478         }
2479         if (!res)
2480                 res = count;
2481 err:
2482         CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2483         return res;
2484 }
2485
2486 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2487 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2488                                    size_t count, loff_t *offp)
2489 {
2490         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2491
2492         if (unlikely(module_passive))
2493                 return count;
2494
2495         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2496                 return -EINVAL;
2497
2498         if (copy_from_user(input_buf, buffer, count))
2499                 return -EFAULT;
2500
2501         input_buf[count] = '\0';
2502         return qtaguid_ctrl_parse(input_buf, count);
2503 }
2504
/* Per-reader cursor state for the seq_file iteration over the stats file. */
struct proc_print_info {
	struct iface_stat *iface_entry;	/* iface currently being dumped */
	int item_index;			/* running output line counter */
	tag_t tag; /* tag found by reading to tag_pos */
	off_t tag_pos;			/* seq position reached for @tag */
	int tag_item_index;		/* item_index value at @tag_pos */
};
2512
/* Emit the column-header line of the stats proc file. */
static void pp_stats_header(struct seq_file *m)
{
	static const char header[] =
		"idx iface acct_tag_hex uid_tag_int cnt_set "
		"rx_bytes rx_packets "
		"tx_bytes tx_packets "
		"rx_tcp_bytes rx_tcp_packets "
		"rx_udp_bytes rx_udp_packets "
		"rx_other_bytes rx_other_packets "
		"tx_tcp_bytes tx_tcp_packets "
		"tx_udp_bytes tx_udp_packets "
		"tx_other_bytes tx_other_packets\n";

	seq_puts(m, header);
}
2526
/*
 * Emit one stats line for @ts_entry's counter set @cnt_set.
 * Returns 1 on success, 0 when the caller is not allowed to see this
 * uid's detailed stats (line silently skipped, counter not advanced),
 * or -ENOSPC when the seq buffer overflowed.
 */
static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
                         int cnt_set)
{
        struct data_counters *cnts;
        tag_t tag = ts_entry->tn.tag;
        uid_t stat_uid = get_uid_from_tag(tag);
        struct proc_print_info *ppi = m->private;
        /* Detailed tags are not available to everybody */
        if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
                CT_DEBUG("qtaguid: stats line: "
                         "%s 0x%llx %u: insufficient priv "
                         "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
                         ppi->iface_entry->ifname,
                         get_atag_from_tag(tag), stat_uid,
                         current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
                         from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
                return 0;
        }
        ppi->item_index++;
        cnts = &ts_entry->counters;
        /* One line: idx iface tag uid set, then bytes/packets per bucket. */
        seq_printf(m, "%d %s 0x%llx %u %u "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu\n",
                ppi->item_index,
                ppi->iface_entry->ifname,
                get_atag_from_tag(tag),
                stat_uid,
                cnt_set,
                dc_sum_bytes(cnts, cnt_set, IFS_RX),
                dc_sum_packets(cnts, cnt_set, IFS_RX),
                dc_sum_bytes(cnts, cnt_set, IFS_TX),
                dc_sum_packets(cnts, cnt_set, IFS_TX),
                cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
                cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
                cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
        return seq_has_overflowed(m) ? -ENOSPC : 1;
}
2579
2580 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2581 {
2582         int ret;
2583         int counter_set;
2584         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2585              counter_set++) {
2586                 ret = pp_stats_line(m, ts_entry, counter_set);
2587                 if (ret < 0)
2588                         return false;
2589         }
2590         return true;
2591 }
2592
2593 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2594 {
2595         struct iface_stat *iface_entry;
2596
2597         if (!ptr)
2598                 return false;
2599
2600         list_for_each_entry(iface_entry, &iface_stat_list, list)
2601                 if (iface_entry == ptr)
2602                         return true;
2603         return false;
2604 }
2605
/*
 * Advance ppi->iface_entry to the next interface on iface_stat_list,
 * releasing the current iface's tag_stat_list_lock and acquiring the
 * next one's.  Sets ppi->iface_entry to NULL when the list is
 * exhausted.  Called with the current iface's tag_stat_list_lock held
 * (taken in ..._proc_start()/here) and under iface_stat_list_lock.
 */
static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
{
	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	/* Loop body runs at most once: lock the next entry and return. */
	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		return;
	}
	ppi->iface_entry = NULL;
}
2615
/*
 * seq_file next: advance to the next tag_stat entry, walking the
 * current interface's rb-tree and moving on to the next interface when
 * a tree is exhausted.  Caches the tag/position reached in ppi so that
 * ..._proc_start() can resume there on the next read() cycle.
 */
static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry;
	struct rb_node *node;

	if (!v) {
		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
		return NULL;
	}

	(*pos)++;

	if (!ppi->iface_entry || unlikely(module_passive))
		return NULL;

	if (v == SEQ_START_TOKEN)
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	else
		node = rb_next(&((struct tag_stat *)v)->tn.node);

	/* Skip over interfaces whose tag_stat tree is empty. */
	while (!node) {
		qtaguid_stats_proc_next_iface_entry(ppi);
		if (!ppi->iface_entry)
			return NULL;
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	}

	ts_entry = rb_entry(node, struct tag_stat, tn.node);
	/* Remember where we are so start() can resume after a refill. */
	ppi->tag = ts_entry->tn.tag;
	ppi->tag_pos = *pos;
	ppi->tag_item_index = ppi->item_index;
	return ts_entry;
}
2650
/*
 * seq_file start: begin (or resume) iteration of the stats file.
 * Takes iface_stat_list_lock (released in ..._proc_stop()) and, once an
 * interface is selected, that iface's tag_stat_list_lock.  On resume
 * (*pos != 0) the tag cached by ..._proc_next() is looked up again,
 * since rb-tree entries may have been freed between read() calls.
 */
static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry = NULL;

	spin_lock_bh(&iface_stat_list_lock);

	if (*pos == 0) {
		/* Fresh read: start from the first interface. */
		ppi->item_index = 1;
		ppi->tag_pos = 0;
		if (list_empty(&iface_stat_list)) {
			ppi->iface_entry = NULL;
		} else {
			ppi->iface_entry = list_first_entry(&iface_stat_list,
							    struct iface_stat,
							    list);
			spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		}
		return SEQ_START_TOKEN;
	}
	if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
		if (ppi->iface_entry) {
			pr_err("qtaguid: %s(): iface_entry %p not found\n",
			       __func__, ppi->iface_entry);
			ppi->iface_entry = NULL;
		}
		return NULL;
	}

	spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);

	if (!ppi->tag_pos) {
		/* seq_read skipped first next call */
		ts_entry = SEQ_START_TOKEN;
	} else {
		ts_entry = tag_stat_tree_search(
				&ppi->iface_entry->tag_stat_tree, ppi->tag);
		if (!ts_entry) {
			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
				__func__, ppi->tag);
			/* NOTE: stop() still drops the locks taken above. */
			return NULL;
		}
	}

	if (*pos == ppi->tag_pos) { /* normal resume */
		ppi->item_index = ppi->tag_item_index;
	} else {
		/* seq_read skipped a next call */
		*pos = ppi->tag_pos;
		ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
	}

	return ts_entry;
}
2705
2706 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2707 {
2708         struct proc_print_info *ppi = m->private;
2709         if (ppi->iface_entry)
2710                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2711         spin_unlock_bh(&iface_stat_list_lock);
2712 }
2713
2714 /*
2715  * Procfs reader to get all tag stats using style "1)" as described in
2716  * fs/proc/generic.c
2717  * Groups all protocols tx/rx bytes.
2718  */
2719 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2720 {
2721         struct tag_stat *ts_entry = v;
2722
2723         if (v == SEQ_START_TOKEN)
2724                 pp_stats_header(m);
2725         else
2726                 pp_sets(m, ts_entry);
2727
2728         return 0;
2729 }
2730
2731 /*------------------------------------------*/
2732 static int qtudev_open(struct inode *inode, struct file *file)
2733 {
2734         struct uid_tag_data *utd_entry;
2735         struct proc_qtu_data  *pqd_entry;
2736         struct proc_qtu_data  *new_pqd_entry;
2737         int res;
2738         bool utd_entry_found;
2739
2740         if (unlikely(qtu_proc_handling_passive))
2741                 return 0;
2742
2743         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2744                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2745
2746         spin_lock_bh(&uid_tag_data_tree_lock);
2747
2748         /* Look for existing uid data, or alloc one. */
2749         utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
2750         if (IS_ERR_OR_NULL(utd_entry)) {
2751                 res = PTR_ERR(utd_entry);
2752                 goto err_unlock;
2753         }
2754
2755         /* Look for existing PID based proc_data */
2756         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2757                                               current->tgid);
2758         if (pqd_entry) {
2759                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2760                        "%s already opened\n",
2761                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2762                        QTU_DEV_NAME);
2763                 res = -EBUSY;
2764                 goto err_unlock_free_utd;
2765         }
2766
2767         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2768         if (!new_pqd_entry) {
2769                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2770                        "proc data alloc failed\n",
2771                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2772                 res = -ENOMEM;
2773                 goto err_unlock_free_utd;
2774         }
2775         new_pqd_entry->pid = current->tgid;
2776         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2777         new_pqd_entry->parent_tag_data = utd_entry;
2778         utd_entry->num_pqd++;
2779
2780         proc_qtu_data_tree_insert(new_pqd_entry,
2781                                   &proc_qtu_data_tree);
2782
2783         spin_unlock_bh(&uid_tag_data_tree_lock);
2784         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2785                  from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
2786         file->private_data = new_pqd_entry;
2787         return 0;
2788
2789 err_unlock_free_utd:
2790         if (!utd_entry_found) {
2791                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2792                 kfree(utd_entry);
2793         }
2794 err_unlock:
2795         spin_unlock_bh(&uid_tag_data_tree_lock);
2796         return res;
2797 }
2798
/*
 * Release handler for /dev/xt_qtaguid: removes every socket tag created
 * by this opener (pqd_entry->sock_tag_list), dropping the matching tag
 * refs, then frees the proc_qtu_data entry itself.  sock_put() cannot
 * run under the spinlocks, so doomed entries are parked on a local tree
 * and erased afterwards.
 */
static int qtudev_release(struct inode *inode, struct file *file)
{
	struct proc_qtu_data  *pqd_entry = file->private_data;
	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	struct tag_ref *tr;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);

	/* Lock order: sock_tag_list_lock before uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);

	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 __func__,
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);

		/* The tag's uid may differ from the opener's uid. */
		utd_entry = uid_tag_data_tree_search(
			&uid_tag_data_tree,
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
					 st_entry->tag);
		BUG_ON(!tr);
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);

		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);

		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	}

	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	kfree(pqd_entry);
	file->private_data = NULL;

	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);


	/* Outside the locks: drop sk refs and free the parked entries. */
	sock_tag_tree_erase(&st_to_free_tree);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
	spin_unlock_bh(&sock_tag_list_lock);
	return 0;
}
2880
2881 /*------------------------------------------*/
/* File operations for the /dev/xt_qtaguid misc device. */
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
};

/* Misc char device userspace opens to announce itself (see qtudev_open). */
static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};
2894
/* seq_file iterator callbacks for the "ctrl" proc entry. */
static const struct seq_operations proc_qtaguid_ctrl_seqops = {
	.start = qtaguid_ctrl_proc_start,
	.next = qtaguid_ctrl_proc_next,
	.stop = qtaguid_ctrl_proc_stop,
	.show = qtaguid_ctrl_proc_show,
};

/* Open "ctrl" with per-reader iterator state (proc_ctrl_print_info). */
static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
				sizeof(struct proc_ctrl_print_info));
}

/* "ctrl" is both readable (seq_file dump) and writable (commands). */
static const struct file_operations proc_qtaguid_ctrl_fops = {
	.open           = proc_qtaguid_ctrl_open,
	.read           = seq_read,
	.write          = qtaguid_ctrl_proc_write,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2915
/* seq_file iterator callbacks for the "stats" proc entry. */
static const struct seq_operations proc_qtaguid_stats_seqops = {
	.start = qtaguid_stats_proc_start,
	.next = qtaguid_stats_proc_next,
	.stop = qtaguid_stats_proc_stop,
	.show = qtaguid_stats_proc_show,
};

/* Open "stats" with per-reader cursor state (proc_print_info). */
static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_stats_seqops,
				sizeof(struct proc_print_info));
}

/* "stats" is read-only; see TODO in qtaguid_proc_register() re writes. */
static const struct file_operations proc_qtaguid_stats_fops = {
	.open           = proc_qtaguid_stats_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2935
2936 /*------------------------------------------*/
2937 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2938 {
2939         int ret;
2940         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2941         if (!*res_procdir) {
2942                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2943                 ret = -ENOMEM;
2944                 goto no_dir;
2945         }
2946
2947         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2948                                                 *res_procdir,
2949                                                 &proc_qtaguid_ctrl_fops,
2950                                                 NULL);
2951         if (!xt_qtaguid_ctrl_file) {
2952                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2953                         " file\n");
2954                 ret = -ENOMEM;
2955                 goto no_ctrl_entry;
2956         }
2957
2958         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2959                                                  *res_procdir,
2960                                                  &proc_qtaguid_stats_fops,
2961                                                  NULL);
2962         if (!xt_qtaguid_stats_file) {
2963                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2964                         "file\n");
2965                 ret = -ENOMEM;
2966                 goto no_stats_entry;
2967         }
2968         /*
2969          * TODO: add support counter hacking
2970          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2971          */
2972         return 0;
2973
2974 no_stats_entry:
2975         remove_proc_entry("ctrl", *res_procdir);
2976 no_ctrl_entry:
2977         remove_proc_entry("xt_qtaguid", NULL);
2978 no_dir:
2979         return ret;
2980 }
2981
static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.name       = "owner",
	.revision   = 1,
	.family     = NFPROTO_UNSPEC,	/* registered for all families */
	.match      = qtaguid_mt,
	.matchsize  = sizeof(struct xt_qtaguid_match_info),
	.me         = THIS_MODULE,
};
2994
/*
 * Module init: register the proc entries, per-interface stat tracking,
 * the xtables match and the /dev/xt_qtaguid misc device.
 * NOTE(review): on failure nothing registered so far is rolled back and
 * a generic -1 is returned instead of the underlying error code;
 * tolerable only because the module cannot be unloaded (see below).
 */
static int __init qtaguid_mt_init(void)
{
	if (qtaguid_proc_register(&xt_qtaguid_procdir)
	    || iface_stat_init(xt_qtaguid_procdir)
	    || xt_register_match(&qtaguid_mt_reg)
	    || misc_register(&qtu_device))
		return -1;
	return 0;
}
3004
3005 /*
3006  * TODO: allow unloading of the module.
3007  * For now stats are permanent.
3008  * Kconfig forces'y/n' and never an 'm'.
3009  */
3010
3011 module_init(qtaguid_mt_init);
3012 MODULE_AUTHOR("jpa <jpa@google.com>");
3013 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3014 MODULE_LICENSE("GPL");
3015 MODULE_ALIAS("ipt_owner");
3016 MODULE_ALIAS("ip6t_owner");
3017 MODULE_ALIAS("ipt_qtaguid");
3018 MODULE_ALIAS("ip6t_qtaguid");