1 /*
2  * fast-classifier.c
3  *      Shortcut forwarding engine connection manager.
4  *      fast-classifier
5  *
6  * Copyright (c) 2013-2018 The Linux Foundation. All rights reserved.
7  * Permission to use, copy, modify, and/or distribute this software for
8  * any purpose with or without fee is hereby granted, provided that the
9  * above copyright notice and this permission notice appear in all copies.
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
16  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/skbuff.h>
21 #include <net/route.h>
22 #include <net/ip6_route.h>
23 #include <net/addrconf.h>
24 #include <net/dsfield.h>
25 #include <linux/inetdevice.h>
26 #include <linux/netfilter_bridge.h>
27 #include <linux/netfilter_ipv6.h>
28 #include <net/netfilter/nf_conntrack_acct.h>
29 #include <net/netfilter/nf_conntrack_helper.h>
30 #include <net/netfilter/nf_conntrack_zones.h>
31 #include <net/netfilter/nf_conntrack_core.h>
32 #include <linux/netfilter/xt_dscp.h>
33 #include <net/genetlink.h>
34 #include <linux/spinlock.h>
35 #include <linux/if_bridge.h>
36 #include <linux/hashtable.h>
37 #include <linux/version.h>
38
39 #include <sfe_backport.h>
40 #include <sfe.h>
41 #include <sfe_cm.h>
42 #include "fast-classifier.h"
43
44 typedef enum fast_classifier_exception {
45         FAST_CL_EXCEPTION_PACKET_BROADCAST,
46         FAST_CL_EXCEPTION_PACKET_MULTICAST,
47         FAST_CL_EXCEPTION_NO_IIF,
48         FAST_CL_EXCEPTION_NO_CT,
49         FAST_CL_EXCEPTION_CT_NO_TRACK,
50         FAST_CL_EXCEPTION_CT_NO_CONFIRM,
51         FAST_CL_EXCEPTION_CT_IS_ALG,
52         FAST_CL_EXCEPTION_IS_IPV4_MCAST,
53         FAST_CL_EXCEPTION_IS_IPV6_MCAST,
54         FAST_CL_EXCEPTION_TCP_NOT_ASSURED,
55         FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED,
56         FAST_CL_EXCEPTION_UNKNOW_PROTOCOL,
57         FAST_CL_EXCEPTION_NO_SRC_DEV,
58         FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV,
59         FAST_CL_EXCEPTION_NO_DEST_DEV,
60         FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV,
61         FAST_CL_EXCEPTION_NO_BRIDGE,
62         FAST_CL_EXCEPTION_LOCAL_OUT,
63         FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION,
64         FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL,
65         FAST_CL_EXCEPTION_CT_DESTROY_MISS,
66         FAST_CL_EXCEPTION_MAX
67 } fast_classifier_exception_t;
68
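/*
 * Human-readable names for the exception counters; this table must stay in
 * the same order as fast_classifier_exception_t above.
 */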
69 static char *fast_classifier_exception_events_string[FAST_CL_EXCEPTION_MAX] = {
70         "PACKET_BROADCAST",
71         "PACKET_MULTICAST",
72         "NO_IIF",
73         "NO_CT",
74         "CT_NO_TRACK",
75         "CT_NO_CONFIRM",
76         "CT_IS_ALG",
77         "IS_IPV4_MCAST",
78         "IS_IPV6_MCAST",
79         "TCP_NOT_ASSURED",
80         "TCP_NOT_ESTABLISHED",
81         "UNKNOW_PROTOCOL",
82         "NO_SRC_DEV",
83         "NO_SRC_XLATE_DEV",
84         "NO_DEST_DEV",
85         "NO_DEST_XLATE_DEV",
86         "NO_BRIDGE",
87         "LOCAL_OUT",
88         "WAIT_FOR_ACCELERATION",
89         "UPDATE_PROTOCOL_FAIL",
90         "CT_DESTROY_MISS",
91 };
92
93 /*
94  * Per-module structure.
95  */
96 struct fast_classifier {
97         spinlock_t lock;                /* Lock for SMP correctness */
98
99         /*
100          * Control state.
101          */
102         struct kobject *sys_fast_classifier;    /* sysfs linkage */
103
104         /*
105          * Callback notifiers.
106          */
107         struct notifier_block dev_notifier;     /* Device notifier */
108         struct notifier_block inet_notifier;    /* IPv4 notifier */
109         struct notifier_block inet6_notifier;   /* IPv6 notifier */
110         u32 exceptions[FAST_CL_EXCEPTION_MAX];
111 };
112
113 static struct fast_classifier __sc;
114
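/*
 * Attribute policy for the generic netlink ops.  Since Linux 5.2 the per-op
 * .policy member was removed from struct genl_ops (policies are attached to
 * the genl_family instead), hence the version guards around the references
 * to this table below.
 */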
115 #if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
116 static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = {
117         [FAST_CLASSIFIER_A_TUPLE] = {
118                 .type = NLA_UNSPEC,
119                 .len = sizeof(struct fast_classifier_tuple)
120         },
121 };
122 #endif /*KERNEL_VERSION(5, 2, 0)*/
123
124 static struct genl_multicast_group fast_classifier_genl_mcgrp[] = {
125         {
126                 .name = FAST_CLASSIFIER_GENL_MCGRP,
127         },
128 };
129
130 static int fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info);
131 static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, struct netlink_callback *cb);
132
133 static struct genl_ops fast_classifier_gnl_ops[] = {
134         {
135                 .cmd = FAST_CLASSIFIER_C_OFFLOAD,
136                 .flags = 0,
137 #if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
138                 .policy = fast_classifier_genl_policy,
139 #endif /*KERNEL_VERSION(5, 2, 0)*/
140                 .doit = fast_classifier_offload_genl_msg,
141                 .dumpit = NULL,
142         },
143         {
144                 .cmd = FAST_CLASSIFIER_C_OFFLOADED,
145                 .flags = 0,
146 #if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
147                 .policy = fast_classifier_genl_policy,
148 #endif /*KERNEL_VERSION(5, 2, 0)*/
149                 .doit = NULL,
150                 .dumpit = fast_classifier_nl_genl_msg_DUMP,
151         },
152         {
153                 .cmd = FAST_CLASSIFIER_C_DONE,
154                 .flags = 0,
155 #if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
156                 .policy = fast_classifier_genl_policy,
157 #endif /*KERNEL_VERSION(5, 2, 0)*/
158                 .doit = NULL,
159                 .dumpit = fast_classifier_nl_genl_msg_DUMP,
160         },
161 };
162
163 static struct genl_family fast_classifier_gnl_family = {
164 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
165         .id = GENL_ID_GENERATE,
166 #endif /*KERNEL_VERSION(4, 10, 0)*/
167         .hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE,
168         .name = FAST_CLASSIFIER_GENL_NAME,
169         .version = FAST_CLASSIFIER_GENL_VERSION,
170         .maxattr = FAST_CLASSIFIER_A_MAX,
171 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
172         .ops = fast_classifier_gnl_ops,
173         .n_ops = ARRAY_SIZE(fast_classifier_gnl_ops),
174         .mcgrps = fast_classifier_genl_mcgrp,
175         .n_mcgrps = ARRAY_SIZE(fast_classifier_genl_mcgrp),
176 #endif /*KERNEL_VERSION(4, 10, 0)*/
177 };
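/*
 * Userspace interaction, as implemented below: a daemon can listen on the
 * FAST_CLASSIFIER_GENL_MCGRP multicast group for FAST_CLASSIFIER_C_OFFLOADED
 * and FAST_CLASSIFIER_C_DONE notifications, and can send a
 * FAST_CLASSIFIER_C_OFFLOAD message carrying a struct fast_classifier_tuple
 * to permit acceleration of a tracked connection before the packet-count
 * threshold (offload_at_pkts) is reached.
 */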
178
179 static atomic_t offload_msgs = ATOMIC_INIT(0);
180 static atomic_t offload_no_match_msgs = ATOMIC_INIT(0);
181 static atomic_t offloaded_msgs = ATOMIC_INIT(0);
182 static atomic_t done_msgs = ATOMIC_INIT(0);
183
184 static atomic_t offloaded_fail_msgs = ATOMIC_INIT(0);
185 static atomic_t done_fail_msgs = ATOMIC_INIT(0);
186
187 /*
188  * Accelerate incoming packets destined for a bridge device
189  *      If an incoming packet is ultimately destined for
190  *      a bridge device, we will first see the packet arrive
191  *      on the physical device; we can skip straight to
192  *      processing the packet as if it came from the bridge
193  *      for an additional performance gain.
194  *
195  *      This only works when the hook is above the bridge. We
196  *      only implement ingress for now, because for egress we
197  *      want the bridge device's qdiscs to be used.
198  */
199 static bool skip_to_bridge_ingress;
200
201 /*
202  * fast_classifier_incr_exceptions()
203  *      increase an exception counter.
204  */
205 static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t except)
206 {
207         struct fast_classifier *sc = &__sc;
208
209         spin_lock_bh(&sc->lock);
210         sc->exceptions[except]++;
211         spin_unlock_bh(&sc->lock);
212 }
213
214 /*
215  * fast_classifier_recv()
216  *      Handle packet receives.
217  *
218  * Returns 1 if the packet is forwarded or 0 if it isn't.
219  */
220 int fast_classifier_recv(struct sk_buff *skb)
221 {
222         struct net_device *dev;
223         struct net_device *master_dev = NULL;
224         int ret = 0;
225
226         /*
227          * We know that for the vast majority of packets we need the transport
228          * layer header so we may as well start to fetch it now!
229          */
230         prefetch(skb->data + 32);
231         barrier();
232
233         dev = skb->dev;
234
235         /*
236          * Process packet like it arrived on the bridge device
237          */
238         if (skip_to_bridge_ingress &&
239             (dev->priv_flags & IFF_BRIDGE_PORT)) {
240                 master_dev = sfe_dev_get_master(dev);
241                 if (!master_dev) {
242                         DEBUG_WARN("master dev is NULL %s\n", dev->name);
243                         goto rx_exit;
244                 }
245                 dev = master_dev;
246         }
247
248         /*
249          * We're only interested in IPv4 and IPv6 packets.
250          */
251         if (likely(htons(ETH_P_IP) == skb->protocol)) {
252                 struct in_device *in_dev;
253
254                 /*
255                  * Does our input device support IP processing?
256                  */
257                 in_dev = (struct in_device *)dev->ip_ptr;
258                 if (unlikely(!in_dev)) {
259                         DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
260                         goto rx_exit;
261                 }
262
263                 /*
264                  * Does it have an IP address?  If it doesn't then we can't do anything
265                  * interesting here!
266                  */
267                 if (unlikely(!in_dev->ifa_list)) {
268                         DEBUG_TRACE("no IP address for device: %s\n", dev->name);
269                         goto rx_exit;
270                 }
271
272                 ret = sfe_ipv4_recv(dev, skb);
273
274         } else if (likely(htons(ETH_P_IPV6) == skb->protocol)) {
275                 struct inet6_dev *in_dev;
276
277                 /*
278                  * Does our input device support IPv6 processing?
279                  */
280                 in_dev = (struct inet6_dev *)dev->ip6_ptr;
281                 if (unlikely(!in_dev)) {
282                         DEBUG_TRACE("no IPv6 processing for device: %s\n", dev->name);
283                         goto rx_exit;
284                 }
285
286                 /*
287                  * Does it have an IPv6 address?  If it doesn't then we can't do anything
288                  * interesting here!
289                  */
290                 if (unlikely(list_empty(&in_dev->addr_list))) {
291                         DEBUG_TRACE("no IPv6 address for device: %s\n", dev->name);
292                         goto rx_exit;
293                 }
294
295                 ret = sfe_ipv6_recv(dev, skb);
296
297         } else {
298                 DEBUG_TRACE("not IP packet\n");
299         }
300
301 rx_exit:
302         if (master_dev) {
303                 dev_put(master_dev);
304         }
305
306         return ret;
307 }
308
309 /*
310  * fast_classifier_find_dev_and_mac_addr()
311  *      Find the device and MAC address for a given IPv4 address.
312  *
313  * Returns true if we find the device and MAC address, otherwise false.
314  *
315  * We look up the rtable entry for the address and, from its neighbour
316  * structure, obtain the hardware address.  This means this function also
317  * works if the neighbours are routers too.
318  */
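/* On success a reference is held on *dev; the caller must dev_put() it. */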
319 static bool fast_classifier_find_dev_and_mac_addr(struct sk_buff *skb, sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4)
320 {
321         struct neighbour *neigh;
322         struct rtable *rt;
323         struct rt6_info *rt6;
324         struct dst_entry *dst;
325         struct net_device *mac_dev;
326
327         /*
328          * If an skb is provided, use its dst entry, since a plain route
329          * lookup cannot resolve policy-routed destinations.
330          */
331         if (unlikely(skb)) {
332                 dst = skb_dst(skb);
333                 goto skip_dst_lookup;
334         }
335
336         /*
337          * Look up the rtable entry for the IP address then get the hardware
338          * address from its neighbour structure.  This means this works when the
339          * neighbours are routers too.
340          */
341         if (likely(is_v4)) {
342                 rt = ip_route_output(&init_net, addr->ip, 0, 0, 0);
343                 if (unlikely(IS_ERR(rt))) {
344                         goto ret_fail;
345                 }
346
347                 dst = (struct dst_entry *)rt;
348         } else {
349 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0))
350                 rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0);
351 #else
352                 rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0);
353 #endif /*KERNEL_VERSION(4, 17, 0)*/
354                 if (!rt6) {
355                         goto ret_fail;
356                 }
357
358                 dst = (struct dst_entry *)rt6;
359         }
360
361 skip_dst_lookup:
362         rcu_read_lock();
363         neigh = sfe_dst_get_neighbour(dst, addr);
364         if (unlikely(!neigh)) {
365                 rcu_read_unlock();
366                 if (likely(!skb))
367                         dst_release(dst);
368
369                 goto ret_fail;
370         }
371
372         if (unlikely(!(neigh->nud_state & NUD_VALID))) {
373                 rcu_read_unlock();
374                 neigh_release(neigh);
375                 if (likely(!skb))
376                         dst_release(dst);
377
378                 goto ret_fail;
379         }
380
381         mac_dev = neigh->dev;
382         if (!mac_dev) {
383                 rcu_read_unlock();
384                 neigh_release(neigh);
385                 if (likely(!skb))
386                         dst_release(dst);
387
388                 goto ret_fail;
389         }
390
391         memcpy(mac_addr, neigh->ha, (size_t)mac_dev->addr_len);
392
393         dev_hold(mac_dev);
394         *dev = mac_dev;
395         rcu_read_unlock();
396         neigh_release(neigh);
397         if (likely(!skb))
398                 dst_release(dst);
399
400         return true;
401
402 ret_fail:
403         if (is_v4) {
404                 DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", addr);
405
406         } else {
407                 DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr);
408         }
409
410         return false;
411 }
412
413 static DEFINE_SPINLOCK(sfe_connections_lock);
414
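/*
 * One entry per tracked flow.  'sic' holds the SFE create request that is
 * submitted to the engine once the flow is offloaded, 'ct' points at the
 * owning conntrack entry and 'hits' counts packets seen before offload.
 * Entries are removed when conntrack reports IPCT_DESTROY for the flow
 * (see fast_classifier_conntrack_event()).
 */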
415 struct sfe_connection {
416         struct hlist_node hl;
417         struct sfe_connection_create *sic;
418         struct nf_conn *ct;
419         int hits;
420         int offload_permit;
421         int offloaded;
422         bool is_v4;
423         unsigned char smac[ETH_ALEN];
424         unsigned char dmac[ETH_ALEN];
425 };
426
427 static int sfe_connections_size;
428
429 #define FC_CONN_HASH_ORDER 13
430 static DEFINE_HASHTABLE(fc_conn_ht, FC_CONN_HASH_ORDER);
431
432 static u32 fc_conn_hash(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
433                         unsigned short sport, unsigned short dport, bool is_v4)
434 {
435         u32 idx, cnt = ((is_v4 ? sizeof(saddr->ip) : sizeof(saddr->ip6))/sizeof(u32));
436         u32 hash = 0;
437
438         for (idx = 0; idx < cnt; idx++) {
439                 hash ^= ((u32 *)saddr)[idx] ^ ((u32 *)daddr)[idx];
440         }
441
442         return hash ^ (sport | (dport << 16));
443 }
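/*
 * Note that the address XOR above is direction-independent but the port mix
 * is not: swapping source and destination generally yields a different key
 * (sport | dport << 16 vs. dport | sport << 16), which is why
 * fast_classifier_sb_find_conn() recomputes the hash with the tuple reversed
 * when its first lookup misses.
 */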
444
445 /*
446  * fast_classifier_update_protocol()
447  *      Update the sfe_connection_create struct with new protocol information before we offload
448  */
449 static int fast_classifier_update_protocol(struct sfe_connection_create *p_sic, struct nf_conn *ct)
450 {
451         switch (p_sic->protocol) {
452         case IPPROTO_TCP:
453                 p_sic->src_td_window_scale = ct->proto.tcp.seen[0].td_scale;
454                 p_sic->src_td_max_window = ct->proto.tcp.seen[0].td_maxwin;
455                 p_sic->src_td_end = ct->proto.tcp.seen[0].td_end;
456                 p_sic->src_td_max_end = ct->proto.tcp.seen[0].td_maxend;
457                 p_sic->dest_td_window_scale = ct->proto.tcp.seen[1].td_scale;
458                 p_sic->dest_td_max_window = ct->proto.tcp.seen[1].td_maxwin;
459                 p_sic->dest_td_end = ct->proto.tcp.seen[1].td_end;
460                 p_sic->dest_td_max_end = ct->proto.tcp.seen[1].td_maxend;
461
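                /*
                 * If conntrack is not enforcing TCP window checks for this
                 * connection (globally via nf_ct_tcp_no_window_check or
                 * per-direction via the BE_LIBERAL flag), ask the engine to
                 * skip its own sequence-space checks as well (assumption:
                 * that is what SFE_CREATE_FLAG_NO_SEQ_CHECK requests).
                 */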
462                 if (nf_ct_tcp_no_window_check
463                     || (ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_BE_LIBERAL)
464                     || (ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_BE_LIBERAL)) {
465                         p_sic->flags |= SFE_CREATE_FLAG_NO_SEQ_CHECK;
466                 }
467
468                 /*
469                  * If the connection is shutting down, do not manage it.
470                  * The state cannot be SYN_SENT or SYN_RECV because the connection is assured.
471                  * Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
472                  */
473                 spin_lock(&ct->lock);
474                 if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
475                         spin_unlock(&ct->lock);
476                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED);
477                         DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
478                                     ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port),
479                                     &p_sic->dest_ip, ntohs(p_sic->dest_port));
480                         return 0;
481                 }
482                 spin_unlock(&ct->lock);
483                 break;
484
485         case IPPROTO_UDP:
486                 break;
487
488         default:
489                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL);
490                 DEBUG_TRACE("unhandled protocol %d\n", p_sic->protocol);
491                 return 0;
492         }
493
494         return 1;
495 }
496
497 /* fast_classifier_send_genl_msg()
498  *      Function to send a generic netlink message
499  */
500 static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple *fc_msg)
501 {
502         struct sk_buff *skb;
503         int rc;
504         int buf_len;
505         int total_len;
506         void *msg_head;
507
508         /*
509          * Calculate our packet payload size.
510          * Start with our family header.
511          */
512         buf_len = fast_classifier_gnl_family.hdrsize;
513
514         /*
515          * Add the nla_total_size of each attribute we're going to nla_put().
516          */
517         buf_len += nla_total_size(sizeof(*fc_msg));
518
519         /*
520          * Lastly we need to add space for the NL message header since
521          * genlmsg_new only accounts for the GENL header and not the
522          * outer NL header. To do this, we use a NL helper function which
523          * calculates the total size of a netlink message given a payload size.
524          * Note this value does not include the GENL header, but that's
525          * added automatically by genlmsg_new.
526          */
527         total_len = nlmsg_total_size(buf_len);
528         skb = genlmsg_new(total_len, GFP_ATOMIC);
529         if (!skb)
530                 return;
531
532         msg_head = genlmsg_put(skb, 0, 0, &fast_classifier_gnl_family, 0, msg);
533         if (!msg_head) {
534                 nlmsg_free(skb);
535                 return;
536         }
537
538         rc = nla_put(skb, FAST_CLASSIFIER_A_TUPLE, sizeof(struct fast_classifier_tuple), fc_msg);
539         if (rc != 0) {
540                 genlmsg_cancel(skb, msg_head);
541                 nlmsg_free(skb);
542                 return;
543         }
544
545 #if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 19, 0))
546         rc = genlmsg_end(skb, msg_head);
547         if (rc < 0) {
548                 genlmsg_cancel(skb, msg_head);
549                 nlmsg_free(skb);
550                 return;
551         }
552 #else
553         genlmsg_end(skb, msg_head);
554
555 #endif
556
557 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
558         rc = genlmsg_multicast(&fast_classifier_gnl_family, skb, 0, 0, GFP_ATOMIC);
559 #else
560         rc = genlmsg_multicast(skb, 0, fast_classifier_genl_mcgrp[0].id, GFP_ATOMIC);
561 #endif
562         switch (msg) {
563         case FAST_CLASSIFIER_C_OFFLOADED:
564                 if (rc == 0) {
565                         atomic_inc(&offloaded_msgs);
566                 } else {
567                         atomic_inc(&offloaded_fail_msgs);
568                 }
569                 break;
570         case FAST_CLASSIFIER_C_DONE:
571                 if (rc == 0) {
572                         atomic_inc(&done_msgs);
573                 } else {
574                         atomic_inc(&done_fail_msgs);
575                 }
576                 break;
577         default:
578                 DEBUG_ERROR("fast-classifier: Unknown message type sent!\n");
579                 break;
580         }
581
582         DEBUG_TRACE("Notify NL message %d ", msg);
583         if (fc_msg->ethertype == AF_INET) {
584                 DEBUG_TRACE("sip=%pI4 dip=%pI4 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
585         } else {
586                 DEBUG_TRACE("sip=%pI6 dip=%pI6 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
587         }
588         DEBUG_TRACE("protocol=%d sport=%d dport=%d smac=%pM dmac=%pM\n",
589                     fc_msg->proto, fc_msg->sport, fc_msg->dport, fc_msg->smac, fc_msg->dmac);
590 }
591
592 /*
593  * fast_classifier_find_conn()
594  *      find a connection object in the hash table
595  *      @pre the sfe_connections_lock must be held before calling this function
596  */
597 static struct sfe_connection *
598 fast_classifier_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
599                           unsigned short sport, unsigned short dport,
600                           unsigned char proto, bool is_v4)
601 {
602         struct sfe_connection_create *p_sic;
603         struct sfe_connection *conn;
604         u32 key;
605 #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
606         struct hlist_node *node;
607 #endif
608
609         key = fc_conn_hash(saddr, daddr, sport, dport, is_v4);
610
611         sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
612                 if (conn->is_v4 != is_v4) {
613                         continue;
614                 }
615
616                 p_sic = conn->sic;
617
618                 if (p_sic->protocol == proto &&
619                     p_sic->src_port == sport &&
620                     p_sic->dest_port == dport &&
621                     sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) &&
622                     sfe_addr_equal(&p_sic->dest_ip, daddr, is_v4)) {
623                         return conn;
624                 }
625         }
626
627         DEBUG_TRACE("connection not found\n");
628         return NULL;
629 }
630
631 /*
632  * fast_classifier_sb_find_conn()
633  *      find a connection object in the hash table according to information from the packet
634  *      if not found, reverse the tuple and try again.
635  *      @pre the sfe_connections_lock must be held before calling this function
636  */
637 static struct sfe_connection *
638 fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
639                           unsigned short sport, unsigned short dport,
640                           unsigned char proto, bool is_v4)
641 {
642         struct sfe_connection_create *p_sic;
643         struct sfe_connection *conn;
644         u32 key;
645 #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
646         struct hlist_node *node;
647 #endif
648
649         key = fc_conn_hash(saddr, daddr, sport, dport, is_v4);
650
651         sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
652                 if (conn->is_v4 != is_v4) {
653                         continue;
654                 }
655
656                 p_sic = conn->sic;
657
658                 if (p_sic->protocol == proto &&
659                     p_sic->src_port == sport &&
660                     p_sic->dest_port_xlate == dport &&
661                     sfe_addr_equal(&p_sic->src_ip, saddr, is_v4) &&
662                     sfe_addr_equal(&p_sic->dest_ip_xlate, daddr, is_v4)) {
663                         return conn;
664                 }
665         }
666
667         /*
668          * Reverse the tuple and try again
669          */
670         key = fc_conn_hash(daddr, saddr, dport, sport, is_v4);
671
672         sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
673                 if (conn->is_v4 != is_v4) {
674                         continue;
675                 }
676
677                 p_sic = conn->sic;
678
679                 if (p_sic->protocol == proto &&
680                     p_sic->src_port == dport &&
681                     p_sic->dest_port_xlate == sport &&
682                     sfe_addr_equal(&p_sic->src_ip, daddr, is_v4) &&
683                     sfe_addr_equal(&p_sic->dest_ip_xlate, saddr, is_v4)) {
684                         return conn;
685                 }
686         }
687
688         DEBUG_TRACE("connection not found\n");
689         return NULL;
690 }
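/*
 * The reversed lookup above is needed because the tuple supplied by userspace
 * (or seen on the wire) may describe the flow in the opposite direction from
 * the one we stored, and the destination side is matched against the post-NAT
 * (xlate) address and port.
 */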
691
692 /*
693  * fast_classifier_add_conn()
694  *      add a connection object to the hash table if there is no duplicate
695  *      @conn connection to add
696  *      @return conn if successful, NULL if duplicate
697  */
698 static struct sfe_connection *
699 fast_classifier_add_conn(struct sfe_connection *conn)
700 {
701         struct sfe_connection_create *sic = conn->sic;
702         u32 key;
703
704         spin_lock_bh(&sfe_connections_lock);
705         if (fast_classifier_find_conn(&sic->src_ip, &sic->dest_ip, sic->src_port,
706                                         sic->dest_port, sic->protocol, conn->is_v4)) {
707                 spin_unlock_bh(&sfe_connections_lock);
708                 return NULL;
709         }
710
711         key = fc_conn_hash(&sic->src_ip, &sic->dest_ip,
712                            sic->src_port, sic->dest_port, conn->is_v4);
713
714         hash_add(fc_conn_ht, &conn->hl, key);
715         sfe_connections_size++;
716         spin_unlock_bh(&sfe_connections_lock);
717
718         DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", sfe_connections_size);
719
720         if (conn->is_v4) {
721                 DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
722                                 key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port);
723         } else {
724                 DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
725                                 key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port);
726         }
727
728         return conn;
729 }
730
731 /*
732  * fast_classifier_offload_genl_msg()
733  *      Called from user space to offload a connection
734  */
735 static int
736 fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info)
737 {
738         struct nlattr *na;
739         struct fast_classifier_tuple *fc_msg;
740         struct sfe_connection *conn;
741
742         na = info->attrs[FAST_CLASSIFIER_A_TUPLE];
743         fc_msg = nla_data(na);
744
745         if (fc_msg->ethertype == AF_INET) {
746                 DEBUG_TRACE("want to offload: %d-%d, %pI4, %pI4, %d, %d SMAC=%pM DMAC=%pM\n",
747                             fc_msg->ethertype,
748                             fc_msg->proto,
749                             &fc_msg->src_saddr,
750                             &fc_msg->dst_saddr,
751                             fc_msg->sport,
752                             fc_msg->dport,
753                             fc_msg->smac,
754                             fc_msg->dmac);
755         } else {
756                 DEBUG_TRACE("want to offload: %d-%d, %pI6, %pI6, %d, %d SMAC=%pM DMAC=%pM\n",
757                             fc_msg->ethertype,
758                             fc_msg->proto,
759                             &fc_msg->src_saddr,
760                             &fc_msg->dst_saddr,
761                             fc_msg->sport,
762                             fc_msg->dport,
763                             fc_msg->smac,
764                             fc_msg->dmac);
765         }
766
767         spin_lock_bh(&sfe_connections_lock);
768         conn = fast_classifier_sb_find_conn((sfe_ip_addr_t *)&fc_msg->src_saddr,
769                                          (sfe_ip_addr_t *)&fc_msg->dst_saddr,
770                                          fc_msg->sport,
771                                          fc_msg->dport,
772                                          fc_msg->proto,
773                                          (fc_msg->ethertype == AF_INET));
774         if (!conn) {
775                 spin_unlock_bh(&sfe_connections_lock);
776                 DEBUG_TRACE("REQUEST OFFLOAD NO MATCH\n");
777                 atomic_inc(&offload_no_match_msgs);
778                 return 0;
779         }
780
781         conn->offload_permit = 1;
782         spin_unlock_bh(&sfe_connections_lock);
783         atomic_inc(&offload_msgs);
784
785         DEBUG_TRACE("INFO: calling sfe rule creation!\n");
786         return 0;
787 }
788
789 /*
790  * fast_classifier_nl_genl_msg_DUMP()
791  *      ignore the fast_classifier OFFLOADED and DONE messages
792  */
793 static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb,
794                                             struct netlink_callback *cb)
795 {
796         return 0;
797 }
798
799 /* Auto-offload a connection once we have seen this many packets */
800 static int offload_at_pkts = 128;
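/*
 * A flow is handed to the SFE engine once it has been seen this many times,
 * unless userspace has already permitted it via FAST_CLASSIFIER_C_OFFLOAD
 * (see conn->offload_permit in fast_classifier_post_routing()).
 */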
801
802 /*
803  * fast_classifier_post_routing()
804  *      Called for packets about to leave the box - either locally generated or forwarded from another interface
805  */
806 static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4)
807 {
808         int ret;
809         struct sfe_connection_create sic;
810         struct sfe_connection_create *p_sic;
811         struct net_device *in;
812         struct nf_conn *ct;
813         enum ip_conntrack_info ctinfo;
814         struct net_device *dev;
815         struct net_device *src_dev;
816         struct net_device *dest_dev;
817         struct net_device *src_dev_tmp;
818         struct net_device *dest_dev_tmp;
819         struct net_device *src_br_dev = NULL;
820         struct net_device *dest_br_dev = NULL;
821         struct nf_conntrack_tuple orig_tuple;
822         struct nf_conntrack_tuple reply_tuple;
823         struct sfe_connection *conn;
824         struct sk_buff *tmp_skb = NULL;
825
826         /*
827          * Don't process broadcast or multicast packets.
828          */
829         if (unlikely(skb->pkt_type == PACKET_BROADCAST)) {
830                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_BROADCAST);
831                 DEBUG_TRACE("broadcast, ignoring\n");
832                 return NF_ACCEPT;
833         }
834         if (unlikely(skb->pkt_type == PACKET_MULTICAST)) {
835                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_PACKET_MULTICAST);
836                 DEBUG_TRACE("multicast, ignoring\n");
837                 return NF_ACCEPT;
838         }
839
840         /*
841          * Don't process packets that are not being forwarded.
842          */
843         in = dev_get_by_index(&init_net, skb->skb_iif);
844         if (!in) {
845                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_IIF);
846                 DEBUG_TRACE("packet not forwarding\n");
847                 return NF_ACCEPT;
848         }
849
850         dev_put(in);
851
852         /*
853          * Don't process packets that aren't being tracked by conntrack.
854          */
855         ct = nf_ct_get(skb, &ctinfo);
856         if (unlikely(!ct)) {
857                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_CT);
858                 DEBUG_TRACE("no conntrack connection, ignoring\n");
859                 return NF_ACCEPT;
860         }
861
862 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
863         /*
864          * Don't process untracked connections.
865          */
866         if (unlikely(nf_ct_is_untracked(ct))) {
867                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_TRACK);
868                 DEBUG_TRACE("untracked connection\n");
869                 return NF_ACCEPT;
870         }
871 #endif /*KERNEL_VERSION(4, 12, 0)*/
872
873         /*
874          * An unconfirmed connection may be dropped by Linux at the final step,
875          * so we don't process unconfirmed connections.
876          */
877         if (!nf_ct_is_confirmed(ct)) {
878                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_NO_CONFIRM);
879                 DEBUG_TRACE("unconfirmed connection\n");
880                 return NF_ACCEPT;
881         }
882
883         /*
884          * Don't process connections that require support from a 'helper' (typically a NAT ALG).
885          */
886         if (unlikely(nfct_help(ct))) {
887                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_IS_ALG);
888                 DEBUG_TRACE("connection has helper\n");
889                 return NF_ACCEPT;
890         }
891
892         memset(&sic, 0, sizeof(sic));
893
894         /*
895          * Look up the details of our connection in conntrack.
896          *
897          * Note that the data we get from conntrack is for the "ORIGINAL" direction
898          * but our packet may actually be in the "REPLY" direction.
899          */
900         orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
901         reply_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
902         sic.protocol = (s32)orig_tuple.dst.protonum;
903
904         sic.flags = 0;
905
906         /*
907          * Get addressing information, non-NAT first
908          */
909         if (likely(is_v4)) {
910                 u32 dscp;
911
912                 sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
913                 sic.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
914
915                 if (ipv4_is_multicast(sic.src_ip.ip) || ipv4_is_multicast(sic.dest_ip.ip)) {
916                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV4_MCAST);
917                         DEBUG_TRACE("multicast address\n");
918                         return NF_ACCEPT;
919                 }
920
921                 /*
922                  * NAT'ed addresses - note these are as seen from the 'reply' direction
923                  * When NAT does not apply to this connection these will be identical to the above.
924                  */
925                 sic.src_ip_xlate.ip = (__be32)reply_tuple.dst.u3.ip;
926                 sic.dest_ip_xlate.ip = (__be32)reply_tuple.src.u3.ip;
927
928                 dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
929                 if (dscp) {
930                         sic.dest_dscp = dscp;
931                         sic.src_dscp = sic.dest_dscp;
932                         sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP;
933                 }
934         } else {
935                 u32 dscp;
936
937                 sic.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
938                 sic.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
939
940                 if (ipv6_addr_is_multicast((struct in6_addr *)sic.src_ip.ip6) ||
941                     ipv6_addr_is_multicast((struct in6_addr *)sic.dest_ip.ip6)) {
942                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_IS_IPV6_MCAST);
943                         DEBUG_TRACE("multicast address\n");
944                         return NF_ACCEPT;
945                 }
946
947                 /*
948                  * NAT'ed addresses - note these are as seen from the 'reply' direction
949                  * When NAT does not apply to this connection these will be identical to the above.
950                  */
951                 sic.src_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.dst.u3.in6);
952                 sic.dest_ip_xlate.ip6[0] = *((struct sfe_ipv6_addr *)&reply_tuple.src.u3.in6);
953
954                 dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
955                 if (dscp) {
956                         sic.dest_dscp = dscp;
957                         sic.src_dscp = sic.dest_dscp;
958                         sic.flags |= SFE_CREATE_FLAG_REMARK_DSCP;
959                 }
960         }
961
962         switch (sic.protocol) {
963         case IPPROTO_TCP:
964                 sic.src_port = orig_tuple.src.u.tcp.port;
965                 sic.dest_port = orig_tuple.dst.u.tcp.port;
966                 sic.src_port_xlate = reply_tuple.dst.u.tcp.port;
967                 sic.dest_port_xlate = reply_tuple.src.u.tcp.port;
968
969                 /*
970                  * Don't try to manage a non-established connection.
971                  */
972                 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
973                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ASSURED);
974                         DEBUG_TRACE("non-established connection\n");
975                         return NF_ACCEPT;
976                 }
977
978                 break;
979
980         case IPPROTO_UDP:
981                 sic.src_port = orig_tuple.src.u.udp.port;
982                 sic.dest_port = orig_tuple.dst.u.udp.port;
983                 sic.src_port_xlate = reply_tuple.dst.u.udp.port;
984                 sic.dest_port_xlate = reply_tuple.src.u.udp.port;
985
986                 /*
987                  * Somehow, SFE is not playing nice with IPSec traffic.
988                  * Do not accelerate for now.
989                  */
990                 if (ntohs(sic.dest_port) == 4500 || ntohs(sic.dest_port) == 500) {
991                         if (likely(is_v4))
992                                 DEBUG_TRACE("quarkysg:: IPsec bypass: %pI4:%d(%pI4:%d) to %pI4:%d(%pI4:%d)\n",
993                                         &sic.src_ip.ip, ntohs(sic.src_port), &sic.src_ip_xlate.ip, ntohs(sic.src_port_xlate),
994                                         &sic.dest_ip.ip, ntohs(sic.dest_port), &sic.dest_ip_xlate.ip, ntohs(sic.dest_port_xlate));
995                         else
996                                 DEBUG_TRACE("quarkysg:: IPsec bypass: %pI6:%d to %pI6:%d\n",
997                                         &sic.src_ip.ip6, ntohs(sic.src_port), &sic.dest_ip.ip6, ntohs(sic.dest_port));
998                         return NF_ACCEPT;
999                 }
1000                 break;
1001
1002         default:
1003                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UNKNOW_PROTOCOL);
1004                 DEBUG_TRACE("unhandled protocol %d\n", sic.protocol);
1005                 return NF_ACCEPT;
1006         }
1007
1008 #ifdef CONFIG_XFRM
1009         sic.original_accel = 1;
1010         sic.reply_accel = 1;
1011 #endif
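        /*
         * Both directions default to accelerated here; presumably the SFE
         * core can veto acceleration for flows that are subject to an
         * xfrm/IPsec transform (assumption based on the flag names, the
         * relevant engine code is not part of this file).
         */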
1012
1013         /*
1014          * Get QoS information
1015          */
1016         if (skb->priority) {
1017                 sic.dest_priority = skb->priority;
1018                 sic.src_priority = sic.dest_priority;
1019                 sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY;
1020         }
1021
1022         if (is_v4) {
1023                 DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
1024                             sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port);
1025         } else {
1026                 DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
1027                             sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port);
1028         }
1029
1030         /*
1031          * If we already have this connection in our list, skip it
1032          * XXX: this may need to be optimized
1033          */
1034         spin_lock_bh(&sfe_connections_lock);
1035
1036         conn = fast_classifier_find_conn(&sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port, sic.protocol, is_v4);
1037         if (conn) {
1038                 conn->hits++;
1039
1040                 if (!conn->offloaded) {
1041                         if (conn->offload_permit || conn->hits >= offload_at_pkts) {
1042                                 DEBUG_TRACE("OFFLOADING CONNECTION, TOO MANY HITS\n");
1043
1044                                 if (fast_classifier_update_protocol(conn->sic, conn->ct) == 0) {
1045                                         spin_unlock_bh(&sfe_connections_lock);
1046                                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_UPDATE_PROTOCOL_FAIL);
1047                                         DEBUG_TRACE("UNKNOWN PROTOCOL OR CONNECTION CLOSING, SKIPPING\n");
1048                                         return NF_ACCEPT;
1049                                 }
1050
1051                                 DEBUG_TRACE("INFO: calling sfe rule creation!\n");
1052                                 spin_unlock_bh(&sfe_connections_lock);
1053
1054                                 ret = is_v4 ? sfe_ipv4_create_rule(conn->sic) : sfe_ipv6_create_rule(conn->sic);
1055                                 if ((ret == 0) || (ret == -EADDRINUSE)) {
1056                                         struct fast_classifier_tuple fc_msg;
1057
1058                                         if (is_v4) {
1059                                                 fc_msg.ethertype = AF_INET;
1060                                                 fc_msg.src_saddr.in = *((struct in_addr *)&sic.src_ip);
1061                                                 fc_msg.dst_saddr.in = *((struct in_addr *)&sic.dest_ip_xlate);
1062                                         } else {
1063                                                 fc_msg.ethertype = AF_INET6;
1064                                                 fc_msg.src_saddr.in6 = *((struct in6_addr *)&sic.src_ip);
1065                                                 fc_msg.dst_saddr.in6 = *((struct in6_addr *)&sic.dest_ip_xlate);
1066                                         }
1067
1068                                         fc_msg.proto = sic.protocol;
1069                                         fc_msg.sport = sic.src_port;
1070                                         fc_msg.dport = sic.dest_port_xlate;
1071                                         memcpy(fc_msg.smac, conn->smac, ETH_ALEN);
1072                                         memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN);
1073                                         fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_OFFLOADED, &fc_msg);
1074                                         conn->offloaded = 1;
1075                                 }
1076
1077                                 return NF_ACCEPT;
1078                         }
1079                 }
1080
1081                 spin_unlock_bh(&sfe_connections_lock);
1082                 if (conn->offloaded) {
1083                         is_v4 ? sfe_ipv4_update_rule(conn->sic) : sfe_ipv6_update_rule(conn->sic);
1084                 }
1085
1086                 DEBUG_TRACE("FOUND, SKIPPING\n");
1087                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_WAIT_FOR_ACCELERATION);
1088                 return NF_ACCEPT;
1089         }
1090
1091         spin_unlock_bh(&sfe_connections_lock);
1092
1093         /*
1094          * Get the net device and MAC addresses that correspond to the various source and
1095          * destination host addresses.
1096          */
1097         if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) {
1098                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_DEV);
1099                 return NF_ACCEPT;
1100         }
1101         src_dev = src_dev_tmp;
1102
1103         if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
1104                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV);
1105                 goto done1;
1106         }
1107         dev_put(dev);
1108
1109         if (unlikely(!is_v4))
1110                 tmp_skb = skb;
1111
1112         if (!fast_classifier_find_dev_and_mac_addr(tmp_skb, &sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
1113                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_DEV);
1114                 goto done1;
1115         }
1116         dev_put(dev);
1117
1118         if (!fast_classifier_find_dev_and_mac_addr(skb, &sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) {
1119                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV);
1120                 goto done1;
1121         }
1122         dest_dev = dest_dev_tmp;
1123
1124         /*
1125          * Our devices may actually be part of a bridge interface. If that's
1126          * the case then find the bridge interface instead.
1127          */
1128         if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
1129                 src_br_dev = sfe_dev_get_master(src_dev);
1130                 if (!src_br_dev) {
1131                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE);
1132                         DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
1133                         goto done2;
1134                 }
1135                 src_dev = src_br_dev;
1136         }
1137
1138         if (dest_dev->priv_flags & IFF_BRIDGE_PORT) {
1139                 dest_br_dev = sfe_dev_get_master(dest_dev);
1140                 if (!dest_br_dev) {
1141                         fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_BRIDGE);
1142                         DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
1143                         goto done3;
1144                 }
1145                 dest_dev = dest_br_dev;
1146         }
1147
1148         sic.src_dev = src_dev;
1149         sic.dest_dev = dest_dev;
1150
1151         sic.src_mtu = src_dev->mtu;
1152         sic.dest_mtu = dest_dev->mtu;
1153
1154         if (skb->mark) {
1155                 DEBUG_TRACE("SKB MARK NON ZERO %x\n", skb->mark);
1156         }
1157         sic.mark = skb->mark;
1158
1159         conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
1160         if (!conn) {
1161                 printk(KERN_CRIT "ERROR: no memory for sfe\n");
1162                 goto done4;
1163         }
1164         conn->hits = 0;
1165         conn->offload_permit = 0;
1166         conn->offloaded = 0;
1167         conn->is_v4 = is_v4;
1168         DEBUG_TRACE("Source MAC=%pM\n", sic.src_mac);
1169         memcpy(conn->smac, sic.src_mac, ETH_ALEN);
1170         memcpy(conn->dmac, sic.dest_mac_xlate, ETH_ALEN);
1171
1172         p_sic = kmalloc(sizeof(*p_sic), GFP_ATOMIC);
1173         if (!p_sic) {
1174                 printk(KERN_CRIT "ERROR: no memory for sfe\n");
1175                 kfree(conn);
1176                 goto done4;
1177         }
1178
1179         memcpy(p_sic, &sic, sizeof(sic));
1180         conn->sic = p_sic;
1181         conn->ct = ct;
1182
1183         if (!fast_classifier_add_conn(conn)) {
1184                 kfree(conn->sic);
1185                 kfree(conn);
1186         }
1187
1188         /*
1189          * If we had bridge ports then release them too.
1190          */
1191 done4:
1192         if (dest_br_dev) {
1193                 dev_put(dest_br_dev);
1194         }
1195 done3:
1196         if (src_br_dev) {
1197                 dev_put(src_br_dev);
1198         }
1199 done2:
1200         dev_put(dest_dev_tmp);
1201 done1:
1202         dev_put(src_dev_tmp);
1203
1204         return NF_ACCEPT;
1205 }
1206
1207 /*
1208  * fast_classifier_ipv4_post_routing_hook()
1209  *      Called for packets about to leave the box - either locally generated or forwarded from another interface
1210  */
1211 fast_classifier_ipv4_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
1212 {
1213         return fast_classifier_post_routing(skb, true);
1214 }
1215
1216 /*
1217  * fast_classifier_ipv6_post_routing_hook()
1218  *      Called for packets about to leave the box - either locally generated or forwarded from another interface
1219  */
1220 fast_classifier_ipv6_post_routing_hook(hooknum, ops, skb, in_unused, out, okfn)
1221 {
1222         return fast_classifier_post_routing(skb, false);
1223 }
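/*
 * Note: the two hook "functions" above are expanded by macros (assumed to be
 * provided by sfe_backport.h) into properly typed netfilter hook functions
 * named __fast_classifier_ipv4_post_routing_hook and
 * __fast_classifier_ipv6_post_routing_hook, which is why those names are
 * referenced in fast_classifier_ops_post_routing[] below.
 */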
1224
1225 /*
1226  * fast_classifier_update_mark()
1227  *      updates the mark for a fast-classifier connection
1228  */
1229 static void fast_classifier_update_mark(struct sfe_connection_mark *mark, bool is_v4)
1230 {
1231         struct sfe_connection *conn;
1232
1233         spin_lock_bh(&sfe_connections_lock);
1234
1235         conn = fast_classifier_find_conn(&mark->src_ip, &mark->dest_ip,
1236                                          mark->src_port, mark->dest_port,
1237                                          mark->protocol, is_v4);
1238         if (conn) {
1239                 conn->sic->mark = mark->mark;
1240         }
1241
1242         spin_unlock_bh(&sfe_connections_lock);
1243 }
1244
1245 #ifdef CONFIG_NF_CONNTRACK_EVENTS
1246 /*
1247  * fast_classifier_conntrack_event()
1248  *      Callback event invoked when a conntrack connection's state changes.
1249  */
1250 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1251 static int fast_classifier_conntrack_event(struct notifier_block *this,
1252                                            unsigned long events, void *ptr)
1253 #else
1254 static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_event *item)
1255 #endif
1256 {
1257 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1258         struct nf_ct_event *item = ptr;
1259 #endif
1260         struct sfe_connection_destroy sid;
1261         struct nf_conn *ct = item->ct;
1262         struct nf_conntrack_tuple orig_tuple;
1263         struct sfe_connection *conn;
1264         struct fast_classifier_tuple fc_msg;
1265         int offloaded = 0;
1266         bool is_v4;
1267
1268         /*
1269          * If we don't have a conntrack entry then we're done.
1270          */
1271         if (unlikely(!ct)) {
1272                 DEBUG_WARN("no ct in conntrack event callback\n");
1273                 return NOTIFY_DONE;
1274         }
1275
1276 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
1277         /*
1278          * If this is an untracked connection then we can't have any state either.
1279          */
1280         if (unlikely(nf_ct_is_untracked(ct))) {
1281                 DEBUG_TRACE("ignoring untracked conn\n");
1282                 return NOTIFY_DONE;
1283         }
1284 #endif /*KERNEL_VERSION(4, 12, 0)*/
1285
1286         orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
1287         sid.protocol = (s32)orig_tuple.dst.protonum;
1288
1289         /*
1290          * Extract information from the conntrack connection.  We're only interested
1291          * in nominal connection information (i.e. we're ignoring any NAT information).
1292          */
1293         if (likely(nf_ct_l3num(ct) == AF_INET)) {
1294                 sid.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
1295                 sid.dest_ip.ip = (__be32)orig_tuple.dst.u3.ip;
1296                 is_v4 = true;
1297         } else if (likely(nf_ct_l3num(ct) == AF_INET6)) {
1298                 sid.src_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.src.u3.in6);
1299                 sid.dest_ip.ip6[0] = *((struct sfe_ipv6_addr *)&orig_tuple.dst.u3.in6);
1300                 is_v4 = false;
1301         } else {
1302                 DEBUG_TRACE("ignoring non-IPv4 and non-IPv6 connection\n");
1303                 return NOTIFY_DONE;
1304         }
1305
1306         switch (sid.protocol) {
1307         case IPPROTO_TCP:
1308                 sid.src_port = orig_tuple.src.u.tcp.port;
1309                 sid.dest_port = orig_tuple.dst.u.tcp.port;
1310                 break;
1311
1312         case IPPROTO_UDP:
1313                 sid.src_port = orig_tuple.src.u.udp.port;
1314                 sid.dest_port = orig_tuple.dst.u.udp.port;
1315                 break;
1316
1317         default:
1318                 DEBUG_TRACE("unhandled protocol: %d\n", sid.protocol);
1319                 return NOTIFY_DONE;
1320         }
1321
1322         /*
1323          * Check for an updated mark
1324          */
1325         if ((events & (1 << IPCT_MARK)) && (ct->mark != 0)) {
1326                 struct sfe_connection_mark mark;
1327
1328                 mark.protocol = sid.protocol;
1329                 mark.src_ip = sid.src_ip;
1330                 mark.dest_ip = sid.dest_ip;
1331                 mark.src_port = sid.src_port;
1332                 mark.dest_port = sid.dest_port;
1333                 mark.mark = ct->mark;
1334
1335                 is_v4 ? sfe_ipv4_mark_rule(&mark) : sfe_ipv6_mark_rule(&mark);
1336                 fast_classifier_update_mark(&mark, is_v4);
1337         }
1338
1339         /*
1340          * We're only interested in destroy events at this point
1341          */
1342         if (unlikely(!(events & (1 << IPCT_DESTROY)))) {
1343                 DEBUG_TRACE("ignoring non-destroy event\n");
1344                 return NOTIFY_DONE;
1345         }
1346
1347         if (is_v4) {
1348                 DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
1349                             sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port));
1350         } else {
1351                 DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
1352                             sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port));
1353         }
1354
1355         spin_lock_bh(&sfe_connections_lock);
1356
1357         conn = fast_classifier_find_conn(&sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port, sid.protocol, is_v4);
1358         if (conn && conn->offloaded) {
1359                 if (is_v4) {
1360                         fc_msg.ethertype = AF_INET;
1361                         fc_msg.src_saddr.in = *((struct in_addr *)&conn->sic->src_ip);
1362                         fc_msg.dst_saddr.in = *((struct in_addr *)&conn->sic->dest_ip_xlate);
1363                 } else {
1364                         fc_msg.ethertype = AF_INET6;
1365                         fc_msg.src_saddr.in6 = *((struct in6_addr *)&conn->sic->src_ip);
1366                         fc_msg.dst_saddr.in6 = *((struct in6_addr *)&conn->sic->dest_ip_xlate);
1367                 }
1368
1369                 fc_msg.proto = conn->sic->protocol;
1370                 fc_msg.sport = conn->sic->src_port;
1371                 fc_msg.dport = conn->sic->dest_port_xlate;
1372                 memcpy(fc_msg.smac, conn->smac, ETH_ALEN);
1373                 memcpy(fc_msg.dmac, conn->dmac, ETH_ALEN);
1374                 offloaded = 1;
1375         }
1376
1377         if (conn) {
1378                 DEBUG_TRACE("Free connection\n");
1379
1380                 hash_del(&conn->hl);
1381                 sfe_connections_size--;
1382                 kfree(conn->sic);
1383                 kfree(conn);
1384         } else {
1385                 fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_CT_DESTROY_MISS);
1386         }
1387
1388         spin_unlock_bh(&sfe_connections_lock);
1389
1390         is_v4 ? sfe_ipv4_destroy_rule(&sid) : sfe_ipv6_destroy_rule(&sid);
1391
1392         if (offloaded) {
1393                 fast_classifier_send_genl_msg(FAST_CLASSIFIER_C_DONE, &fc_msg);
1394         }
1395
1396         return NOTIFY_DONE;
1397 }
1398
1399 /*
1400  * Netfilter conntrack event system to monitor connection tracking changes
1401  */
1402 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1403 static struct notifier_block fast_classifier_conntrack_notifier = {
1404         .notifier_call = fast_classifier_conntrack_event,
1405 };
1406 #else
1407 static struct nf_ct_event_notifier fast_classifier_conntrack_notifier = {
1408         .fcn = fast_classifier_conntrack_event,
1409 };
1410 #endif
1411 #endif
1412
1413 /*
1414  * Structure to establish a hook into the post routing netfilter point - this
1415  * will pick up locally generated outbound packets as well as packets being forwarded from one interface to another.
1416  *
1417  * Note: see include/linux/netfilter_ipv4.h for info related to priority levels.
1418  * We want to examine packets after NAT translation and any ALG processing.
1419  */
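/*
 * SFE_IPV4_NF_POST_ROUTING_HOOK()/SFE_IPV6_NF_POST_ROUTING_HOOK() come from
 * sfe_backport.h (not shown here).  As a rough sketch, each one is expected
 * to fill in a struct nf_hook_ops along these lines, with the exact fields
 * varying by kernel version:
 *
 *	{
 *		.hook = fn,
 *		.pf = NFPROTO_IPV4,              (or NFPROTO_IPV6)
 *		.hooknum = NF_INET_POST_ROUTING,
 *		.priority = NF_IP_PRI_NAT_SRC + 1,
 *	}
 *
 * i.e. the hook is assumed to run just after source NAT in POST_ROUTING.
 */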
1420 static struct nf_hook_ops fast_classifier_ops_post_routing[] __read_mostly = {
1421         SFE_IPV4_NF_POST_ROUTING_HOOK(__fast_classifier_ipv4_post_routing_hook),
1422         SFE_IPV6_NF_POST_ROUTING_HOOK(__fast_classifier_ipv6_post_routing_hook),
1423 };
1424
1425 /*
1426  * fast_classifier_sync_rule()
1427  *      Synchronize a connection's state.
1428  */
1429 static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
1430 {
1431         struct nf_conntrack_tuple_hash *h;
1432         struct nf_conntrack_tuple tuple;
1433         struct nf_conn *ct;
1434         SFE_NF_CONN_ACCT(acct);
1435
1436         /*
1437          * Create a tuple so as to be able to look up a connection
1438          */
1439         memset(&tuple, 0, sizeof(tuple));
1440         tuple.src.u.all = (__be16)sis->src_port;
1441         tuple.dst.dir = IP_CT_DIR_ORIGINAL;
1442         tuple.dst.protonum = (u8)sis->protocol;
1443         tuple.dst.u.all = (__be16)sis->dest_port;
1444
1445         if (sis->is_v6) {
1446                 tuple.src.u3.in6 = *((struct in6_addr *)sis->src_ip.ip6);
1447                 tuple.dst.u3.in6 = *((struct in6_addr *)sis->dest_ip.ip6);
1448                 tuple.src.l3num = AF_INET6;
1449
1450                 DEBUG_TRACE("update connection - p: %d, s: %pI6:%u, d: %pI6:%u\n",
1451                             (int)tuple.dst.protonum,
1452                             &tuple.src.u3.in6, (unsigned int)ntohs(tuple.src.u.all),
1453                             &tuple.dst.u3.in6, (unsigned int)ntohs(tuple.dst.u.all));
1454         } else {
1455                 tuple.src.u3.ip = sis->src_ip.ip;
1456                 tuple.dst.u3.ip = sis->dest_ip.ip;
1457                 tuple.src.l3num = AF_INET;
1458
1459                 DEBUG_TRACE("update connection - p: %d, s: %pI4:%u, d: %pI4:%u\n",
1460                             (int)tuple.dst.protonum,
1461                             &tuple.src.u3.ip, (unsigned int)ntohs(tuple.src.u.all),
1462                             &tuple.dst.u3.ip, (unsigned int)ntohs(tuple.dst.u.all));
1463         }
1464
1465         /*
1466          * Update packet count for ingress on bridge device
1467          */
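        /*
         * Offloaded packets never traverse the bridge's own receive path, so
         * its software rx counters would otherwise stop moving.  Fold the
         * per-direction byte/packet deltas into the bridge device stats here
         * so tools such as "ip -s link" keep reporting sensible numbers.
         */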
1468         if (skip_to_bridge_ingress) {
1469                 struct rtnl_link_stats64 nlstats;
1470                 nlstats.tx_packets = 0;
1471                 nlstats.tx_bytes = 0;
1472
1473                 if (sis->src_dev && (sis->src_dev->priv_flags & IFF_EBRIDGE) &&
1474                     (sis->src_new_packet_count || sis->src_new_byte_count)) {
1475                         nlstats.rx_packets = sis->src_new_packet_count;
1476                         nlstats.rx_bytes = sis->src_new_byte_count;
1477                         spin_lock_bh(&sfe_connections_lock);
1478                         br_dev_update_stats(sis->src_dev, &nlstats);
1479                         spin_unlock_bh(&sfe_connections_lock);
1480                 }
1481                 if (sis->dest_dev && (sis->dest_dev->priv_flags & IFF_EBRIDGE) &&
1482                     (sis->dest_new_packet_count || sis->dest_new_byte_count)) {
1483                         nlstats.rx_packets = sis->dest_new_packet_count;
1484                         nlstats.rx_bytes = sis->dest_new_byte_count;
1485                         spin_lock_bh(&sfe_connections_lock);
1486                         br_dev_update_stats(sis->dest_dev, &nlstats);
1487                         spin_unlock_bh(&sfe_connections_lock);
1488                 }
1489         }
1490
1491         /*
1492          * Look up conntrack connection
1493          */
1494         h = nf_conntrack_find_get(&init_net, SFE_NF_CT_DEFAULT_ZONE, &tuple);
1495         if (unlikely(!h)) {
1496                 DEBUG_TRACE("no connection found\n");
1497                 return;
1498         }
1499
1500         ct = nf_ct_tuplehash_to_ctrack(h);
1501 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0))
1502         NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1503 #endif /*KERNEL_VERSION(4, 9, 0)*/
1504
1505         /*
1506          * Only update if this is not a fixed timeout
1507          */
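        /*
         * On kernels >= 4.9 ct->timeout is a plain jiffies value holding the
         * absolute expiry time; older kernels keep a timer whose expires
         * field is extended instead.
         */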
1508         if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
1509                 spin_lock_bh(&ct->lock);
1510 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
1511                 ct->timeout += sis->delta_jiffies;
1512 #else
1513                 ct->timeout.expires += sis->delta_jiffies;
1514 #endif /*KERNEL_VERSION(4, 9, 0)*/
1515                 spin_unlock_bh(&ct->lock);
1516         }
1517
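        /*
         * Feed the byte/packet deltas reported by the SFE engine into
         * conntrack's accounting extension (when enabled) so that
         * "conntrack -L" and friends still see offloaded traffic.
         */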
1518         acct = nf_conn_acct_find(ct);
1519         if (acct) {
1520                 spin_lock_bh(&ct->lock);
1521                 atomic64_add(sis->src_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].packets);
1522                 atomic64_add(sis->src_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_ORIGINAL].bytes);
1523                 atomic64_add(sis->dest_new_packet_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets);
1524                 atomic64_add(sis->dest_new_byte_count, &SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].bytes);
1525                 spin_unlock_bh(&ct->lock);
1526         }
1527
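        /*
         * Offloaded TCP packets bypass conntrack's window tracking, so bring
         * the tracked state forward here: seen[0] is the original direction,
         * seen[1] the reply direction, and the (s32) subtractions advance the
         * end/maxend markers monotonically even across sequence-number wrap.
         */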
1528         switch (sis->protocol) {
1529         case IPPROTO_TCP:
1530                 spin_lock_bh(&ct->lock);
1531                 if (ct->proto.tcp.seen[0].td_maxwin < sis->src_td_max_window) {
1532                         ct->proto.tcp.seen[0].td_maxwin = sis->src_td_max_window;
1533                 }
1534                 if ((s32)(ct->proto.tcp.seen[0].td_end - sis->src_td_end) < 0) {
1535                         ct->proto.tcp.seen[0].td_end = sis->src_td_end;
1536                 }
1537                 if ((s32)(ct->proto.tcp.seen[0].td_maxend - sis->src_td_max_end) < 0) {
1538                         ct->proto.tcp.seen[0].td_maxend = sis->src_td_max_end;
1539                 }
1540                 if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
1541                         ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
1542                 }
1543                 if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
1544                         ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
1545                 }
1546                 if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
1547                         ct->proto.tcp.seen[1].td_maxend = sis->dest_td_max_end;
1548                 }
1549                 spin_unlock_bh(&ct->lock);
1550                 break;
1551         }
1552
1553         /*
1554          * Release connection
1555          */
1556         nf_ct_put(ct);
1557 }
1558
1559 /*
1560  * fast_classifier_device_event()
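 *      flush all offloaded rules that use a network device when it goes down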
1561  */
1562 static int fast_classifier_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1563 {
1564         struct net_device *dev = SFE_DEV_EVENT_PTR(ptr);
1565
1566         if (dev && (event == NETDEV_DOWN)) {
1567                 sfe_ipv4_destroy_all_rules_for_dev(dev);
1568                 sfe_ipv6_destroy_all_rules_for_dev(dev);
1569         }
1570
1571         return NOTIFY_DONE;
1572 }
1573
1574 /*
1575  * fast_classifier_inet_event()
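 *      flush offloaded IPv4 rules for a device when one of its IPv4 addresses goes away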
1576  */
1577 static int fast_classifier_inet_event(struct notifier_block *this, unsigned long event, void *ptr)
1578 {
1579         struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
1580
1581         if (dev && (event == NETDEV_DOWN)) {
1582                 sfe_ipv4_destroy_all_rules_for_dev(dev);
1583         }
1584
1585         return NOTIFY_DONE;
1586 }
1587
1588 /*
1589  * fast_classifier_inet6_event()
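 *      flush offloaded IPv6 rules for a device when one of its IPv6 addresses goes away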
1590  */
1591 static int fast_classifier_inet6_event(struct notifier_block *this, unsigned long event, void *ptr)
1592 {
1593         struct net_device *dev = ((struct inet6_ifaddr *)ptr)->idev->dev;
1594
1595         if (dev && (event == NETDEV_DOWN)) {
1596                 sfe_ipv6_destroy_all_rules_for_dev(dev);
1597         }
1598
1599         return NOTIFY_DONE;
1600 }
1601
1602 /*
1603  * fast_classifier_get_offload_at_pkts()
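 *      sysfs show handler for offload_at_pkts, the packet count at which a flow is offloaded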
1604  */
1605 static ssize_t fast_classifier_get_offload_at_pkts(struct device *dev,
1606                                                    struct device_attribute *attr,
1607                                                    char *buf)
1608 {
1609         return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", offload_at_pkts);
1610 }
1611
1612 /*
1613  * fast_classifier_set_offload_at_pkts()
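 *      sysfs store handler for offload_at_pkts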
1614  */
1615 static ssize_t fast_classifier_set_offload_at_pkts(struct device *dev,
1616                                                    struct device_attribute *attr,
1617                                                    const char *buf, size_t size)
1618 {
1619         long new;
1620         int ret;
1621
1622         ret = kstrtol(buf, 0, &new);
1623         if (ret || ((int)new != new))
1624                 return -EINVAL;
1625
1626         offload_at_pkts = new;
1627
1628         return size;
1629 }
1630
1631 /*
1632  * fast_classifier_get_debug_info()
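 *      dump the connection table and the generic netlink message counters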
1633  */
1634 static ssize_t fast_classifier_get_debug_info(struct device *dev,
1635                                       struct device_attribute *attr,
1636                                       char *buf)
1637 {
1638         size_t len = 0;
1639         struct sfe_connection *conn;
1640         u32 i;
1641 #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
1642         struct hlist_node *node;
1643 #endif
1644
1645         spin_lock_bh(&sfe_connections_lock);
1646         len += scnprintf(buf, PAGE_SIZE - len, "size=%d offload=%d offload_no_match=%d"
1647                         " offloaded=%d done=%d offloaded_fail=%d done_fail=%d\n",
1648                         sfe_connections_size,
1649                         atomic_read(&offload_msgs),
1650                         atomic_read(&offload_no_match_msgs),
1651                         atomic_read(&offloaded_msgs),
1652                         atomic_read(&done_msgs),
1653                         atomic_read(&offloaded_fail_msgs),
1654                         atomic_read(&done_fail_msgs));
1655         sfe_hash_for_each(fc_conn_ht, i, node, conn, hl) {
1656                 len += scnprintf(buf + len, PAGE_SIZE - len,
1657                                 (conn->is_v4 ? "o=%d, p=%d [%pM]:%pI4:%u %pI4:%u:[%pM] m=%08x h=%d\n" : "o=%d, p=%d [%pM]:%pI6:%u %pI6:%u:[%pM] m=%08x h=%d\n"),
1658                                 conn->offloaded,
1659                                 conn->sic->protocol,
1660                                 conn->sic->src_mac,
1661                                 &conn->sic->src_ip,
1662                                 ntohs(conn->sic->src_port),
1663                                 &conn->sic->dest_ip,
1664                                 ntohs(conn->sic->dest_port),
1665                                 conn->sic->dest_mac_xlate,
1666                                 conn->sic->mark,
1667                                 conn->hits);
1668         }
1669         spin_unlock_bh(&sfe_connections_lock);
1670
1671         return len;
1672 }
1673
1674 /*
1675  * fast_classifier_get_skip_bridge_ingress()
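 *      sysfs show handler for the skip_to_bridge_ingress flag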
1676  */
1677 static ssize_t fast_classifier_get_skip_bridge_ingress(struct device *dev,
1678                                                        struct device_attribute *attr,
1679                                                        char *buf)
1680 {
1681         return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", skip_to_bridge_ingress);
1682 }
1683
1684 /*
1685  * fast_classifier_set_skip_bridge_ingress()
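 *      sysfs store handler for the skip_to_bridge_ingress flag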
1686  */
1687 static ssize_t fast_classifier_set_skip_bridge_ingress(struct device *dev,
1688                                                        struct device_attribute *attr,
1689                                                        const char *buf, size_t size)
1690 {
1691         long new;
1692         int ret;
1693
1694         ret = kstrtol(buf, 0, &new);
1695         if (ret || ((int)new != new))
1696                 return -EINVAL;
1697
1698         skip_to_bridge_ingress = new ? 1 : 0;
1699
1700         return size;
1701 }
1702
1703 /*
1704  * fast_classifier_get_exceptions
1705  *      dump exception counters
1706  */
1707 static ssize_t fast_classifier_get_exceptions(struct device *dev,
1708                                      struct device_attribute *attr,
1709                                      char *buf)
1710 {
1711         int idx, len;
1712         struct fast_classifier *sc = &__sc;
1713
1714         spin_lock_bh(&sc->lock);
1715         for (len = 0, idx = 0; idx < FAST_CL_EXCEPTION_MAX; idx++) {
1716                 if (sc->exceptions[idx]) {
1717                         len += snprintf(buf + len, (ssize_t)(PAGE_SIZE - len), "%s = %d\n", fast_classifier_exception_events_string[idx], sc->exceptions[idx]);
1718                 }
1719         }
1720         spin_unlock_bh(&sc->lock);
1721
1722         return len;
1723 }
1724
1725 /*
1726  * sysfs attributes.
1727  */
1728 static const struct device_attribute fast_classifier_offload_at_pkts_attr =
1729         __ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts);
1730 static const struct device_attribute fast_classifier_debug_info_attr =
1731         __ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL);
1732 static const struct device_attribute fast_classifier_skip_bridge_ingress =
1733         __ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress);
1734 static const struct device_attribute fast_classifier_exceptions_attr =
1735         __ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL);
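
/*
 * The kobject below is created with a NULL parent, so (assuming the usual
 * sysfs mount point) these attributes should appear under
 * /sys/fast_classifier/, e.g.:
 *
 *	cat /sys/fast_classifier/exceptions
 *	echo 128 > /sys/fast_classifier/offload_at_pkts
 *	echo 1 > /sys/fast_classifier/skip_to_bridge_ingress
 *
 * The numbers above are illustrative values only.
 */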
1736
1737 /*
1738  * fast_classifier_init()
1739  */
1740 static int __init fast_classifier_init(void)
1741 {
1742         struct fast_classifier *sc = &__sc;
1743         int result = -1;
1744
1745         printk(KERN_ALERT "fast-classifier (PBR safe v2.1.4a): starting up\n");
1746         DEBUG_INFO("SFE CM init\n");
1747
1748         hash_init(fc_conn_ht);
1749
1750         /*
1751          * Create sys/fast_classifier
1752          */
1753         sc->sys_fast_classifier = kobject_create_and_add("fast_classifier", NULL);
1754         if (!sc->sys_fast_classifier) {
1755                 DEBUG_ERROR("failed to register fast_classifier\n");
1756                 goto exit1;
1757         }
1758
1759         result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
1760         if (result) {
1761                 DEBUG_ERROR("failed to register offload_at_pkts sysfs file: %d\n", result);
1762                 goto exit2;
1763         }
1764
1765         result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
1766         if (result) {
1767                 DEBUG_ERROR("failed to register debug_info sysfs file: %d\n", result);
1768                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
1769                 goto exit2;
1770         }
1771
1772         result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
1773         if (result) {
1774                 DEBUG_ERROR("failed to register skip_to_bridge_ingress sysfs file: %d\n", result);
1775                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
1776                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
1777                 goto exit2;
1778         }
1779
1780         result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr);
1781         if (result) {
1782                 DEBUG_ERROR("failed to register exceptions file: %d\n", result);
1783                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
1784                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
1785                 sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
1786                 goto exit2;
1787         }
1788
1789         sc->dev_notifier.notifier_call = fast_classifier_device_event;
1790         sc->dev_notifier.priority = 1;
1791         register_netdevice_notifier(&sc->dev_notifier);
1792
1793         sc->inet_notifier.notifier_call = fast_classifier_inet_event;
1794         sc->inet_notifier.priority = 1;
1795         register_inetaddr_notifier(&sc->inet_notifier);
1796
1797         sc->inet6_notifier.notifier_call = fast_classifier_inet6_event;
1798         sc->inet6_notifier.priority = 1;
1799         register_inet6addr_notifier(&sc->inet6_notifier);
1800
1801         /*
1802          * Register our netfilter hooks.
1803          */
1804         result = nf_register_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
1805         if (result < 0) {
1806                 DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
1807                 goto exit3;
1808         }
1809
1810         /*
1811          * Register a conntrack notifier so we are told promptly about destroyed connections and mark changes.
1812          */
1813 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1814         result = nf_conntrack_register_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
1815 #else
1816         result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
1817 #endif
1818         if (result < 0) {
1819                 DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
1820                 goto exit4;
1821         }
1822
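        /*
         * Generic netlink registration differs across kernel versions: from
         * 4.10 the ops and multicast groups are carried inside the family
         * structure itself, 3.13..4.9 uses the combined
         * genl_register_family_with_ops_groups() helper, and older kernels
         * register the family, the ops and the multicast group in three steps.
         */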
1823 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
1824         result = genl_register_family(&fast_classifier_gnl_family);
1825         if (result) {
1826                 DEBUG_ERROR("failed to register genl family: %d\n", result);
1827                 goto exit5;
1828         }
1829 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
1830         result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family,
1831                                                       fast_classifier_gnl_ops,
1832                                                       fast_classifier_genl_mcgrp);
1833         if (result) {
1834                 DEBUG_ERROR("failed to register genl ops: %d\n", result);
1835                 goto exit5;
1836         }
1837 #else
1838         result = genl_register_family(&fast_classifier_gnl_family);
1839         if (result) {
1840                 printk(KERN_CRIT "unable to register genl family\n");
1841                 goto exit5;
1842         }
1843
1844         result = genl_register_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops);
1845         if (result) {
1846                 printk(KERN_CRIT "unable to register ops\n");
1847                 goto exit6;
1848         }
1849
1850         result = genl_register_mc_group(&fast_classifier_gnl_family,
1851                                         fast_classifier_genl_mcgrp);
1852         if (result) {
1853                 printk(KERN_CRIT "unable to register multicast group\n");
1854                 goto exit6;
1855         }
1856 #endif
1857
1858         printk(KERN_ALERT "fast-classifier: registered\n");
1859
1860         spin_lock_init(&sc->lock);
1861
1862         /*
1863          * Hook the receive path in the network stack.
1864          */
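        /*
         * athrs_fast_nat_recv is an RCU-protected function pointer consulted
         * by the SFE-patched core receive path for every packet (an
         * out-of-tree hook, not a stock kernel symbol).  The BUG_ON() catches
         * a second module trying to claim it.
         */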
1865         BUG_ON(athrs_fast_nat_recv);
1866         RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv);
1867
1868         /*
1869          * Hook the shortcut sync callback.
1870          */
1871         sfe_ipv4_register_sync_rule_callback(fast_classifier_sync_rule);
1872         sfe_ipv6_register_sync_rule_callback(fast_classifier_sync_rule);
1873         return 0;
1874
1875 #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0))
1876 exit6:
1877         genl_unregister_family(&fast_classifier_gnl_family);
1878 #endif
1879
1880 exit5:
1881 #ifdef CONFIG_NF_CONNTRACK_EVENTS
1882 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1883         nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
1884 #else
1885         nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
1886 #endif
1887
1888 exit4:
1889 #endif
1890         nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
1891
1892 exit3:
1893         unregister_inetaddr_notifier(&sc->inet_notifier);
1894         unregister_inet6addr_notifier(&sc->inet6_notifier);
1895         unregister_netdevice_notifier(&sc->dev_notifier);
1896         sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
1897         sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
1898         sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
1899         sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr);
1900
1901 exit2:
1902         kobject_put(sc->sys_fast_classifier);
1903
1904 exit1:
1905         return result;
1906 }
1907
1908 /*
1909  * fast_classifier_exit()
1910  */
1911 static void __exit fast_classifier_exit(void)
1912 {
1913         struct fast_classifier *sc = &__sc;
1914         int result = -1;
1915
1916         DEBUG_INFO("SFE CM exit\n");
1917         printk(KERN_ALERT "fast-classifier: shutting down\n");
1918
1919         /*
1920          * Unregister our sync callback.
1921          */
1922         sfe_ipv4_register_sync_rule_callback(NULL);
1923         sfe_ipv6_register_sync_rule_callback(NULL);
1924
1925         /*
1926          * Unregister our receive callback.
1927          */
1928         RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
1929
1930         /*
1931          * Wait for all callbacks to complete.
1932          */
1933         rcu_barrier();
1934
1935         /*
1936          * Destroy all connections.
1937          */
1938         sfe_ipv4_destroy_all_rules_for_dev(NULL);
1939         sfe_ipv6_destroy_all_rules_for_dev(NULL);
1940
1941 #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0))
1942         result = genl_unregister_ops(&fast_classifier_gnl_family, fast_classifier_gnl_ops);
1943         if (result != 0) {
1944                 printk(KERN_CRIT "Unable to unregister genl_ops\n");
1945         }
1946 #endif
1947
1948         result = genl_unregister_family(&fast_classifier_gnl_family);
1949         if (result != 0) {
1950                 printk(KERN_CRIT "Unable to unregister genl_family\n");
1951         }
1952
1953 #ifdef CONFIG_NF_CONNTRACK_EVENTS
1954 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
1955         nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
1956 #else
1957         nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
1958 #endif
1959 #endif
1960         nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
1961
1962         unregister_inet6addr_notifier(&sc->inet6_notifier);
1963         unregister_inetaddr_notifier(&sc->inet_notifier);
1964         unregister_netdevice_notifier(&sc->dev_notifier);
1965
1966         kobject_put(sc->sys_fast_classifier);
1967 }
1968
1969 module_init(fast_classifier_init)
1970 module_exit(fast_classifier_exit)
1971
1972 MODULE_DESCRIPTION("Shortcut Forwarding Engine - Connection Manager");
1973 MODULE_LICENSE("Dual BSD/GPL");
1974