OSDN Git Service

mediatek: add missing Kconfig
[immortalwrt/immortalwrt.git] / package / kernel / shortcut-fe / src / sfe_ipv4.c
1 /*
2  * sfe_ipv4.c
3  *      Shortcut forwarding engine - IPv4 edition.
4  *
5  * Copyright (c) 2013-2016, 2019, The Linux Foundation. All rights reserved.
6  * Permission to use, copy, modify, and/or distribute this software for
7  * any purpose with or without fee is hereby granted, provided that the
8  * above copyright notice and this permission notice appear in all copies.
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/skbuff.h>
21 #include <linux/icmp.h>
22 #include <net/tcp.h>
23 #include <linux/etherdevice.h>
24 #include <linux/version.h>
25
26 #include "sfe.h"
27 #include "sfe_cm.h"
28
29 /*
30  * By default Linux IP header and transport layer header structures are
31  * unpacked, assuming that such headers should be 32-bit aligned.
32  * Unfortunately some wireless adaptors can't cope with this requirement and
33  * some CPUs can't handle misaligned accesses.  For those platforms we
34  * define SFE_IPV4_UNALIGNED_IP_HEADER and mark the structures as packed.
35  * When we do this the compiler will generate slightly worse code than for the
36  * aligned case (on most platforms) but will be much quicker than fixing
37  * things up in an unaligned trap handler.
38  */
39 #define SFE_IPV4_UNALIGNED_IP_HEADER 1
40 #if SFE_IPV4_UNALIGNED_IP_HEADER
41 #define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
42 #else
43 #define SFE_IPV4_UNALIGNED_STRUCT
44 #endif
45
46 /*
47  * An Ethernet header, but with an optional "packed" attribute to
48  * help with performance on some platforms (see the definition of
49  * SFE_IPV4_UNALIGNED_STRUCT)
50  */
struct sfe_ipv4_eth_hdr {
	__be16 h_dest[ETH_ALEN / 2];	/* Destination MAC, stored as three 16-bit words for fast copies */
	__be16 h_source[ETH_ALEN / 2];	/* Source MAC, stored as three 16-bit words for fast copies */
	__be16 h_proto;			/* Ethertype (network byte order) */
} SFE_IPV4_UNALIGNED_STRUCT;
56
/*
 * The low two bits of the IPv4 ToS byte are the ECN field; DSCP occupies
 * the upper six bits.  Presumably used as "(tos & MASK) | (dscp << SHIFT)"
 * when remarking — confirm at the call sites.
 */
#define SFE_IPV4_DSCP_MASK 0x3
#define SFE_IPV4_DSCP_SHIFT 2
59
60 /*
61  * An IPv4 header, but with an optional "packed" attribute to
62  * help with performance on some platforms (see the definition of
63  * SFE_IPV4_UNALIGNED_STRUCT)
64  */
struct sfe_ipv4_ip_hdr {
	/*
	 * Bitfield order depends on endianness; this mirrors struct iphdr.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8 ihl:4,			/* Header length in 32-bit words */
	     version:4;			/* IP version (4) */
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u8 version:4,
	     ihl:4;
#else
#error  "Please fix <asm/byteorder.h>"
#endif
	__u8 tos;			/* Type of service (DSCP + ECN) */
	__be16 tot_len;			/* Total datagram length, header included */
	__be16 id;			/* Identification (used for fragmentation) */
	__be16 frag_off;		/* Fragment offset and DF/MF flags */
	__u8 ttl;			/* Time to live */
	__u8 protocol;			/* Transport protocol (e.g. IPPROTO_TCP/UDP) */
	__sum16 check;			/* Header checksum */
	__be32 saddr;			/* Source address */
	__be32 daddr;			/* Destination address */

	/*
	 * The options start here.
	 */
} SFE_IPV4_UNALIGNED_STRUCT;
89
90 /*
91  * A UDP header, but with an optional "packed" attribute to
92  * help with performance on some platforms (see the definition of
93  * SFE_IPV4_UNALIGNED_STRUCT)
94  */
struct sfe_ipv4_udp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be16 len;			/* Length of UDP header plus payload */
	__sum16 check;			/* UDP checksum (0 means "no checksum" in IPv4) */
} SFE_IPV4_UNALIGNED_STRUCT;
101
102 /*
103  * A TCP header, but with an optional "packed" attribute to
104  * help with performance on some platforms (see the definition of
105  * SFE_IPV4_UNALIGNED_STRUCT)
106  */
struct sfe_ipv4_tcp_hdr {
	__be16 source;			/* Source port */
	__be16 dest;			/* Destination port */
	__be32 seq;			/* Sequence number */
	__be32 ack_seq;			/* Acknowledgment number */
	/*
	 * Flag-bit order depends on endianness; this mirrors struct tcphdr.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16 res1:4,
	      doff:4,			/* Data offset: header length in 32-bit words */
	      fin:1,
	      syn:1,
	      rst:1,
	      psh:1,
	      ack:1,
	      urg:1,
	      ece:1,
	      cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16 doff:4,
	      res1:4,
	      cwr:1,
	      ece:1,
	      urg:1,
	      ack:1,
	      psh:1,
	      rst:1,
	      syn:1,
	      fin:1;
#else
#error  "Adjust your <asm/byteorder.h> defines"
#endif
	__be16 window;			/* Receive window size (unscaled) */
	__sum16 check;			/* TCP checksum */
	__be16 urg_ptr;			/* Urgent pointer */
} SFE_IPV4_UNALIGNED_STRUCT;
141
142 /*
143  * Specifies the lower bound on ACK numbers carried in the TCP header
144  */
145 #define SFE_IPV4_TCP_MAX_ACK_WINDOW 65520
146
147 /*
148  * IPv4 TCP connection match additional data.
149  */
struct sfe_ipv4_tcp_connection_match {
	u8 win_scale;		/* Window scale factor negotiated for this direction */
	u32 max_win;		/* Maximum window size seen */
	u32 end;			/* Sequence number of the next byte to send (seq + segment length) */
	u32 max_end;		/* Sequence number of the last byte to ack */
};
156
157 /*
158  * Bit flags for IPv4 connection matching entry.
159  */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
					/* Perform source translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
					/* Perform destination translation */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
					/* Ignore TCP sequence numbers */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
					/* Fast Ethernet header write */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
					/* Write an L2 header on transmit */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
					/* remark priority of SKB */
#define SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
					/* remark DSCP of packet */
174
175 /*
176  * IPv4 connection matching structure.
177  */
struct sfe_ipv4_connection_match {
	/*
	 * References to other objects.
	 */
	struct sfe_ipv4_connection_match *next;
					/* Next entry in the conn_match_hash chain */
	struct sfe_ipv4_connection_match *prev;
					/* Previous entry in the conn_match_hash chain */
	struct sfe_ipv4_connection *connection;
					/* Connection that owns this match entry */
	struct sfe_ipv4_connection_match *counter_match;
					/* Matches the flow in the opposite direction as the one in *connection */
	struct sfe_ipv4_connection_match *active_next;
	struct sfe_ipv4_connection_match *active_prev;
	bool active;                    /* Flag to indicate if we're on the active list */

	/*
	 * Characteristics that identify flows that match this rule.
	 */
	struct net_device *match_dev;   /* Network device */
	u8 match_protocol;              /* Protocol */
	__be32 match_src_ip;            /* Source IP address */
	__be32 match_dest_ip;           /* Destination IP address */
	__be16 match_src_port;          /* Source port/connection ident */
	__be16 match_dest_port;         /* Destination port/connection ident */

	/*
	 * Control the operations of the match.
	 */
	u32 flags;                      /* Bit flags (SFE_IPV4_CONNECTION_MATCH_FLAG_*) */
#ifdef CONFIG_NF_FLOW_COOKIE
	u32 flow_cookie;                /* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
	u32 flow_accel;             /* The flow accelerated or not */
#endif

	/*
	 * Connection state that we track once we match.
	 */
	union {                         /* Protocol-specific state */
		struct sfe_ipv4_tcp_connection_match tcp;
	} protocol_state;
	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * rx_packet_count64/rx_byte_count64 after a sync period.
	 */
	u32 rx_packet_count;
	u32 rx_byte_count;

	/*
	 * Packet translation information.  The csum adjustments are
	 * precomputed by sfe_ipv4_connection_match_compute_translations().
	 */
	__be32 xlate_src_ip;            /* Address after source translation */
	__be16 xlate_src_port;  /* Port/connection ident after source translation */
	u16 xlate_src_csum_adjustment;
					/* Transport layer checksum adjustment after source translation */
	u16 xlate_src_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after source translation */

	__be32 xlate_dest_ip;           /* Address after destination translation */
	__be16 xlate_dest_port; /* Port/connection ident after destination translation */
	u16 xlate_dest_csum_adjustment;
					/* Transport layer checksum adjustment after destination translation */
	u16 xlate_dest_partial_csum_adjustment;
					/* Transport layer pseudo header checksum adjustment after destination translation */

	/*
	 * QoS information
	 */
	u32 priority;
	u32 dscp;

	/*
	 * Packet transmit information.
	 */
	struct net_device *xmit_dev;    /* Network device on which to transmit */
	unsigned short int xmit_dev_mtu;
					/* Interface MTU */
	u16 xmit_dest_mac[ETH_ALEN / 2];
					/* Destination MAC address to use when forwarding */
	u16 xmit_src_mac[ETH_ALEN / 2];
					/* Source MAC address to use when forwarding */

	/*
	 * Summary stats.
	 */
	u64 rx_packet_count64;
	u64 rx_byte_count64;
};
265
266 /*
267  * Per-connection data structure.
268  */
struct sfe_ipv4_connection {
	/* NOTE(review): presumably all fields are protected by si->lock — confirm at call sites */
	struct sfe_ipv4_connection *next;
					/* Pointer to the next entry in a hash chain */
	struct sfe_ipv4_connection *prev;
					/* Pointer to the previous entry in a hash chain */
	int protocol;                   /* IP protocol number */
	__be32 src_ip;                  /* Src IP addr pre-translation */
	__be32 src_ip_xlate;            /* Src IP addr post-translation */
	__be32 dest_ip;                 /* Dest IP addr pre-translation */
	__be32 dest_ip_xlate;           /* Dest IP addr post-translation */
	__be16 src_port;                /* Src port pre-translation */
	__be16 src_port_xlate;          /* Src port post-translation */
	__be16 dest_port;               /* Dest port pre-translation */
	__be16 dest_port_xlate;         /* Dest port post-translation */
	struct sfe_ipv4_connection_match *original_match;
					/* Original direction matching structure */
	struct net_device *original_dev;
					/* Original direction source device */
	struct sfe_ipv4_connection_match *reply_match;
					/* Reply direction matching structure */
	struct net_device *reply_dev;   /* Reply direction source device */
	u64 last_sync_jiffies;          /* Jiffies count for the last sync */
	struct sfe_ipv4_connection *all_connections_next;
					/* Pointer to the next entry in the list of all connections */
	struct sfe_ipv4_connection *all_connections_prev;
					/* Pointer to the previous entry in the list of all connections */
	u32 mark;                       /* mark for outgoing packet */
	u32 debug_read_seq;             /* sequence number for debug dump */
};
298
299 /*
300  * IPv4 connections and hash table size information.
301  */
302 #define SFE_IPV4_CONNECTION_HASH_SHIFT 12
303 #define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
304 #define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
305
306 #ifdef CONFIG_NF_FLOW_COOKIE
307 #define SFE_FLOW_COOKIE_SIZE 2048
308 #define SFE_FLOW_COOKIE_MASK 0x7ff
309
struct sfe_flow_cookie_entry {
	struct sfe_ipv4_connection_match *match;	/* Connection match entry bound to this cookie slot */
	unsigned long last_clean_time;			/* Timestamp of the last cleanup of this slot (presumably jiffies — confirm) */
};
314 #endif
315
/*
 * Reasons why a packet is handed back to the slow path.
 * NOTE: the order here must stay in sync with sfe_ipv4_exception_events_string[].
 */
enum sfe_ipv4_exception_events {
	SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
	SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV4_EXCEPTION_EVENT_NON_V4,
	SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR,
	SFE_IPV4_EXCEPTION_EVENT_LAST	/* Sentinel: number of exception events */
};
356
/*
 * Human-readable names for the exception events, indexed by
 * enum sfe_ipv4_exception_events — order must match that enum.
 */
static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
	"UDP_HEADER_INCOMPLETE",
	"UDP_NO_CONNECTION",
	"UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"UDP_SMALL_TTL",
	"UDP_NEEDS_FRAGMENTATION",
	"TCP_HEADER_INCOMPLETE",
	"TCP_NO_CONNECTION_SLOW_FLAGS",
	"TCP_NO_CONNECTION_FAST_FLAGS",
	"TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
	"TCP_SMALL_TTL",
	"TCP_NEEDS_FRAGMENTATION",
	"TCP_FLAGS",
	"TCP_SEQ_EXCEEDS_RIGHT_EDGE",
	"TCP_SMALL_DATA_OFFS",
	"TCP_BAD_SACK",
	"TCP_BIG_DATA_OFFS",
	"TCP_SEQ_BEFORE_LEFT_EDGE",
	"TCP_ACK_EXCEEDS_RIGHT_EDGE",
	"TCP_ACK_BEFORE_LEFT_EDGE",
	"ICMP_HEADER_INCOMPLETE",
	"ICMP_UNHANDLED_TYPE",
	"ICMP_IPV4_HEADER_INCOMPLETE",
	"ICMP_IPV4_NON_V4",
	"ICMP_IPV4_IP_OPTIONS_INCOMPLETE",
	"ICMP_IPV4_UDP_HEADER_INCOMPLETE",
	"ICMP_IPV4_TCP_HEADER_INCOMPLETE",
	"ICMP_IPV4_UNHANDLED_PROTOCOL",
	"ICMP_NO_CONNECTION",
	"ICMP_FLUSHED_CONNECTION",
	"HEADER_INCOMPLETE",
	"BAD_TOTAL_LENGTH",
	"NON_V4",
	"NON_INITIAL_FRAGMENT",
	"DATAGRAM_INCOMPLETE",
	"IP_OPTIONS_INCOMPLETE",
	"UNHANDLED_PROTOCOL",
	"CLONED_SKB_UNSHARE_ERROR"
};
396
397 /*
398  * Per-module structure.
399  */
struct sfe_ipv4 {
	spinlock_t lock;                /* Lock for SMP correctness */
	struct sfe_ipv4_connection_match *active_head;
					/* Head of the list of recently active connections */
	struct sfe_ipv4_connection_match *active_tail;
					/* Tail of the list of recently active connections */
	struct sfe_ipv4_connection *all_connections_head;
					/* Head of the list of all connections */
	struct sfe_ipv4_connection *all_connections_tail;
					/* Tail of the list of all connections */
	unsigned int num_connections;   /* Number of connections */
	struct timer_list timer;        /* Timer used for periodic sync ops */
	sfe_sync_rule_callback_t __rcu sync_rule_callback;
					/* Callback function registered by a connection manager for stats syncing */
	struct sfe_ipv4_connection *conn_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection hash table */
	struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
					/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
	struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
					/* flow cookie table*/
	flow_cookie_set_func_t flow_cookie_set_func;
					/* function used to configure flow cookie in hardware*/
	int flow_cookie_enable;
					/* Enable/disable flow cookie at runtime */
#endif

	/*
	 * Stats recorded in a sync period. These stats will be added to
	 * connection_xxx64 after a sync period (see sfe_ipv4_update_summary_stats()).
	 */
	u32 connection_create_requests;
					/* Number of IPv4 connection create requests */
	u32 connection_create_collisions;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u32 connection_destroy_requests;
					/* Number of IPv4 connection destroy requests */
	u32 connection_destroy_misses;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u32 connection_match_hash_hits;
					/* Number of IPv4 connection match hash hits */
	u32 connection_match_hash_reorders;
					/* Number of IPv4 connection match hash reorders */
	u32 connection_flushes;         /* Number of IPv4 connection flushes */
	u32 packets_forwarded;          /* Number of IPv4 packets forwarded */
	u32 packets_not_forwarded;      /* Number of IPv4 packets not forwarded */
	u32 exception_events[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Summary statistics (running 64-bit totals of the per-period stats above).
	 */
	u64 connection_create_requests64;
					/* Number of IPv4 connection create requests */
	u64 connection_create_collisions64;
					/* Number of IPv4 connection create requests that collided with existing hash table entries */
	u64 connection_destroy_requests64;
					/* Number of IPv4 connection destroy requests */
	u64 connection_destroy_misses64;
					/* Number of IPv4 connection destroy requests that missed our hash table */
	u64 connection_match_hash_hits64;
					/* Number of IPv4 connection match hash hits */
	u64 connection_match_hash_reorders64;
					/* Number of IPv4 connection match hash reorders */
	u64 connection_flushes64;       /* Number of IPv4 connection flushes */
	u64 packets_forwarded64;        /* Number of IPv4 packets forwarded */
	u64 packets_not_forwarded64;
					/* Number of IPv4 packets not forwarded */
	u64 exception_events64[SFE_IPV4_EXCEPTION_EVENT_LAST];

	/*
	 * Control state.
	 */
	struct kobject *sys_sfe_ipv4;   /* sysfs linkage */
	int debug_dev;                  /* Major number of the debug char device */
	u32 debug_read_seq;     /* sequence number for debug dump */
};
476
477 /*
478  * Enumeration of the XML output.
479  */
/*
 * States of the debug-dump XML writer state machine
 * (presumably advanced in declaration order during a dump — confirm in the writer).
 */
enum sfe_ipv4_debug_xml_states {
	SFE_IPV4_DEBUG_XML_STATE_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV4_DEBUG_XML_STATE_CONNECTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV4_DEBUG_XML_STATE_EXCEPTIONS_END,
	SFE_IPV4_DEBUG_XML_STATE_STATS,
	SFE_IPV4_DEBUG_XML_STATE_END,
	SFE_IPV4_DEBUG_XML_STATE_DONE
};
492
493 /*
494  * XML write state.
495  */
struct sfe_ipv4_debug_xml_write_state {
	enum sfe_ipv4_debug_xml_states state;
					/* XML output file state machine state */
	int iter_exception;             /* Next exception iterator */
};
501
/*
 * Signature of the per-state debug XML writer methods.
 */
typedef bool (*sfe_ipv4_debug_xml_write_method_t)(struct sfe_ipv4 *si, char *buffer, char *msg, size_t *length,
						  int *total_read, struct sfe_ipv4_debug_xml_write_state *ws);

/*
 * The single module-wide engine instance.
 * NOTE(review): identifiers starting with "__" are reserved in C; consider renaming.
 */
static struct sfe_ipv4 __si;
506
507 /*
508  * sfe_ipv4_gen_ip_csum()
509  *      Generate the IP checksum for an IPv4 header.
510  *
511  * Note that this function assumes that we have only 20 bytes of IP header.
512  */
513 static inline u16 sfe_ipv4_gen_ip_csum(struct sfe_ipv4_ip_hdr *iph)
514 {
515         u32 sum;
516         u16 *i = (u16 *)iph;
517
518         iph->check = 0;
519
520         /*
521          * Generate the sum.
522          */
523         sum = i[0] + i[1] + i[2] + i[3] + i[4] + i[5] + i[6] + i[7] + i[8] + i[9];
524
525         /*
526          * Fold it to ones-complement form.
527          */
528         sum = (sum & 0xffff) + (sum >> 16);
529         sum = (sum & 0xffff) + (sum >> 16);
530
531         return (u16)sum ^ 0xffff;
532 }
533
534 /*
535  * sfe_ipv4_get_connection_match_hash()
536  *      Generate the hash used in connection match lookups.
537  */
538 static inline unsigned int sfe_ipv4_get_connection_match_hash(struct net_device *dev, u8 protocol,
539                                                               __be32 src_ip, __be16 src_port,
540                                                               __be32 dest_ip, __be16 dest_port)
541 {
542         size_t dev_addr = (size_t)dev;
543         u32 hash = ((u32)dev_addr) ^ ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
544         return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
545 }
546
547 /*
548  * sfe_ipv4_find_sfe_ipv4_connection_match()
549  *      Get the IPv4 flow match info that corresponds to a particular 5-tuple.
550  *
551  * On entry we must be holding the lock that protects the hash table.
552  */
553 static struct sfe_ipv4_connection_match *
554 sfe_ipv4_find_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct net_device *dev, u8 protocol,
555                                         __be32 src_ip, __be16 src_port,
556                                         __be32 dest_ip, __be16 dest_port)
557 {
558         struct sfe_ipv4_connection_match *cm;
559         struct sfe_ipv4_connection_match *head;
560         unsigned int conn_match_idx;
561
562         conn_match_idx = sfe_ipv4_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
563         cm = si->conn_match_hash[conn_match_idx];
564
565         /*
566          * If we don't have anything in this chain then bail.
567          */
568         if (unlikely(!cm)) {
569                 return NULL;
570         }
571
572         /*
573          * Hopefully the first entry is the one we want.
574          */
575         if ((cm->match_src_port == src_port)
576             && (cm->match_dest_port == dest_port)
577             && (cm->match_src_ip == src_ip)
578             && (cm->match_dest_ip == dest_ip)
579             && (cm->match_protocol == protocol)
580             && (cm->match_dev == dev)) {
581                 si->connection_match_hash_hits++;
582                 return cm;
583         }
584
585         /*
586          * Unfortunately we didn't find it at head, so we search it in chain and
587          * move matching entry to the top of the hash chain. We presume that this
588          * will be reused again very quickly.
589          */
590         head = cm;
591         do {
592                 cm = cm->next;
593         } while (cm && (cm->match_src_port != src_port
594                  || cm->match_dest_port != dest_port
595                  || cm->match_src_ip != src_ip
596                  || cm->match_dest_ip != dest_ip
597                  || cm->match_protocol != protocol
598                  || cm->match_dev != dev));
599
600         /*
601          * Not found then we're done.
602          */
603         if (unlikely(!cm)) {
604                 return NULL;
605         }
606
607         /*
608          * We found a match so move it.
609          */
610         if (cm->next) {
611                 cm->next->prev = cm->prev;
612         }
613         cm->prev->next = cm->next;
614         cm->prev = NULL;
615         cm->next = head;
616         head->prev = cm;
617         si->conn_match_hash[conn_match_idx] = cm;
618         si->connection_match_hash_reorders++;
619
620         return cm;
621 }
622
623 /*
624  * sfe_ipv4_connection_match_update_summary_stats()
625  *      Update the summary stats for a connection match entry.
626  */
627 static inline void sfe_ipv4_connection_match_update_summary_stats(struct sfe_ipv4_connection_match *cm)
628 {
629         cm->rx_packet_count64 += cm->rx_packet_count;
630         cm->rx_packet_count = 0;
631         cm->rx_byte_count64 += cm->rx_byte_count;
632         cm->rx_byte_count = 0;
633 }
634
635 /*
636  * sfe_ipv4_connection_match_compute_translations()
637  *      Compute port and address translations for a connection match entry.
638  */
static void sfe_ipv4_connection_match_compute_translations(struct sfe_ipv4_connection_match *cm)
{
	/*
	 * Before we insert the entry look to see if this is tagged as doing address
	 * translations.  If it is then work out the adjustment that we need to apply
	 * to the transport checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624:
		 * sum the old values and the ones-complement of the new values, folded
		 * to 16 bits.
		 */
		u16 src_ip_hi = cm->match_src_ip >> 16;
		u16 src_ip_lo = cm->match_src_ip & 0xffff;
		u32 xlate_src_ip = ~cm->xlate_src_ip;
		u16 xlate_src_ip_hi = xlate_src_ip >> 16;
		u16 xlate_src_ip_lo = xlate_src_ip & 0xffff;
		u16 xlate_src_port = ~cm->xlate_src_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = src_ip_hi + src_ip_lo + cm->match_src_port
		      + xlate_src_ip_hi + xlate_src_ip_lo + xlate_src_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_csum_adjustment = (u16)adj;

	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		/*
		 * Precompute an incremental checksum adjustment so we can
		 * edit packets in this stream very quickly.  The algorithm is from RFC1624.
		 */
		u16 dest_ip_hi = cm->match_dest_ip >> 16;
		u16 dest_ip_lo = cm->match_dest_ip & 0xffff;
		u32 xlate_dest_ip = ~cm->xlate_dest_ip;
		u16 xlate_dest_ip_hi = xlate_dest_ip >> 16;
		u16 xlate_dest_ip_lo = xlate_dest_ip & 0xffff;
		u16 xlate_dest_port = ~cm->xlate_dest_port;
		u32 adj;

		/*
		 * When we compute this fold it down to a 16-bit offset
		 * as that way we can avoid having to do a double
		 * folding of the twos-complement result because the
		 * addition of 2 16-bit values cannot cause a double
		 * wrap-around!
		 */
		adj = dest_ip_hi + dest_ip_lo + cm->match_dest_port
		      + xlate_dest_ip_hi + xlate_dest_ip_lo + xlate_dest_port;
		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_csum_adjustment = (u16)adj;
	}

	/*
	 * Partial adjustments below cover only the IP address delta (not the
	 * port): a ones-complement difference with end-around carry, for use
	 * against the pseudo-header checksum.
	 */
	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC) {
		u32 adj = ~cm->match_src_ip + cm->xlate_src_ip;
		if (adj < cm->xlate_src_ip) {
			adj++;	/* end-around carry */
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_src_partial_csum_adjustment = (u16)adj;
	}

	if (cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST) {
		u32 adj = ~cm->match_dest_ip + cm->xlate_dest_ip;
		if (adj < cm->xlate_dest_ip) {
			adj++;	/* end-around carry */
		}

		adj = (adj & 0xffff) + (adj >> 16);
		adj = (adj & 0xffff) + (adj >> 16);
		cm->xlate_dest_partial_csum_adjustment = (u16)adj;
	}

}
724
725 /*
726  * sfe_ipv4_update_summary_stats()
727  *      Update the summary stats.
728  */
729 static void sfe_ipv4_update_summary_stats(struct sfe_ipv4 *si)
730 {
731         int i;
732
733         si->connection_create_requests64 += si->connection_create_requests;
734         si->connection_create_requests = 0;
735         si->connection_create_collisions64 += si->connection_create_collisions;
736         si->connection_create_collisions = 0;
737         si->connection_destroy_requests64 += si->connection_destroy_requests;
738         si->connection_destroy_requests = 0;
739         si->connection_destroy_misses64 += si->connection_destroy_misses;
740         si->connection_destroy_misses = 0;
741         si->connection_match_hash_hits64 += si->connection_match_hash_hits;
742         si->connection_match_hash_hits = 0;
743         si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
744         si->connection_match_hash_reorders = 0;
745         si->connection_flushes64 += si->connection_flushes;
746         si->connection_flushes = 0;
747         si->packets_forwarded64 += si->packets_forwarded;
748         si->packets_forwarded = 0;
749         si->packets_not_forwarded64 += si->packets_not_forwarded;
750         si->packets_not_forwarded = 0;
751
752         for (i = 0; i < SFE_IPV4_EXCEPTION_EVENT_LAST; i++) {
753                 si->exception_events64[i] += si->exception_events[i];
754                 si->exception_events[i] = 0;
755         }
756 }
757
758 /*
759  * sfe_ipv4_insert_sfe_ipv4_connection_match()
760  *      Insert a connection match into the hash.
761  *
762  * On entry we must be holding the lock that protects the hash table.
763  */
static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si,
							     struct sfe_ipv4_connection_match *cm)
{
	struct sfe_ipv4_connection_match **hash_head;
	struct sfe_ipv4_connection_match *prev_head;
	unsigned int conn_match_idx
		= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
						     cm->match_src_ip, cm->match_src_port,
						     cm->match_dest_ip, cm->match_dest_port);

	/*
	 * Push the match entry onto the head of its doubly-linked hash chain.
	 */
	hash_head = &si->conn_match_hash[conn_match_idx];
	prev_head = *hash_head;
	cm->prev = NULL;
	if (prev_head) {
		prev_head->prev = cm;
	}

	cm->next = prev_head;
	*hash_head = cm;

#ifdef CONFIG_NF_FLOW_COOKIE
	if (!si->flow_cookie_enable)
		return;

	/*
	 * Configure hardware to put a flow cookie in packet of this flow,
	 * then we can accelerate the lookup process when we received this packet.
	 * conn_match_idx is reused below as the cookie-table index; slot 0 is
	 * deliberately skipped (a zero cookie means "no cookie").
	 */
	for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
		struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

		/*
		 * Only reuse a slot that is free AND has been idle for at least
		 * one second (last_clean_time + HZ) — this throttles slot churn.
		 */
		if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
			flow_cookie_set_func_t func;

			rcu_read_lock();
			func = rcu_dereference(si->flow_cookie_set_func);
			if (func) {
				/*
				 * A zero return from the driver callback indicates the
				 * hardware accepted the flow; record the cookie on the
				 * match entry so lookups can use it.
				 */
				if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					 cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
					entry->match = cm;
					cm->flow_cookie = conn_match_idx;
				}
			}
			rcu_read_unlock();

			break;
		}
	}
#endif
}
814
815 /*
816  * sfe_ipv4_remove_sfe_ipv4_connection_match()
817  *      Remove a connection match object from the hash.
818  *
819  * On entry we must be holding the lock that protects the hash table.
820  */
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
	if (si->flow_cookie_enable) {
		/*
		 * Tell hardware that we no longer need a flow cookie in packet of this flow
		 */
		unsigned int conn_match_idx;

		for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
			struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];

			if (cm == entry->match) {
				flow_cookie_set_func_t func;

				/*
				 * Ask the driver (cookie value 0) to stop tagging this
				 * flow, then release the table slot.  last_clean_time
				 * starts the one-second reuse hold-off applied on insert.
				 */
				rcu_read_lock();
				func = rcu_dereference(si->flow_cookie_set_func);
				if (func) {
					func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
					     cm->match_dest_ip, cm->match_dest_port, 0);
				}
				rcu_read_unlock();

				cm->flow_cookie = 0;
				entry->match = NULL;
				entry->last_clean_time = jiffies;
				break;
			}
		}
	}
#endif

	/*
	 * Unlink the connection match entry from the hash.
	 * If there's no predecessor this entry is the chain head, so the
	 * bucket index has to be recomputed to advance the head pointer.
	 */
	if (cm->prev) {
		cm->prev->next = cm->next;
	} else {
		unsigned int conn_match_idx
			= sfe_ipv4_get_connection_match_hash(cm->match_dev, cm->match_protocol,
							     cm->match_src_ip, cm->match_src_port,
							     cm->match_dest_ip, cm->match_dest_port);
		si->conn_match_hash[conn_match_idx] = cm->next;
	}

	if (cm->next) {
		cm->next->prev = cm->prev;
	}

	/*
	 * If the connection match entry is in the active list remove it.
	 * The active flag is deliberately left as-is here; callers free or
	 * reset the entry afterwards.
	 */
	if (cm->active) {
		if (likely(cm->active_prev)) {
			cm->active_prev->active_next = cm->active_next;
		} else {
			si->active_head = cm->active_next;
		}

		if (likely(cm->active_next)) {
			cm->active_next->active_prev = cm->active_prev;
		} else {
			si->active_tail = cm->active_prev;
		}
	}
}
887
888 /*
889  * sfe_ipv4_get_connection_hash()
890  *      Generate the hash used in connection lookups.
891  */
892 static inline unsigned int sfe_ipv4_get_connection_hash(u8 protocol, __be32 src_ip, __be16 src_port,
893                                                         __be32 dest_ip, __be16 dest_port)
894 {
895         u32 hash = ntohl(src_ip ^ dest_ip) ^ protocol ^ ntohs(src_port ^ dest_port);
896         return ((hash >> SFE_IPV4_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV4_CONNECTION_HASH_MASK;
897 }
898
899 /*
900  * sfe_ipv4_find_sfe_ipv4_connection()
901  *      Get the IPv4 connection info that corresponds to a particular 5-tuple.
902  *
903  * On entry we must be holding the lock that protects the hash table.
904  */
905 static inline struct sfe_ipv4_connection *sfe_ipv4_find_sfe_ipv4_connection(struct sfe_ipv4 *si, u32 protocol,
906                                                                             __be32 src_ip, __be16 src_port,
907                                                                             __be32 dest_ip, __be16 dest_port)
908 {
909         struct sfe_ipv4_connection *c;
910         unsigned int conn_idx = sfe_ipv4_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
911         c = si->conn_hash[conn_idx];
912
913         /*
914          * If we don't have anything in this chain then bale.
915          */
916         if (unlikely(!c)) {
917                 return NULL;
918         }
919
920         /*
921          * Hopefully the first entry is the one we want.
922          */
923         if ((c->src_port == src_port)
924             && (c->dest_port == dest_port)
925             && (c->src_ip == src_ip)
926             && (c->dest_ip == dest_ip)
927             && (c->protocol == protocol)) {
928                 return c;
929         }
930
931         /*
932          * Unfortunately we didn't find it at head, so we search it in chain.
933          */
934         do {
935                 c = c->next;
936         } while (c && (c->src_port != src_port
937                  || c->dest_port != dest_port
938                  || c->src_ip != src_ip
939                  || c->dest_ip != dest_ip
940                  || c->protocol != protocol));
941
942         /*
943          * Will need connection entry for next create/destroy metadata,
944          * So no need to re-order entry for these requests
945          */
946         return c;
947 }
948
949 /*
950  * sfe_ipv4_mark_rule()
951  *      Updates the mark for a current offloaded connection
952  *
953  * Will take hash lock upon entry
954  */
955 void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark)
956 {
957         struct sfe_ipv4 *si = &__si;
958         struct sfe_ipv4_connection *c;
959
960         spin_lock_bh(&si->lock);
961         c = sfe_ipv4_find_sfe_ipv4_connection(si, mark->protocol,
962                                               mark->src_ip.ip, mark->src_port,
963                                               mark->dest_ip.ip, mark->dest_port);
964         if (c) {
965                 WARN_ON((0 != c->mark) && (0 == mark->mark));
966                 c->mark = mark->mark;
967         }
968         spin_unlock_bh(&si->lock);
969
970         if (c) {
971                 DEBUG_TRACE("Matching connection found for mark, "
972                             "setting from %08x to %08x\n",
973                             c->mark, mark->mark);
974         }
975 }
976
977 /*
978  * sfe_ipv4_insert_sfe_ipv4_connection()
979  *      Insert a connection into the hash.
980  *
981  * On entry we must be holding the lock that protects the hash table.
982  */
983 static void sfe_ipv4_insert_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
984 {
985         struct sfe_ipv4_connection **hash_head;
986         struct sfe_ipv4_connection *prev_head;
987         unsigned int conn_idx;
988
989         /*
990          * Insert entry into the connection hash.
991          */
992         conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
993                                                 c->dest_ip, c->dest_port);
994         hash_head = &si->conn_hash[conn_idx];
995         prev_head = *hash_head;
996         c->prev = NULL;
997         if (prev_head) {
998                 prev_head->prev = c;
999         }
1000
1001         c->next = prev_head;
1002         *hash_head = c;
1003
1004         /*
1005          * Insert entry into the "all connections" list.
1006          */
1007         if (si->all_connections_tail) {
1008                 c->all_connections_prev = si->all_connections_tail;
1009                 si->all_connections_tail->all_connections_next = c;
1010         } else {
1011                 c->all_connections_prev = NULL;
1012                 si->all_connections_head = c;
1013         }
1014
1015         si->all_connections_tail = c;
1016         c->all_connections_next = NULL;
1017         si->num_connections++;
1018
1019         /*
1020          * Insert the connection match objects too.
1021          */
1022         sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->original_match);
1023         sfe_ipv4_insert_sfe_ipv4_connection_match(si, c->reply_match);
1024 }
1025
1026 /*
1027  * sfe_ipv4_remove_sfe_ipv4_connection()
1028  *      Remove a sfe_ipv4_connection object from the hash.
1029  *
1030  * On entry we must be holding the lock that protects the hash table.
1031  */
static void sfe_ipv4_remove_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c)
{
	/*
	 * Remove the connection match objects.
	 * Reply first, then original — the reverse of insertion order.
	 */
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->reply_match);
	sfe_ipv4_remove_sfe_ipv4_connection_match(si, c->original_match);

	/*
	 * Unlink the connection.
	 * A NULL prev means we're the chain head, so the bucket index must be
	 * recomputed to advance the head pointer.
	 */
	if (c->prev) {
		c->prev->next = c->next;
	} else {
		unsigned int conn_idx = sfe_ipv4_get_connection_hash(c->protocol, c->src_ip, c->src_port,
								     c->dest_ip, c->dest_port);
		si->conn_hash[conn_idx] = c->next;
	}

	if (c->next) {
		c->next->prev = c->prev;
	}

	/*
	 * Unlink connection from all_connections list
	 */
	if (c->all_connections_prev) {
		c->all_connections_prev->all_connections_next = c->all_connections_next;
	} else {
		si->all_connections_head = c->all_connections_next;
	}

	if (c->all_connections_next) {
		c->all_connections_next->all_connections_prev = c->all_connections_prev;
	} else {
		si->all_connections_tail = c->all_connections_prev;
	}

	si->num_connections--;
}
1072
/*
 * sfe_ipv4_gen_sync_sfe_ipv4_connection()
 *      Sync a connection.
 *
 * On entry to this function we expect that the lock for the connection is either
 * already held or isn't required.
 */
static void sfe_ipv4_gen_sync_sfe_ipv4_connection(struct sfe_ipv4 *si, struct sfe_ipv4_connection *c,
						  struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
						  u64 now_jiffies)
{
	struct sfe_ipv4_connection_match *original_cm;
	struct sfe_ipv4_connection_match *reply_cm;

	/*
	 * Fill in the update message: 5-tuple plus the NAT-translated
	 * addresses/ports for both directions.
	 */
	sis->is_v6 = 0;
	sis->protocol = c->protocol;
	sis->src_ip.ip = c->src_ip;
	sis->src_ip_xlate.ip = c->src_ip_xlate;
	sis->dest_ip.ip = c->dest_ip;
	sis->dest_ip_xlate.ip = c->dest_ip_xlate;
	sis->src_port = c->src_port;
	sis->src_port_xlate = c->src_port_xlate;
	sis->dest_port = c->dest_port;
	sis->dest_port_xlate = c->dest_port_xlate;

	original_cm = c->original_match;
	reply_cm = c->reply_match;
	/*
	 * TCP window-tracking state is copied unconditionally; for non-TCP
	 * connections these union fields are presumably ignored by the sync
	 * consumer — confirm against the callback implementation.
	 */
	sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
	sis->src_td_end = original_cm->protocol_state.tcp.end;
	sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
	sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
	sis->dest_td_end = reply_cm->protocol_state.tcp.end;
	sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;

	/*
	 * "new" counts are the deltas accumulated since the last summary
	 * update; the 64-bit totals below are refreshed from them first.
	 */
	sis->src_new_packet_count = original_cm->rx_packet_count;
	sis->src_new_byte_count = original_cm->rx_byte_count;
	sis->dest_new_packet_count = reply_cm->rx_packet_count;
	sis->dest_new_byte_count = reply_cm->rx_byte_count;

	sfe_ipv4_connection_match_update_summary_stats(original_cm);
	sfe_ipv4_connection_match_update_summary_stats(reply_cm);

	sis->src_dev = original_cm->match_dev;
	sis->src_packet_count = original_cm->rx_packet_count64;
	sis->src_byte_count = original_cm->rx_byte_count64;

	sis->dest_dev = reply_cm->match_dev;
	sis->dest_packet_count = reply_cm->rx_packet_count64;
	sis->dest_byte_count = reply_cm->rx_byte_count64;

	sis->reason = reason;

	/*
	 * Get the time increment since our last sync.
	 */
	sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
	c->last_sync_jiffies = now_jiffies;
}
1134
1135 /*
1136  * sfe_ipv4_flush_sfe_ipv4_connection()
1137  *      Flush a connection and free all associated resources.
1138  *
1139  * We need to be called with bottom halves disabled locally as we need to acquire
1140  * the connection hash lock and release it again.  In general we're actually called
1141  * from within a BH and so we're fine, but we're also called when connections are
1142  * torn down.
1143  */
static void sfe_ipv4_flush_sfe_ipv4_connection(struct sfe_ipv4 *si,
					       struct sfe_ipv4_connection *c,
					       sfe_sync_reason_t reason)
{
	struct sfe_connection_sync sis;
	u64 now_jiffies;
	sfe_sync_rule_callback_t sync_rule_callback;

	/*
	 * The RCU read-side section keeps the sync callback valid for the
	 * duration of the (possible) final sync; the spinlock only covers the
	 * counter bump and the rcu_dereference itself.
	 */
	rcu_read_lock();
	spin_lock_bh(&si->lock);
	si->connection_flushes++;
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	spin_unlock_bh(&si->lock);

	if (sync_rule_callback) {
		/*
		 * Generate a sync message and then sync.
		 */
		now_jiffies = get_jiffies_64();
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, reason, now_jiffies);
		sync_rule_callback(&sis);
	}

	rcu_read_unlock();

	/*
	 * Release our hold of the source and dest devices and free the memory
	 * for our connection objects.  The caller must already have removed
	 * the connection from the hash tables before flushing it.
	 */
	dev_put(c->original_dev);
	dev_put(c->reply_dev);
	kfree(c->original_match);
	kfree(c->reply_match);
	kfree(c);
}
1179
1180 /*
1181  * sfe_ipv4_recv_udp()
1182  *      Handle UDP packet receives and forwarding.
1183  */
static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
			     unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
{
	struct sfe_ipv4_udp_hdr *udph;
	__be32 src_ip;
	__be32 dest_ip;
	__be16 src_port;
	__be16 dest_port;
	struct sfe_ipv4_connection_match *cm;
	u8 ttl;
	struct net_device *xmit_dev;

	/*
	 * Is our packet too short to contain a valid UDP header?
	 * Return 0 so the caller hands the packet to the normal stack.
	 */
	if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_udp_hdr) + ihl)))) {
		spin_lock_bh(&si->lock);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("packet too short for UDP header\n");
		return 0;
	}

	/*
	 * Read the IP address and port information.  Read the IP header data first
	 * because we've almost certainly got that in the cache.  We may not yet have
	 * the UDP header cached though so allow more time for any prefetching.
	 */
	src_ip = iph->saddr;
	dest_ip = iph->daddr;

	udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	src_port = udph->source;
	dest_port = udph->dest;

	spin_lock_bh(&si->lock);

	/*
	 * Look for a connection match.  The lock is held from here until the
	 * packet has been fully rewritten and the stats updated.
	 */
#ifdef CONFIG_NF_FLOW_COOKIE
	cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
	if (unlikely(!cm)) {
		cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
	}
#else
	cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
	if (unlikely(!cm)) {
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("no connection found\n");
		return 0;
	}

	/*
	 * If our packet has been marked as "flush on find" we can't actually
	 * forward it in the fast path, but now that we've found an associated
	 * connection we can flush that out before we process the packet.
	 */
	if (unlikely(flush_on_find)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("flush on find\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

#ifdef CONFIG_XFRM
	/*
	 * We can't accelerate the flow on this direction, just let it go
	 * through the slow path.
	 */
	if (unlikely(!cm->flow_accel)) {
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);
		return 0;
	}
#endif

	/*
	 * Does our TTL allow forwarding?
	 * A TTL of 0 or 1 can't be forwarded (it would expire), so the
	 * connection is torn down and the packet left to the slow path.
	 */
	ttl = iph->ttl;
	if (unlikely(ttl < 2)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("ttl too low\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * If our packet is larger than the MTU of the transmit interface then
	 * we can't forward it easily.
	 */
	if (unlikely(len > cm->xmit_dev_mtu)) {
		struct sfe_ipv4_connection *c = cm->connection;
		sfe_ipv4_remove_sfe_ipv4_connection(si, c);
		si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
		si->packets_not_forwarded++;
		spin_unlock_bh(&si->lock);

		DEBUG_TRACE("larger than mtu\n");
		sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
		return 0;
	}

	/*
	 * From this point on we're good to modify the packet.
	 */

	/*
	 * Check if skb was cloned. If it was, unshare it. Because
	 * the data area is going to be written in this path and we don't want to
	 * change the cloned skb's data section.
	 */
	if (unlikely(skb_cloned(skb))) {
		DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
		skb = skb_unshare(skb, GFP_ATOMIC);
		if (!skb) {
			DEBUG_WARN("Failed to unshare the cloned skb\n");
			si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
			si->packets_not_forwarded++;
			spin_unlock_bh(&si->lock);

			/*
			 * NOTE(review): skb_unshare() has consumed the original
			 * skb on this failure path, yet we return 0 ("not
			 * handled") — confirm the caller does not touch the skb
			 * after a 0 return from this function.
			 */
			return 0;
		}

		/*
		 * Update the iph and udph pointers with the unshared skb's data area.
		 */
		iph = (struct sfe_ipv4_ip_hdr *)skb->data;
		udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
	}

	/*
	 * Update DSCP — rewrite only the DSCP bits of the TOS byte.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
		iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
	}

	/*
	 * Decrement our TTL.
	 */
	iph->ttl = ttl - 1;

	/*
	 * Do we have to perform translations of the source address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
		u16 udp_csum;

		iph->saddr = cm->xlate_src_ip;
		udph->source = cm->xlate_src_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.  (A zero UDP checksum means "no checksum" and
		 * must be left alone.)  The incremental update adds the
		 * precomputed adjustment and folds the carry once.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			u32 sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_src_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_src_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (u16)sum;
		}
	}

	/*
	 * Do we have to perform translations of the destination address/port?
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
		u16 udp_csum;

		iph->daddr = cm->xlate_dest_ip;
		udph->dest = cm->xlate_dest_port;

		/*
		 * Do we have a non-zero UDP checksum?  If we do then we need
		 * to update it.
		 */
		udp_csum = udph->check;
		if (likely(udp_csum)) {
			u32 sum;

			if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
				sum = udp_csum + cm->xlate_dest_partial_csum_adjustment;
			} else {
				sum = udp_csum + cm->xlate_dest_csum_adjustment;
			}

			sum = (sum & 0xffff) + (sum >> 16);
			udph->check = (u16)sum;
		}
	}

	/*
	 * Replace the IP checksum.
	 */
	iph->check = sfe_ipv4_gen_ip_csum(iph);

	/*
	 * Update traffic stats.
	 */
	cm->rx_packet_count++;
	cm->rx_byte_count += len;

	/*
	 * If we're not already on the active list then insert ourselves at the tail
	 * of the current list.
	 */
	if (unlikely(!cm->active)) {
		cm->active = true;
		cm->active_prev = si->active_tail;
		if (likely(si->active_tail)) {
			si->active_tail->active_next = cm;
		} else {
			si->active_head = cm;
		}
		si->active_tail = cm;
	}

	xmit_dev = cm->xmit_dev;
	skb->dev = xmit_dev;

	/*
	 * Check to see if we need to write a header.
	 */
	if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
		if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
			dev_hard_header(skb, xmit_dev, ETH_P_IP,
					cm->xmit_dest_mac, cm->xmit_src_mac, len);
		} else {
			/*
			 * For the simple case we write this really fast.
			 * NOTE: sfe_ipv4_eth_hdr appears to store the MAC
			 * addresses as 16-bit halfword arrays, so the three
			 * stores per address cover all 6 bytes — confirm
			 * against the struct definition in the header.
			 */
			struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
			eth->h_proto = htons(ETH_P_IP);
			eth->h_dest[0] = cm->xmit_dest_mac[0];
			eth->h_dest[1] = cm->xmit_dest_mac[1];
			eth->h_dest[2] = cm->xmit_dest_mac[2];
			eth->h_source[0] = cm->xmit_src_mac[0];
			eth->h_source[1] = cm->xmit_src_mac[1];
			eth->h_source[2] = cm->xmit_src_mac[2];
		}
	}

	/*
	 * Update priority of skb.
	 */
	if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
		skb->priority = cm->priority;
	}

	/*
	 * Mark outgoing packet.
	 */
	skb->mark = cm->connection->mark;
	if (skb->mark) {
		DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
	}

	si->packets_forwarded++;
	spin_unlock_bh(&si->lock);

	/*
	 * We're going to check for GSO flags when we transmit the packet so
	 * start fetching the necessary cache line now.
	 */
	prefetch(skb_shinfo(skb));

	/*
	 * Mark that this packet has been fast forwarded.
	 */
	skb->fast_forwarded = 1;

	/*
	 * Send the packet on its way.
	 */
	dev_queue_xmit(skb);

	return 1;
}
1487
1488 /*
1489  * sfe_ipv4_process_tcp_option_sack()
1490  *      Parse TCP SACK option and update ack according
1491  */
1492 static bool sfe_ipv4_process_tcp_option_sack(const struct sfe_ipv4_tcp_hdr *th, const u32 data_offs,
1493                                              u32 *ack)
1494 {
1495         u32 length = sizeof(struct sfe_ipv4_tcp_hdr);
1496         u8 *ptr = (u8 *)th + length;
1497
1498         /*
1499          * Ignore processing if TCP packet has only TIMESTAMP option.
1500          */
1501         if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1502             && likely(ptr[0] == TCPOPT_NOP)
1503             && likely(ptr[1] == TCPOPT_NOP)
1504             && likely(ptr[2] == TCPOPT_TIMESTAMP)
1505             && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1506                 return true;
1507         }
1508
1509         /*
1510          * TCP options. Parse SACK option.
1511          */
1512         while (length < data_offs) {
1513                 u8 size;
1514                 u8 kind;
1515
1516                 ptr = (u8 *)th + length;
1517                 kind = *ptr;
1518
1519                 /*
1520                  * NOP, for padding
1521                  * Not in the switch because to fast escape and to not calculate size
1522                  */
1523                 if (kind == TCPOPT_NOP) {
1524                         length++;
1525                         continue;
1526                 }
1527
1528                 if (kind == TCPOPT_SACK) {
1529                         u32 sack = 0;
1530                         u8 re = 1 + 1;
1531
1532                         size = *(ptr + 1);
1533                         if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1534                             || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1535                             || (size > (data_offs - length))) {
1536                                 return false;
1537                         }
1538
1539                         re += 4;
1540                         while (re < size) {
1541                                 u32 sack_re;
1542                                 u8 *sptr = ptr + re;
1543                                 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1544                                 if (sack_re > sack) {
1545                                         sack = sack_re;
1546                                 }
1547                                 re += TCPOLEN_SACK_PERBLOCK;
1548                         }
1549                         if (sack > *ack) {
1550                                 *ack = sack;
1551                         }
1552                         length += size;
1553                         continue;
1554                 }
1555                 if (kind == TCPOPT_EOL) {
1556                         return true;
1557                 }
1558                 size = *(ptr + 1);
1559                 if (size < 2) {
1560                         return false;
1561                 }
1562                 length += size;
1563         }
1564
1565         return true;
1566 }
1567
1568 /*
1569  * sfe_ipv4_recv_tcp()
1570  *      Handle TCP packet receives and forwarding.
1571  */
1572 static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
1573                              unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
1574 {
1575         struct sfe_ipv4_tcp_hdr *tcph;
1576         __be32 src_ip;
1577         __be32 dest_ip;
1578         __be16 src_port;
1579         __be16 dest_port;
1580         struct sfe_ipv4_connection_match *cm;
1581         struct sfe_ipv4_connection_match *counter_cm;
1582         u8 ttl;
1583         u32 flags;
1584         struct net_device *xmit_dev;
1585
1586         /*
1587          * Is our packet too short to contain a valid UDP header?
1588          */
1589         if (unlikely(!pskb_may_pull(skb, (sizeof(struct sfe_ipv4_tcp_hdr) + ihl)))) {
1590                 spin_lock_bh(&si->lock);
1591                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1592                 si->packets_not_forwarded++;
1593                 spin_unlock_bh(&si->lock);
1594
1595                 DEBUG_TRACE("packet too short for TCP header\n");
1596                 return 0;
1597         }
1598
1599         /*
1600          * Read the IP address and port information.  Read the IP header data first
1601          * because we've almost certainly got that in the cache.  We may not yet have
1602          * the TCP header cached though so allow more time for any prefetching.
1603          */
1604         src_ip = iph->saddr;
1605         dest_ip = iph->daddr;
1606
1607         tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
1608         src_port = tcph->source;
1609         dest_port = tcph->dest;
1610         flags = tcp_flag_word(tcph);
1611
1612         spin_lock_bh(&si->lock);
1613
1614         /*
1615          * Look for a connection match.
1616          */
1617 #ifdef CONFIG_NF_FLOW_COOKIE
1618         cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1619         if (unlikely(!cm)) {
1620                 cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1621         }
1622 #else
1623         cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1624 #endif
1625         if (unlikely(!cm)) {
1626                 /*
1627                  * We didn't get a connection but as TCP is connection-oriented that
1628                  * may be because this is a non-fast connection (not running established).
1629                  * For diagnostic purposes we differentiate this here.
1630                  */
1631                 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1632                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1633                         si->packets_not_forwarded++;
1634                         spin_unlock_bh(&si->lock);
1635
1636                         DEBUG_TRACE("no connection found - fast flags\n");
1637                         return 0;
1638                 }
1639                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1640                 si->packets_not_forwarded++;
1641                 spin_unlock_bh(&si->lock);
1642
1643                 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1644                             flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1645                 return 0;
1646         }
1647
1648         /*
1649          * If our packet has beern marked as "flush on find" we can't actually
1650          * forward it in the fast path, but now that we've found an associated
1651          * connection we can flush that out before we process the packet.
1652          */
1653         if (unlikely(flush_on_find)) {
1654                 struct sfe_ipv4_connection *c = cm->connection;
1655                 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1656                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1657                 si->packets_not_forwarded++;
1658                 spin_unlock_bh(&si->lock);
1659
1660                 DEBUG_TRACE("flush on find\n");
1661                 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1662                 return 0;
1663         }
1664
1665 #ifdef CONFIG_XFRM
1666         /*
1667          * We can't accelerate the flow on this direction, just let it go
1668          * through the slow path.
1669          */
1670         if (unlikely(!cm->flow_accel)) {
1671                 si->packets_not_forwarded++;
1672                 spin_unlock_bh(&si->lock);
1673                 return 0;
1674         }
1675 #endif
1676         /*
1677          * Does our TTL allow forwarding?
1678          */
1679         ttl = iph->ttl;
1680         if (unlikely(ttl < 2)) {
1681                 struct sfe_ipv4_connection *c = cm->connection;
1682                 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1683                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1684                 si->packets_not_forwarded++;
1685                 spin_unlock_bh(&si->lock);
1686
1687                 DEBUG_TRACE("ttl too low\n");
1688                 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1689                 return 0;
1690         }
1691
1692         /*
1693          * If our packet is larger than the MTU of the transmit interface then
1694          * we can't forward it easily.
1695          */
1696         if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
1697                 struct sfe_ipv4_connection *c = cm->connection;
1698                 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1699                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1700                 si->packets_not_forwarded++;
1701                 spin_unlock_bh(&si->lock);
1702
1703                 DEBUG_TRACE("larger than mtu\n");
1704                 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1705                 return 0;
1706         }
1707
1708         /*
1709          * Look at our TCP flags.  Anything missing an ACK or that has RST, SYN or FIN
1710          * set is not a fast path packet.
1711          */
1712         if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1713                 struct sfe_ipv4_connection *c = cm->connection;
1714                 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1715                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_FLAGS]++;
1716                 si->packets_not_forwarded++;
1717                 spin_unlock_bh(&si->lock);
1718
1719                 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1720                             flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1721                 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1722                 return 0;
1723         }
1724
1725         counter_cm = cm->counter_match;
1726
1727         /*
1728          * Are we doing sequence number checking?
1729          */
1730         if (likely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1731                 u32 seq;
1732                 u32 ack;
1733                 u32 sack;
1734                 u32 data_offs;
1735                 u32 end;
1736                 u32 left_edge;
1737                 u32 scaled_win;
1738                 u32 max_end;
1739
1740                 /*
1741                  * Is our sequence fully past the right hand edge of the window?
1742                  */
1743                 seq = ntohl(tcph->seq);
1744                 if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1745                         struct sfe_ipv4_connection *c = cm->connection;
1746                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1747                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1748                         si->packets_not_forwarded++;
1749                         spin_unlock_bh(&si->lock);
1750
1751                         DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1752                                     seq, cm->protocol_state.tcp.max_end + 1);
1753                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1754                         return 0;
1755                 }
1756
1757                 /*
1758                  * Check that our TCP data offset isn't too short.
1759                  */
1760                 data_offs = tcph->doff << 2;
1761                 if (unlikely(data_offs < sizeof(struct sfe_ipv4_tcp_hdr))) {
1762                         struct sfe_ipv4_connection *c = cm->connection;
1763                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1764                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1765                         si->packets_not_forwarded++;
1766                         spin_unlock_bh(&si->lock);
1767
1768                         DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1769                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1770                         return 0;
1771                 }
1772
1773                 /*
1774                  * Update ACK according to any SACK option.
1775                  */
1776                 ack = ntohl(tcph->ack_seq);
1777                 sack = ack;
1778                 if (unlikely(!sfe_ipv4_process_tcp_option_sack(tcph, data_offs, &sack))) {
1779                         struct sfe_ipv4_connection *c = cm->connection;
1780                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1781                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1782                         si->packets_not_forwarded++;
1783                         spin_unlock_bh(&si->lock);
1784
1785                         DEBUG_TRACE("TCP option SACK size is wrong\n");
1786                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1787                         return 0;
1788                 }
1789
1790                 /*
1791                  * Check that our TCP data offset isn't past the end of the packet.
1792                  */
1793                 data_offs += sizeof(struct sfe_ipv4_ip_hdr);
1794                 if (unlikely(len < data_offs)) {
1795                         struct sfe_ipv4_connection *c = cm->connection;
1796                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1797                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1798                         si->packets_not_forwarded++;
1799                         spin_unlock_bh(&si->lock);
1800
1801                         DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1802                                     data_offs, len);
1803                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1804                         return 0;
1805                 }
1806
1807                 end = seq + len - data_offs;
1808
1809                 /*
1810                  * Is our sequence fully before the left hand edge of the window?
1811                  */
1812                 if (unlikely((s32)(end - (cm->protocol_state.tcp.end
1813                                                 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1814                         struct sfe_ipv4_connection *c = cm->connection;
1815                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1816                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1817                         si->packets_not_forwarded++;
1818                         spin_unlock_bh(&si->lock);
1819
1820                         DEBUG_TRACE("seq: %u before left edge: %u\n",
1821                                     end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1822                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1823                         return 0;
1824                 }
1825
1826                 /*
1827                  * Are we acking data that is to the right of what has been sent?
1828                  */
1829                 if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1830                         struct sfe_ipv4_connection *c = cm->connection;
1831                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1832                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1833                         si->packets_not_forwarded++;
1834                         spin_unlock_bh(&si->lock);
1835
1836                         DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1837                                     sack, counter_cm->protocol_state.tcp.end + 1);
1838                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1839                         return 0;
1840                 }
1841
1842                 /*
1843                  * Is our ack too far before the left hand edge of the window?
1844                  */
1845                 left_edge = counter_cm->protocol_state.tcp.end
1846                             - cm->protocol_state.tcp.max_win
1847                             - SFE_IPV4_TCP_MAX_ACK_WINDOW
1848                             - 1;
1849                 if (unlikely((s32)(sack - left_edge) < 0)) {
1850                         struct sfe_ipv4_connection *c = cm->connection;
1851                         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
1852                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1853                         si->packets_not_forwarded++;
1854                         spin_unlock_bh(&si->lock);
1855
1856                         DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1857                         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
1858                         return 0;
1859                 }
1860
1861                 /*
1862                  * Have we just seen the largest window size yet for this connection?  If yes
1863                  * then we need to record the new value.
1864                  */
1865                 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
1866                 scaled_win += (sack - ack);
1867                 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1868                         cm->protocol_state.tcp.max_win = scaled_win;
1869                 }
1870
1871                 /*
1872                  * If our sequence and/or ack numbers have advanced then record the new state.
1873                  */
1874                 if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
1875                         cm->protocol_state.tcp.end = end;
1876                 }
1877
1878                 max_end = sack + scaled_win;
1879                 if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1880                         counter_cm->protocol_state.tcp.max_end = max_end;
1881                 }
1882         }
1883
1884         /*
1885          * From this point on we're good to modify the packet.
1886          */
1887
1888         /*
1889          * Check if skb was cloned. If it was, unshare it. Because
1890          * the data area is going to be written in this path and we don't want to
1891          * change the cloned skb's data section.
1892          */
1893         if (unlikely(skb_cloned(skb))) {
1894                 DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
1895                 skb = skb_unshare(skb, GFP_ATOMIC);
1896                 if (!skb) {
1897                         DEBUG_WARN("Failed to unshare the cloned skb\n");
1898                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
1899                         si->packets_not_forwarded++;
1900                         spin_unlock_bh(&si->lock);
1901
1902                         return 0;
1903                 }
1904
1905                 /*
1906                  * Update the iph and tcph pointers with the unshared skb's data area.
1907                  */
1908                 iph = (struct sfe_ipv4_ip_hdr *)skb->data;
1909                 tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
1910         }
1911
1912         /*
1913          * Update DSCP
1914          */
1915         if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1916                 iph->tos = (iph->tos & SFE_IPV4_DSCP_MASK) | cm->dscp;
1917         }
1918
1919         /*
1920          * Decrement our TTL.
1921          */
1922         iph->ttl = ttl - 1;
1923
1924         /*
1925          * Do we have to perform translations of the source address/port?
1926          */
1927         if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1928                 u16 tcp_csum;
1929                 u32 sum;
1930
1931                 iph->saddr = cm->xlate_src_ip;
1932                 tcph->source = cm->xlate_src_port;
1933
1934                 /*
1935                  * Do we have a non-zero UDP checksum?  If we do then we need
1936                  * to update it.
1937                  */
1938                 tcp_csum = tcph->check;
1939                 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1940                         sum = tcp_csum + cm->xlate_src_partial_csum_adjustment;
1941                 } else {
1942                         sum = tcp_csum + cm->xlate_src_csum_adjustment;
1943                 }
1944
1945                 sum = (sum & 0xffff) + (sum >> 16);
1946                 tcph->check = (u16)sum;
1947         }
1948
1949         /*
1950          * Do we have to perform translations of the destination address/port?
1951          */
1952         if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1953                 u16 tcp_csum;
1954                 u32 sum;
1955
1956                 iph->daddr = cm->xlate_dest_ip;
1957                 tcph->dest = cm->xlate_dest_port;
1958
1959                 /*
1960                  * Do we have a non-zero UDP checksum?  If we do then we need
1961                  * to update it.
1962                  */
1963                 tcp_csum = tcph->check;
1964                 if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1965                         sum = tcp_csum + cm->xlate_dest_partial_csum_adjustment;
1966                 } else {
1967                         sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1968                 }
1969
1970                 sum = (sum & 0xffff) + (sum >> 16);
1971                 tcph->check = (u16)sum;
1972         }
1973
1974         /*
1975          * Replace the IP checksum.
1976          */
1977         iph->check = sfe_ipv4_gen_ip_csum(iph);
1978
1979         /*
1980          * Update traffic stats.
1981          */
1982         cm->rx_packet_count++;
1983         cm->rx_byte_count += len;
1984
1985         /*
1986          * If we're not already on the active list then insert ourselves at the tail
1987          * of the current list.
1988          */
1989         if (unlikely(!cm->active)) {
1990                 cm->active = true;
1991                 cm->active_prev = si->active_tail;
1992                 if (likely(si->active_tail)) {
1993                         si->active_tail->active_next = cm;
1994                 } else {
1995                         si->active_head = cm;
1996                 }
1997                 si->active_tail = cm;
1998         }
1999
2000         xmit_dev = cm->xmit_dev;
2001         skb->dev = xmit_dev;
2002
2003         /*
2004          * Check to see if we need to write a header.
2005          */
2006         if (likely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
2007                 if (unlikely(!(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
2008                         dev_hard_header(skb, xmit_dev, ETH_P_IP,
2009                                         cm->xmit_dest_mac, cm->xmit_src_mac, len);
2010                 } else {
2011                         /*
2012                          * For the simple case we write this really fast.
2013                          */
2014                         struct sfe_ipv4_eth_hdr *eth = (struct sfe_ipv4_eth_hdr *)__skb_push(skb, ETH_HLEN);
2015                         eth->h_proto = htons(ETH_P_IP);
2016                         eth->h_dest[0] = cm->xmit_dest_mac[0];
2017                         eth->h_dest[1] = cm->xmit_dest_mac[1];
2018                         eth->h_dest[2] = cm->xmit_dest_mac[2];
2019                         eth->h_source[0] = cm->xmit_src_mac[0];
2020                         eth->h_source[1] = cm->xmit_src_mac[1];
2021                         eth->h_source[2] = cm->xmit_src_mac[2];
2022                 }
2023         }
2024
2025         /*
2026          * Update priority of skb.
2027          */
2028         if (unlikely(cm->flags & SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
2029                 skb->priority = cm->priority;
2030         }
2031
2032         /*
2033          * Mark outgoing packet
2034          */
2035         skb->mark = cm->connection->mark;
2036         if (skb->mark) {
2037                 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
2038         }
2039
2040         si->packets_forwarded++;
2041         spin_unlock_bh(&si->lock);
2042
2043         /*
2044          * We're going to check for GSO flags when we transmit the packet so
2045          * start fetching the necessary cache line now.
2046          */
2047         prefetch(skb_shinfo(skb));
2048
2049         /*
2050          * Mark that this packet has been fast forwarded.
2051          */
2052         skb->fast_forwarded = 1;
2053
2054         /*
2055          * Send the packet on its way.
2056          */
2057         dev_queue_xmit(skb);
2058
2059         return 1;
2060 }
2061
2062 /*
2063  * sfe_ipv4_recv_icmp()
2064  *      Handle ICMP packet receives.
2065  *
2066  * ICMP packets aren't handled as a "fast path" and always have us process them
2067  * through the default Linux stack.  What we do need to do is look for any errors
2068  * about connections we are handling in the fast path.  If we find any such
2069  * connections then we want to flush their state so that the ICMP error path
2070  * within Linux has all of the correct state should it need it.
2071  */
2072 static int sfe_ipv4_recv_icmp(struct sfe_ipv4 *si, struct sk_buff *skb, struct net_device *dev,
2073                               unsigned int len, struct sfe_ipv4_ip_hdr *iph, unsigned int ihl)
2074 {
2075         struct icmphdr *icmph;
2076         struct sfe_ipv4_ip_hdr *icmp_iph;
2077         unsigned int icmp_ihl_words;
2078         unsigned int icmp_ihl;
2079         u32 *icmp_trans_h;
2080         struct sfe_ipv4_udp_hdr *icmp_udph;
2081         struct sfe_ipv4_tcp_hdr *icmp_tcph;
2082         __be32 src_ip;
2083         __be32 dest_ip;
2084         __be16 src_port;
2085         __be16 dest_port;
2086         struct sfe_ipv4_connection_match *cm;
2087         struct sfe_ipv4_connection *c;
2088         u32 pull_len = sizeof(struct icmphdr) + ihl;
2089
2090         /*
2091          * Is our packet too short to contain a valid ICMP header?
2092          */
2093         len -= ihl;
2094         if (!pskb_may_pull(skb, pull_len)) {
2095                 spin_lock_bh(&si->lock);
2096                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2097                 si->packets_not_forwarded++;
2098                 spin_unlock_bh(&si->lock);
2099
2100                 DEBUG_TRACE("packet too short for ICMP header\n");
2101                 return 0;
2102         }
2103
2104         /*
2105          * We only handle "destination unreachable" and "time exceeded" messages.
2106          */
2107         icmph = (struct icmphdr *)(skb->data + ihl);
2108         if ((icmph->type != ICMP_DEST_UNREACH)
2109             && (icmph->type != ICMP_TIME_EXCEEDED)) {
2110                 spin_lock_bh(&si->lock);
2111                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2112                 si->packets_not_forwarded++;
2113                 spin_unlock_bh(&si->lock);
2114
2115                 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->type);
2116                 return 0;
2117         }
2118
2119         /*
2120          * Do we have the full embedded IP header?
2121          */
2122         len -= sizeof(struct icmphdr);
2123         pull_len += sizeof(struct sfe_ipv4_ip_hdr);
2124         if (!pskb_may_pull(skb, pull_len)) {
2125                 spin_lock_bh(&si->lock);
2126                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_HEADER_INCOMPLETE]++;
2127                 si->packets_not_forwarded++;
2128                 spin_unlock_bh(&si->lock);
2129
2130                 DEBUG_TRACE("Embedded IP header not complete\n");
2131                 return 0;
2132         }
2133
2134         /*
2135          * Is our embedded IP version wrong?
2136          */
2137         icmp_iph = (struct sfe_ipv4_ip_hdr *)(icmph + 1);
2138         if (unlikely(icmp_iph->version != 4)) {
2139                 spin_lock_bh(&si->lock);
2140                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_NON_V4]++;
2141                 si->packets_not_forwarded++;
2142                 spin_unlock_bh(&si->lock);
2143
2144                 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2145                 return 0;
2146         }
2147
2148         /*
2149          * Do we have the full embedded IP header, including any options?
2150          */
2151         icmp_ihl_words = icmp_iph->ihl;
2152         icmp_ihl = icmp_ihl_words << 2;
2153         pull_len += icmp_ihl - sizeof(struct sfe_ipv4_ip_hdr);
2154         if (!pskb_may_pull(skb, pull_len)) {
2155                 spin_lock_bh(&si->lock);
2156                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_IP_OPTIONS_INCOMPLETE]++;
2157                 si->packets_not_forwarded++;
2158                 spin_unlock_bh(&si->lock);
2159
2160                 DEBUG_TRACE("Embedded header not large enough for IP options\n");
2161                 return 0;
2162         }
2163
2164         len -= icmp_ihl;
2165         icmp_trans_h = ((u32 *)icmp_iph) + icmp_ihl_words;
2166
2167         /*
2168          * Handle the embedded transport layer header.
2169          */
2170         switch (icmp_iph->protocol) {
2171         case IPPROTO_UDP:
2172                 /*
2173                  * We should have 8 bytes of UDP header - that's enough to identify
2174                  * the connection.
2175                  */
2176                 pull_len += 8;
2177                 if (!pskb_may_pull(skb, pull_len)) {
2178                         spin_lock_bh(&si->lock);
2179                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UDP_HEADER_INCOMPLETE]++;
2180                         si->packets_not_forwarded++;
2181                         spin_unlock_bh(&si->lock);
2182
2183                         DEBUG_TRACE("Incomplete embedded UDP header\n");
2184                         return 0;
2185                 }
2186
2187                 icmp_udph = (struct sfe_ipv4_udp_hdr *)icmp_trans_h;
2188                 src_port = icmp_udph->source;
2189                 dest_port = icmp_udph->dest;
2190                 break;
2191
2192         case IPPROTO_TCP:
2193                 /*
2194                  * We should have 8 bytes of TCP header - that's enough to identify
2195                  * the connection.
2196                  */
2197                 pull_len += 8;
2198                 if (!pskb_may_pull(skb, pull_len)) {
2199                         spin_lock_bh(&si->lock);
2200                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_TCP_HEADER_INCOMPLETE]++;
2201                         si->packets_not_forwarded++;
2202                         spin_unlock_bh(&si->lock);
2203
2204                         DEBUG_TRACE("Incomplete embedded TCP header\n");
2205                         return 0;
2206                 }
2207
2208                 icmp_tcph = (struct sfe_ipv4_tcp_hdr *)icmp_trans_h;
2209                 src_port = icmp_tcph->source;
2210                 dest_port = icmp_tcph->dest;
2211                 break;
2212
2213         default:
2214                 spin_lock_bh(&si->lock);
2215                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_IPV4_UNHANDLED_PROTOCOL]++;
2216                 si->packets_not_forwarded++;
2217                 spin_unlock_bh(&si->lock);
2218
2219                 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", icmp_iph->protocol);
2220                 return 0;
2221         }
2222
2223         src_ip = icmp_iph->saddr;
2224         dest_ip = icmp_iph->daddr;
2225
2226         spin_lock_bh(&si->lock);
2227
2228         /*
2229          * Look for a connection match.  Note that we reverse the source and destination
2230          * here because our embedded message contains a packet that was sent in the
2231          * opposite direction to the one in which we just received it.  It will have
2232          * been sent on the interface from which we received it though so that's still
2233          * ok to use.
2234          */
2235         cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, icmp_iph->protocol, dest_ip, dest_port, src_ip, src_port);
2236         if (unlikely(!cm)) {
2237                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2238                 si->packets_not_forwarded++;
2239                 spin_unlock_bh(&si->lock);
2240
2241                 DEBUG_TRACE("no connection found\n");
2242                 return 0;
2243         }
2244
2245         /*
2246          * We found a connection so now remove it from the connection list and flush
2247          * its state.
2248          */
2249         c = cm->connection;
2250         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2251         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2252         si->packets_not_forwarded++;
2253         spin_unlock_bh(&si->lock);
2254
2255         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_FLUSH);
2256         return 0;
2257 }
2258
2259 /*
2260  * sfe_ipv4_recv()
2261  *      Handle packet receives and forwaring.
2262  *
2263  * Returns 1 if the packet is forwarded or 0 if it isn't.
2264  */
2265 int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
2266 {
2267         struct sfe_ipv4 *si = &__si;
2268         unsigned int len;
2269         unsigned int tot_len;
2270         unsigned int frag_off;
2271         unsigned int ihl;
2272         bool flush_on_find;
2273         bool ip_options;
2274         struct sfe_ipv4_ip_hdr *iph;
2275         u32 protocol;
2276
2277         /*
2278          * Check that we have space for an IP header here.
2279          */
2280         len = skb->len;
2281         if (unlikely(!pskb_may_pull(skb, sizeof(struct sfe_ipv4_ip_hdr)))) {
2282                 spin_lock_bh(&si->lock);
2283                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2284                 si->packets_not_forwarded++;
2285                 spin_unlock_bh(&si->lock);
2286
2287                 DEBUG_TRACE("len: %u is too short\n", len);
2288                 return 0;
2289         }
2290
2291         /*
2292          * Check that our "total length" is large enough for an IP header.
2293          */
2294         iph = (struct sfe_ipv4_ip_hdr *)skb->data;
2295         tot_len = ntohs(iph->tot_len);
2296         if (unlikely(tot_len < sizeof(struct sfe_ipv4_ip_hdr))) {
2297                 spin_lock_bh(&si->lock);
2298                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_BAD_TOTAL_LENGTH]++;
2299                 si->packets_not_forwarded++;
2300                 spin_unlock_bh(&si->lock);
2301
2302                 DEBUG_TRACE("tot_len: %u is too short\n", tot_len);
2303                 return 0;
2304         }
2305
2306         /*
2307          * Is our IP version wrong?
2308          */
2309         if (unlikely(iph->version != 4)) {
2310                 spin_lock_bh(&si->lock);
2311                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_V4]++;
2312                 si->packets_not_forwarded++;
2313                 spin_unlock_bh(&si->lock);
2314
2315                 DEBUG_TRACE("IP version: %u\n", iph->version);
2316                 return 0;
2317         }
2318
2319         /*
2320          * Does our datagram fit inside the skb?
2321          */
2322         if (unlikely(tot_len > len)) {
2323                 spin_lock_bh(&si->lock);
2324                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2325                 si->packets_not_forwarded++;
2326                 spin_unlock_bh(&si->lock);
2327
2328                 DEBUG_TRACE("tot_len: %u, exceeds len: %u\n", tot_len, len);
2329                 return 0;
2330         }
2331
2332         /*
2333          * Do we have a non-initial fragment?
2334          */
2335         frag_off = ntohs(iph->frag_off);
2336         if (unlikely(frag_off & IP_OFFSET)) {
2337                 spin_lock_bh(&si->lock);
2338                 si->exception_events[SFE_IPV4_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2339                 si->packets_not_forwarded++;
2340                 spin_unlock_bh(&si->lock);
2341
2342                 DEBUG_TRACE("non-initial fragment\n");
2343                 return 0;
2344         }
2345
2346         /*
2347          * If we have a (first) fragment then mark it to cause any connection to flush.
2348          */
2349         flush_on_find = unlikely(frag_off & IP_MF) ? true : false;
2350
2351         /*
2352          * Do we have any IP options?  That's definite a slow path!  If we do have IP
2353          * options we need to recheck our header size.
2354          */
2355         ihl = iph->ihl << 2;
2356         ip_options = unlikely(ihl != sizeof(struct sfe_ipv4_ip_hdr)) ? true : false;
2357         if (unlikely(ip_options)) {
2358                 if (unlikely(len < ihl)) {
2359                         spin_lock_bh(&si->lock);
2360                         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE]++;
2361                         si->packets_not_forwarded++;
2362                         spin_unlock_bh(&si->lock);
2363
2364                         DEBUG_TRACE("len: %u is too short for header of size: %u\n", len, ihl);
2365                         return 0;
2366                 }
2367
2368                 flush_on_find = true;
2369         }
2370
2371         protocol = iph->protocol;
2372         if (IPPROTO_UDP == protocol) {
2373                 return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2374         }
2375
2376         if (IPPROTO_TCP == protocol) {
2377                 return sfe_ipv4_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2378         }
2379
2380         if (IPPROTO_ICMP == protocol) {
2381                 return sfe_ipv4_recv_icmp(si, skb, dev, len, iph, ihl);
2382         }
2383
2384         spin_lock_bh(&si->lock);
2385         si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2386         si->packets_not_forwarded++;
2387         spin_unlock_bh(&si->lock);
2388
2389         DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", protocol);
2390         return 0;
2391 }
2392
2393 static void
2394 sfe_ipv4_update_tcp_state(struct sfe_ipv4_connection *c,
2395                           struct sfe_connection_create *sic)
2396 {
2397         struct sfe_ipv4_connection_match *orig_cm;
2398         struct sfe_ipv4_connection_match *repl_cm;
2399         struct sfe_ipv4_tcp_connection_match *orig_tcp;
2400         struct sfe_ipv4_tcp_connection_match *repl_tcp;
2401
2402         orig_cm = c->original_match;
2403         repl_cm = c->reply_match;
2404         orig_tcp = &orig_cm->protocol_state.tcp;
2405         repl_tcp = &repl_cm->protocol_state.tcp;
2406
2407         /* update orig */
2408         if (orig_tcp->max_win < sic->src_td_max_window) {
2409                 orig_tcp->max_win = sic->src_td_max_window;
2410         }
2411         if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
2412                 orig_tcp->end = sic->src_td_end;
2413         }
2414         if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2415                 orig_tcp->max_end = sic->src_td_max_end;
2416         }
2417
2418         /* update reply */
2419         if (repl_tcp->max_win < sic->dest_td_max_window) {
2420                 repl_tcp->max_win = sic->dest_td_max_window;
2421         }
2422         if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
2423                 repl_tcp->end = sic->dest_td_end;
2424         }
2425         if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2426                 repl_tcp->max_end = sic->dest_td_max_end;
2427         }
2428
2429         /* update match flags */
2430         orig_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2431         repl_cm->flags &= ~SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2432         if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2433                 orig_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2434                 repl_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2435         }
2436 }
2437
2438 static void
2439 sfe_ipv4_update_protocol_state(struct sfe_ipv4_connection *c,
2440                                struct sfe_connection_create *sic)
2441 {
2442         switch (sic->protocol) {
2443         case IPPROTO_TCP:
2444                 sfe_ipv4_update_tcp_state(c, sic);
2445                 break;
2446         }
2447 }
2448
2449 void sfe_ipv4_update_rule(struct sfe_connection_create *sic)
2450 {
2451         struct sfe_ipv4_connection *c;
2452         struct sfe_ipv4 *si = &__si;
2453
2454         spin_lock_bh(&si->lock);
2455
2456         c = sfe_ipv4_find_sfe_ipv4_connection(si,
2457                                               sic->protocol,
2458                                               sic->src_ip.ip,
2459                                               sic->src_port,
2460                                               sic->dest_ip.ip,
2461                                               sic->dest_port);
2462         if (c != NULL) {
2463                 sfe_ipv4_update_protocol_state(c, sic);
2464         }
2465
2466         spin_unlock_bh(&si->lock);
2467 }
2468
2469 /*
2470  * sfe_ipv4_create_rule()
2471  *      Create a forwarding rule.
2472  */
2473 int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
2474 {
2475         struct sfe_ipv4 *si = &__si;
2476         struct sfe_ipv4_connection *c;
2477         struct sfe_ipv4_connection_match *original_cm;
2478         struct sfe_ipv4_connection_match *reply_cm;
2479         struct net_device *dest_dev;
2480         struct net_device *src_dev;
2481
2482         dest_dev = sic->dest_dev;
2483         src_dev = sic->src_dev;
2484
2485         if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2486                      (src_dev->reg_state != NETREG_REGISTERED))) {
2487                 return -EINVAL;
2488         }
2489
2490         spin_lock_bh(&si->lock);
2491         si->connection_create_requests++;
2492
2493         /*
2494          * Check to see if there is already a flow that matches the rule we're
2495          * trying to create.  If there is then we can't create a new one.
2496          */
2497         c = sfe_ipv4_find_sfe_ipv4_connection(si,
2498                                               sic->protocol,
2499                                               sic->src_ip.ip,
2500                                               sic->src_port,
2501                                               sic->dest_ip.ip,
2502                                               sic->dest_port);
2503         if (c != NULL) {
2504                 si->connection_create_collisions++;
2505
2506                 /*
2507                  * If we already have the flow then it's likely that this
2508                  * request to create the connection rule contains more
2509                  * up-to-date information. Check and update accordingly.
2510                  */
2511                 sfe_ipv4_update_protocol_state(c, sic);
2512                 spin_unlock_bh(&si->lock);
2513
2514                 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
2515                             "  s: %s:%pM:%pI4:%u, d: %s:%pM:%pI4:%u\n",
2516                             sic->mark, sic->protocol,
2517                             sic->src_dev->name, sic->src_mac, &sic->src_ip.ip, ntohs(sic->src_port),
2518                             sic->dest_dev->name, sic->dest_mac, &sic->dest_ip.ip, ntohs(sic->dest_port));
2519                 return -EADDRINUSE;
2520         }
2521
2522         /*
2523          * Allocate the various connection tracking objects.
2524          */
2525         c = (struct sfe_ipv4_connection *)kmalloc(sizeof(struct sfe_ipv4_connection), GFP_ATOMIC);
2526         if (unlikely(!c)) {
2527                 spin_unlock_bh(&si->lock);
2528                 return -ENOMEM;
2529         }
2530
2531         original_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2532         if (unlikely(!original_cm)) {
2533                 spin_unlock_bh(&si->lock);
2534                 kfree(c);
2535                 return -ENOMEM;
2536         }
2537
2538         reply_cm = (struct sfe_ipv4_connection_match *)kmalloc(sizeof(struct sfe_ipv4_connection_match), GFP_ATOMIC);
2539         if (unlikely(!reply_cm)) {
2540                 spin_unlock_bh(&si->lock);
2541                 kfree(original_cm);
2542                 kfree(c);
2543                 return -ENOMEM;
2544         }
2545
2546         /*
2547          * Fill in the "original" direction connection matching object.
2548          * Note that the transmit MAC address is "dest_mac_xlate" because
2549          * we always know both ends of a connection by their translated
2550          * addresses and not their public addresses.
2551          */
2552         original_cm->match_dev = src_dev;
2553         original_cm->match_protocol = sic->protocol;
2554         original_cm->match_src_ip = sic->src_ip.ip;
2555         original_cm->match_src_port = sic->src_port;
2556         original_cm->match_dest_ip = sic->dest_ip.ip;
2557         original_cm->match_dest_port = sic->dest_port;
2558         original_cm->xlate_src_ip = sic->src_ip_xlate.ip;
2559         original_cm->xlate_src_port = sic->src_port_xlate;
2560         original_cm->xlate_dest_ip = sic->dest_ip_xlate.ip;
2561         original_cm->xlate_dest_port = sic->dest_port_xlate;
2562         original_cm->rx_packet_count = 0;
2563         original_cm->rx_packet_count64 = 0;
2564         original_cm->rx_byte_count = 0;
2565         original_cm->rx_byte_count64 = 0;
2566         original_cm->xmit_dev = dest_dev;
2567         original_cm->xmit_dev_mtu = sic->dest_mtu;
2568         memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
2569         memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2570         original_cm->connection = c;
2571         original_cm->counter_match = reply_cm;
2572         original_cm->flags = 0;
2573         if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2574                 original_cm->priority = sic->src_priority;
2575                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2576         }
2577         if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2578                 original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
2579                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2580         }
2581 #ifdef CONFIG_NF_FLOW_COOKIE
2582         original_cm->flow_cookie = 0;
2583 #endif
2584 #ifdef CONFIG_XFRM
2585         original_cm->flow_accel = sic->original_accel;
2586 #endif
2587         original_cm->active_next = NULL;
2588         original_cm->active_prev = NULL;
2589         original_cm->active = false;
2590
2591         /*
2592          * For PPP links we don't write an L2 header.  For everything else we do.
2593          */
2594         if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2595                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2596
2597                 /*
2598                  * If our dev writes Ethernet headers then we can write a really fast
2599                  * version.
2600                  */
2601                 if (dest_dev->header_ops) {
2602                         if (dest_dev->header_ops->create == eth_header) {
2603                                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2604                         }
2605                 }
2606         }
2607
2608         /*
2609          * Fill in the "reply" direction connection matching object.
2610          */
2611         reply_cm->match_dev = dest_dev;
2612         reply_cm->match_protocol = sic->protocol;
2613         reply_cm->match_src_ip = sic->dest_ip_xlate.ip;
2614         reply_cm->match_src_port = sic->dest_port_xlate;
2615         reply_cm->match_dest_ip = sic->src_ip_xlate.ip;
2616         reply_cm->match_dest_port = sic->src_port_xlate;
2617         reply_cm->xlate_src_ip = sic->dest_ip.ip;
2618         reply_cm->xlate_src_port = sic->dest_port;
2619         reply_cm->xlate_dest_ip = sic->src_ip.ip;
2620         reply_cm->xlate_dest_port = sic->src_port;
2621         reply_cm->rx_packet_count = 0;
2622         reply_cm->rx_packet_count64 = 0;
2623         reply_cm->rx_byte_count = 0;
2624         reply_cm->rx_byte_count64 = 0;
2625         reply_cm->xmit_dev = src_dev;
2626         reply_cm->xmit_dev_mtu = sic->src_mtu;
2627         memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
2628         memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2629         reply_cm->connection = c;
2630         reply_cm->counter_match = original_cm;
2631         reply_cm->flags = 0;
2632         if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2633                 reply_cm->priority = sic->dest_priority;
2634                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2635         }
2636         if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2637                 reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
2638                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2639         }
2640 #ifdef CONFIG_NF_FLOW_COOKIE
2641         reply_cm->flow_cookie = 0;
2642 #endif
2643 #ifdef CONFIG_XFRM
2644         reply_cm->flow_accel = sic->reply_accel;
2645 #endif
2646         reply_cm->active_next = NULL;
2647         reply_cm->active_prev = NULL;
2648         reply_cm->active = false;
2649
2650         /*
2651          * For PPP links we don't write an L2 header.  For everything else we do.
2652          */
2653         if (!(src_dev->flags & IFF_POINTOPOINT)) {
2654                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2655
2656                 /*
2657                  * If our dev writes Ethernet headers then we can write a really fast
2658                  * version.
2659                  */
2660                 if (src_dev->header_ops) {
2661                         if (src_dev->header_ops->create == eth_header) {
2662                                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2663                         }
2664                 }
2665         }
2666
2667
2668         if (sic->dest_ip.ip != sic->dest_ip_xlate.ip || sic->dest_port != sic->dest_port_xlate) {
2669                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2670                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2671         }
2672
2673         if (sic->src_ip.ip != sic->src_ip_xlate.ip || sic->src_port != sic->src_port_xlate) {
2674                 original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_SRC;
2675                 reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_XLATE_DEST;
2676         }
2677
2678         c->protocol = sic->protocol;
2679         c->src_ip = sic->src_ip.ip;
2680         c->src_ip_xlate = sic->src_ip_xlate.ip;
2681         c->src_port = sic->src_port;
2682         c->src_port_xlate = sic->src_port_xlate;
2683         c->original_dev = src_dev;
2684         c->original_match = original_cm;
2685         c->dest_ip = sic->dest_ip.ip;
2686         c->dest_ip_xlate = sic->dest_ip_xlate.ip;
2687         c->dest_port = sic->dest_port;
2688         c->dest_port_xlate = sic->dest_port_xlate;
2689         c->reply_dev = dest_dev;
2690         c->reply_match = reply_cm;
2691         c->mark = sic->mark;
2692         c->debug_read_seq = 0;
2693         c->last_sync_jiffies = get_jiffies_64();
2694
2695         /*
2696          * Take hold of our source and dest devices for the duration of the connection.
2697          */
2698         dev_hold(c->original_dev);
2699         dev_hold(c->reply_dev);
2700
2701         /*
2702          * Initialize the protocol-specific information that we track.
2703          */
2704         switch (sic->protocol) {
2705         case IPPROTO_TCP:
2706                 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2707                 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2708                 original_cm->protocol_state.tcp.end = sic->src_td_end;
2709                 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2710                 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2711                 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2712                 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2713                 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2714                 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2715                         original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2716                         reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2717                 }
2718                 break;
2719         }
2720
2721         sfe_ipv4_connection_match_compute_translations(original_cm);
2722         sfe_ipv4_connection_match_compute_translations(reply_cm);
2723         sfe_ipv4_insert_sfe_ipv4_connection(si, c);
2724
2725         spin_unlock_bh(&si->lock);
2726
2727         /*
2728          * We have everything we need!
2729          */
2730         DEBUG_INFO("new connection - mark: %08x, p: %d\n"
2731                    "  s: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n"
2732                    "  d: %s:%pM(%pM):%pI4(%pI4):%u(%u)\n",
2733                    sic->mark, sic->protocol,
2734                    sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
2735                    &sic->src_ip.ip, &sic->src_ip_xlate.ip, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
2736                    dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
2737                    &sic->dest_ip.ip, &sic->dest_ip_xlate.ip, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
2738
2739         return 0;
2740 }
2741
2742 /*
2743  * sfe_ipv4_destroy_rule()
2744  *      Destroy a forwarding rule.
2745  */
2746 void sfe_ipv4_destroy_rule(struct sfe_connection_destroy *sid)
2747 {
2748         struct sfe_ipv4 *si = &__si;
2749         struct sfe_ipv4_connection *c;
2750
2751         spin_lock_bh(&si->lock);
2752         si->connection_destroy_requests++;
2753
2754         /*
2755          * Check to see if we have a flow that matches the rule we're trying
2756          * to destroy.  If there isn't then we can't destroy it.
2757          */
2758         c = sfe_ipv4_find_sfe_ipv4_connection(si, sid->protocol, sid->src_ip.ip, sid->src_port,
2759                                               sid->dest_ip.ip, sid->dest_port);
2760         if (!c) {
2761                 si->connection_destroy_misses++;
2762                 spin_unlock_bh(&si->lock);
2763
2764                 DEBUG_TRACE("connection does not exist - p: %d, s: %pI4:%u, d: %pI4:%u\n",
2765                             sid->protocol, &sid->src_ip, ntohs(sid->src_port),
2766                             &sid->dest_ip, ntohs(sid->dest_port));
2767                 return;
2768         }
2769
2770         /*
2771          * Remove our connection details from the hash tables.
2772          */
2773         sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2774         spin_unlock_bh(&si->lock);
2775
2776         sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
2777
2778         DEBUG_INFO("connection destroyed - p: %d, s: %pI4:%u, d: %pI4:%u\n",
2779                    sid->protocol, &sid->src_ip.ip, ntohs(sid->src_port),
2780                    &sid->dest_ip.ip, ntohs(sid->dest_port));
2781 }
2782
2783 /*
2784  * sfe_ipv4_register_sync_rule_callback()
2785  *      Register a callback for rule synchronization.
2786  */
2787 void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
2788 {
2789         struct sfe_ipv4 *si = &__si;
2790
2791         spin_lock_bh(&si->lock);
2792         rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
2793         spin_unlock_bh(&si->lock);
2794 }
2795
2796 /*
2797  * sfe_ipv4_get_debug_dev()
2798  */
2799 static ssize_t sfe_ipv4_get_debug_dev(struct device *dev,
2800                                       struct device_attribute *attr,
2801                                       char *buf)
2802 {
2803         struct sfe_ipv4 *si = &__si;
2804         ssize_t count;
2805         int num;
2806
2807         spin_lock_bh(&si->lock);
2808         num = si->debug_dev;
2809         spin_unlock_bh(&si->lock);
2810
2811         count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2812         return count;
2813 }
2814
2815 /*
2816  * sysfs attributes.
2817  */
2818 static const struct device_attribute sfe_ipv4_debug_dev_attr =
2819         __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv4_get_debug_dev, NULL);
2820
2821 /*
2822  * sfe_ipv4_destroy_all_rules_for_dev()
2823  *      Destroy all connections that match a particular device.
2824  *
2825  * If we pass dev as NULL then this destroys all connections.
2826  */
2827 void sfe_ipv4_destroy_all_rules_for_dev(struct net_device *dev)
2828 {
2829         struct sfe_ipv4 *si = &__si;
2830         struct sfe_ipv4_connection *c;
2831
2832 another_round:
2833         spin_lock_bh(&si->lock);
2834
2835         for (c = si->all_connections_head; c; c = c->all_connections_next) {
2836                 /*
2837                  * Does this connection relate to the device we are destroying?
2838                  */
2839                 if (!dev
2840                     || (dev == c->original_dev)
2841                     || (dev == c->reply_dev)) {
2842                         break;
2843                 }
2844         }
2845
2846         if (c) {
2847                 sfe_ipv4_remove_sfe_ipv4_connection(si, c);
2848         }
2849
2850         spin_unlock_bh(&si->lock);
2851
2852         if (c) {
2853                 sfe_ipv4_flush_sfe_ipv4_connection(si, c, SFE_SYNC_REASON_DESTROY);
2854                 goto another_round;
2855         }
2856 }
2857
/*
 * sfe_ipv4_periodic_sync()
 *	Periodic timer handler: drain a slice of the "active" connection list,
 *	pushing each connection's statistics to the registered sync callback,
 *	then re-arm the timer (~every 10ms).
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
static void sfe_ipv4_periodic_sync(struct timer_list *arg)
#else
static void sfe_ipv4_periodic_sync(unsigned long arg)
#endif /*KERNEL_VERSION(4, 15, 0)*/
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
	/*
	 * NOTE(review): 'cust_data' is not a mainline struct timer_list field;
	 * presumably added by a local kernel patch - confirm against the tree
	 * this module is built for.
	 */
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg->cust_data;
#else
	struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
#endif /*KERNEL_VERSION(4, 15, 0)*/
	u64 now_jiffies;
	int quota;
	sfe_sync_rule_callback_t sync_rule_callback;

	now_jiffies = get_jiffies_64();

	rcu_read_lock();
	sync_rule_callback = rcu_dereference(si->sync_rule_callback);
	if (!sync_rule_callback) {
		/* No consumer registered - nothing to report this tick. */
		rcu_read_unlock();
		goto done;
	}

	spin_lock_bh(&si->lock);
	sfe_ipv4_update_summary_stats(si);

	/*
	 * Get an estimate of the number of connections to parse in this sync.
	 * 1/64th of the total (rounded up), so a full sweep of all connections
	 * takes about 64 timer ticks.
	 */
	quota = (si->num_connections + 63) / 64;

	/*
	 * Walk the "active" list and sync the connection state.
	 */
	while (quota--) {
		struct sfe_ipv4_connection_match *cm;
		struct sfe_ipv4_connection_match *counter_cm;
		struct sfe_ipv4_connection *c;
		struct sfe_connection_sync sis;

		cm = si->active_head;
		if (!cm) {
			break;
		}

		/*
		 * There's a possibility that our counter match is in the active list too.
		 * If it is then remove it (both directions get synced together below,
		 * so leaving it queued would double-report the connection).
		 */
		counter_cm = cm->counter_match;
		if (counter_cm->active) {
			counter_cm->active = false;

			/*
			 * We must have a connection preceding this counter match
			 * because that's the one that got us to this point, so we don't have
			 * to worry about removing the head of the list.
			 */
			counter_cm->active_prev->active_next = counter_cm->active_next;

			if (likely(counter_cm->active_next)) {
				counter_cm->active_next->active_prev = counter_cm->active_prev;
			} else {
				/* counter_cm was the tail - pull the tail back. */
				si->active_tail = counter_cm->active_prev;
			}

			counter_cm->active_next = NULL;
			counter_cm->active_prev = NULL;
		}

		/*
		 * Now remove the head of the active scan list.
		 */
		cm->active = false;
		si->active_head = cm->active_next;
		if (likely(cm->active_next)) {
			cm->active_next->active_prev = NULL;
		} else {
			/* List is now empty. */
			si->active_tail = NULL;
		}
		cm->active_next = NULL;

		/*
		 * Sync the connection state.
		 */
		c = cm->connection;
		sfe_ipv4_gen_sync_sfe_ipv4_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);

		/*
		 * We don't want to be holding the lock when we sync!
		 * (The callback may sleep-free work or take its own locks;
		 * the list head is re-read at the top of the loop after
		 * re-acquiring, so concurrent changes are safe.)
		 */
		spin_unlock_bh(&si->lock);
		sync_rule_callback(&sis);
		spin_lock_bh(&si->lock);
	}

	spin_unlock_bh(&si->lock);
	rcu_read_unlock();

done:
	/* Re-arm roughly 10ms out: (HZ + 99) / 100 jiffies, rounded up. */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}
2964
2965 #define CHAR_DEV_MSG_SIZE 768