package/kernel/shortcut-fe/src/sfe_ipv6.c (immortalwrt/immortalwrt.git, commit 3c5ef12630e01cf6848c6dd8d743a8290f096e53)

1 /*
2  * sfe_ipv6.c
3  *      Shortcut forwarding engine - IPv6 support.
4  *
5  * Copyright (c) 2015-2016, 2019, The Linux Foundation. All rights reserved.
6  * Permission to use, copy, modify, and/or distribute this software for
7  * any purpose with or without fee is hereby granted, provided that the
8  * above copyright notice and this permission notice appear in all copies.
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/skbuff.h>
21 #include <linux/icmp.h>
22 #include <net/tcp.h>
23 #include <linux/etherdevice.h>
24 #include <linux/version.h>
25
26 #include "sfe.h"
27 #include "sfe_cm.h"
28
29 /*
30  * By default Linux IP header and transport layer header structures are
31  * unpacked, assuming that such headers should be 32-bit aligned.
32  * Unfortunately some wireless adaptors can't cope with this requirement and
33  * some CPUs can't handle misaligned accesses.  For those platforms we
34  * define SFE_IPV6_UNALIGNED_IP_HEADER and mark the structures as packed.
35  * When we do this the compiler will generate slightly worse code than for the
36  * aligned case (on most platforms) but will be much quicker than fixing
37  * things up in an unaligned trap handler.
38  */
39 #define SFE_IPV6_UNALIGNED_IP_HEADER 1
40 #if SFE_IPV6_UNALIGNED_IP_HEADER
41 #define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed))
42 #else
43 #define SFE_IPV6_UNALIGNED_STRUCT
44 #endif
45
46 #define CHAR_DEV_MSG_SIZE 768
47
48 /*
49  * An Ethernet header, but with an optional "packed" attribute to
50  * help with performance on some platforms (see the definition of
51  * SFE_IPV6_UNALIGNED_STRUCT)
52  */
53 struct sfe_ipv6_eth_hdr {
54         __be16 h_dest[ETH_ALEN / 2];
55         __be16 h_source[ETH_ALEN / 2];
56         __be16 h_proto;
57 } SFE_IPV6_UNALIGNED_STRUCT;
58
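/*
 * The DSCP value occupies the upper six bits of the IPv6 traffic class,
 * which straddles the first two bytes of the header.  SFE_IPV6_DSCP_MASK,
 * applied via htons() to those first 16 bits, clears the DSCP bits while
 * preserving the version nibble, the ECN bits and the top of the flow label.
 */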
59 #define SFE_IPV6_DSCP_MASK 0xf03f
60 #define SFE_IPV6_DSCP_SHIFT 2
61
62 /*
63  * An IPv6 header, but with an optional "packed" attribute to
64  * help with performance on some platforms (see the definition of
65  * SFE_IPV6_UNALIGNED_STRUCT)
66  */
67 struct sfe_ipv6_ip_hdr {
68 #if defined(__LITTLE_ENDIAN_BITFIELD)
69         __u8 priority:4,
70              version:4;
71 #elif defined(__BIG_ENDIAN_BITFIELD)
72         __u8 version:4,
73              priority:4;
74 #else
75 #error  "Please fix <asm/byteorder.h>"
76 #endif
77         __u8 flow_lbl[3];
78         __be16 payload_len;
79         __u8 nexthdr;
80         __u8 hop_limit;
81         struct sfe_ipv6_addr saddr;
82         struct sfe_ipv6_addr daddr;
83
84         /*
85          * The extension headers start here.
86          */
87 } SFE_IPV6_UNALIGNED_STRUCT;
88
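/*
 * IPv6 extension header types: these are the IANA-assigned next-header
 * (protocol) numbers.
 */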
89 #define SFE_IPV6_EXT_HDR_HOP 0
90 #define SFE_IPV6_EXT_HDR_ROUTING 43
91 #define SFE_IPV6_EXT_HDR_FRAG 44
92 #define SFE_IPV6_EXT_HDR_ESP 50
93 #define SFE_IPV6_EXT_HDR_AH 51
94 #define SFE_IPV6_EXT_HDR_NONE 59
95 #define SFE_IPV6_EXT_HDR_DST 60
96 #define SFE_IPV6_EXT_HDR_MH 135
97
98 /*
99  * fragmentation header
100  */
101
102 struct sfe_ipv6_frag_hdr {
103         __u8    nexthdr;
104         __u8    reserved;
105         __be16  frag_off;
106         __be32  identification;
107 };
108
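/*
 * Mask for the fragment-offset portion of frag_off; the low three bits of
 * the field carry the reserved bits and the more-fragments flag.
 */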
109 #define SFE_IPV6_FRAG_OFFSET    0xfff8
110
111 /*
112  * generic IPv6 extension header
113  */
114 struct sfe_ipv6_ext_hdr {
115         __u8 next_hdr;
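        /*
         * For hop-by-hop, routing and destination options headers, hdr_len is
         * the header length in 8-octet units, not counting the first 8 octets.
         */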
116         __u8 hdr_len;
117         __u8 padding[6];
118 } SFE_IPV6_UNALIGNED_STRUCT;
119
120 /*
121  * A UDP header, but with an optional "packed" attribute to
122  * help with performance on some platforms (see the definition of
123  * SFE_IPV6_UNALIGNED_STRUCT)
124  */
125 struct sfe_ipv6_udp_hdr {
126         __be16 source;
127         __be16 dest;
128         __be16 len;
129         __sum16 check;
130 } SFE_IPV6_UNALIGNED_STRUCT;
131
132 /*
133  * A TCP header, but with an optional "packed" attribute to
134  * help with performance on some platforms (see the definition of
135  * SFE_IPV6_UNALIGNED_STRUCT)
136  */
137 struct sfe_ipv6_tcp_hdr {
138         __be16 source;
139         __be16 dest;
140         __be32 seq;
141         __be32 ack_seq;
142 #if defined(__LITTLE_ENDIAN_BITFIELD)
143         __u16 res1:4,
144               doff:4,
145               fin:1,
146               syn:1,
147               rst:1,
148               psh:1,
149               ack:1,
150               urg:1,
151               ece:1,
152               cwr:1;
153 #elif defined(__BIG_ENDIAN_BITFIELD)
154         __u16 doff:4,
155               res1:4,
156               cwr:1,
157               ece:1,
158               urg:1,
159               ack:1,
160               psh:1,
161               rst:1,
162               syn:1,
163               fin:1;
164 #else
165 #error  "Adjust your <asm/byteorder.h> defines"
166 #endif
167         __be16 window;
168         __sum16 check;
169         __be16 urg_ptr;
170 } SFE_IPV6_UNALIGNED_STRUCT;
171
172 /*
173  * Specifies the lower bound on ACK numbers carried in the TCP header
174  */
175 #define SFE_IPV6_TCP_MAX_ACK_WINDOW 65520
176
177 /*
178  * IPv6 TCP connection match additional data.
179  */
180 struct sfe_ipv6_tcp_connection_match {
181         u8 win_scale;           /* Window scale */
182         u32 max_win;            /* Maximum window size seen */
183         u32 end;                /* Sequence number of the next byte to send (seq + segment length) */
184         u32 max_end;            /* Sequence number of the last byte to ack */
185 };
186
187 /*
188  * Bit flags for IPv6 connection matching entry.
189  */
190 #define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
191                                         /* Perform source translation */
192 #define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
193                                         /* Perform destination translation */
194 #define SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
195                                         /* Ignore TCP sequence numbers */
196 #define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
197                                         /* Fast Ethernet header write */
198 #define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
199                                         /* Write L2 header on transmit */
200 #define SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
201                                         /* remark priority of SKB */
202 #define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
203                                         /* remark DSCP of packet */
204
205 /*
206  * IPv6 connection matching structure.
207  */
208 struct sfe_ipv6_connection_match {
209         /*
210          * References to other objects.
211          */
212         struct sfe_ipv6_connection_match *next;
213         struct sfe_ipv6_connection_match *prev;
214         struct sfe_ipv6_connection *connection;
215         struct sfe_ipv6_connection_match *counter_match;
216                                         /* Matches the flow in the opposite direction to the one in connection */
217         struct sfe_ipv6_connection_match *active_next;
218         struct sfe_ipv6_connection_match *active_prev;
219         bool active;                    /* Flag to indicate if we're on the active list */
220
221         /*
222          * Characteristics that identify flows that match this rule.
223          */
224         struct net_device *match_dev;   /* Network device */
225         u8 match_protocol;              /* Protocol */
226         struct sfe_ipv6_addr match_src_ip[1];   /* Source IP address */
227         struct sfe_ipv6_addr match_dest_ip[1];  /* Destination IP address */
228         __be16 match_src_port;          /* Source port/connection ident */
229         __be16 match_dest_port;         /* Destination port/connection ident */
230
231         /*
232          * Control the operations of the match.
233          */
234         u32 flags;                      /* Bit flags */
235 #ifdef CONFIG_NF_FLOW_COOKIE
236         u32 flow_cookie;                /* used flow cookie, for debug */
237 #endif
238 #ifdef CONFIG_XFRM
239         u32 flow_accel;                 /* The flow accelerated or not */
240 #endif
241
242         /*
243          * Connection state that we track once we match.
244          */
245         union {                         /* Protocol-specific state */
246                 struct sfe_ipv6_tcp_connection_match tcp;
247         } protocol_state;
248         /*
249          * Stats recorded in a sync period. These stats will be added to
250          * rx_packet_count64/rx_byte_count64 after a sync period.
251          */
252         u32 rx_packet_count;
253         u32 rx_byte_count;
254
255         /*
256          * Packet translation information.
257          */
258         struct sfe_ipv6_addr xlate_src_ip[1];   /* Address after source translation */
259         __be16 xlate_src_port;  /* Port/connection ident after source translation */
260         u16 xlate_src_csum_adjustment;
261                                         /* Transport layer checksum adjustment after source translation */
262         struct sfe_ipv6_addr xlate_dest_ip[1];  /* Address after destination translation */
263         __be16 xlate_dest_port; /* Port/connection ident after destination translation */
264         u16 xlate_dest_csum_adjustment;
265                                         /* Transport layer checksum adjustment after destination translation */
266
267         /*
268          * QoS information
269          */
270         u32 priority;
271         u32 dscp;
272
273         /*
274          * Packet transmit information.
275          */
276         struct net_device *xmit_dev;    /* Network device on which to transmit */
277         unsigned short int xmit_dev_mtu;
278                                         /* Interface MTU */
279         u16 xmit_dest_mac[ETH_ALEN / 2];
280                                         /* Destination MAC address to use when forwarding */
281         u16 xmit_src_mac[ETH_ALEN / 2];
282                                         /* Source MAC address to use when forwarding */
283
284         /*
285          * Summary stats.
286          */
287         u64 rx_packet_count64;
288         u64 rx_byte_count64;
289 };
290
291 /*
292  * Per-connection data structure.
293  */
294 struct sfe_ipv6_connection {
295         struct sfe_ipv6_connection *next;
296                                         /* Pointer to the next entry in a hash chain */
297         struct sfe_ipv6_connection *prev;
298                                         /* Pointer to the previous entry in a hash chain */
299         int protocol;                   /* IP protocol number */
300         struct sfe_ipv6_addr src_ip[1];         /* Src IP addr pre-translation */
301         struct sfe_ipv6_addr src_ip_xlate[1];   /* Src IP addr post-translation */
302         struct sfe_ipv6_addr dest_ip[1];        /* Dest IP addr pre-translation */
303         struct sfe_ipv6_addr dest_ip_xlate[1];  /* Dest IP addr post-translation */
304         __be16 src_port;                /* Src port pre-translation */
305         __be16 src_port_xlate;          /* Src port post-translation */
306         __be16 dest_port;               /* Dest port pre-translation */
307         __be16 dest_port_xlate;         /* Dest port post-translation */
308         struct sfe_ipv6_connection_match *original_match;
309                                         /* Original direction matching structure */
310         struct net_device *original_dev;
311                                         /* Original direction source device */
312         struct sfe_ipv6_connection_match *reply_match;
313                                         /* Reply direction matching structure */
314         struct net_device *reply_dev;   /* Reply direction source device */
315         u64 last_sync_jiffies;          /* Jiffies count for the last sync */
316         struct sfe_ipv6_connection *all_connections_next;
317                                         /* Pointer to the next entry in the list of all connections */
318         struct sfe_ipv6_connection *all_connections_prev;
319                                         /* Pointer to the previous entry in the list of all connections */
320         u32 mark;                       /* mark for outgoing packet */
321         u32 debug_read_seq;             /* sequence number for debug dump */
322 };
323
324 /*
325  * IPv6 connections and hash table size information.
326  */
327 #define SFE_IPV6_CONNECTION_HASH_SHIFT 12
328 #define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT)
329 #define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1)
330
331 #ifdef CONFIG_NF_FLOW_COOKIE
332 #define SFE_FLOW_COOKIE_SIZE 2048
333 #define SFE_FLOW_COOKIE_MASK 0x7ff
334
335 struct sfe_ipv6_flow_cookie_entry {
336         struct sfe_ipv6_connection_match *match;
337         unsigned long last_clean_time;
338 };
339 #endif
340
341 enum sfe_ipv6_exception_events {
342         SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
343         SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION,
344         SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
345         SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL,
346         SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,
347         SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
348         SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
349         SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
350         SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
351         SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL,
352         SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
353         SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS,
354         SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
355         SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
356         SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK,
357         SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
358         SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
359         SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
360         SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,
361         SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
362         SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
363         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE,
364         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6,
365         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_IP_OPTIONS_INCOMPLETE,
366         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UDP_HEADER_INCOMPLETE,
367         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_TCP_HEADER_INCOMPLETE,
368         SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL,
369         SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
370         SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,
371         SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE,
372         SFE_IPV6_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
373         SFE_IPV6_EXCEPTION_EVENT_NON_V6,
374         SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
375         SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
376         SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
377         SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
378         SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL,
379         SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR,
380         SFE_IPV6_EXCEPTION_EVENT_LAST
381 };
382
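/*
 * Human-readable names for the exception events; this array must be kept in
 * the same order as enum sfe_ipv6_exception_events.
 */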
383 static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = {
384         "UDP_HEADER_INCOMPLETE",
385         "UDP_NO_CONNECTION",
386         "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
387         "UDP_SMALL_TTL",
388         "UDP_NEEDS_FRAGMENTATION",
389         "TCP_HEADER_INCOMPLETE",
390         "TCP_NO_CONNECTION_SLOW_FLAGS",
391         "TCP_NO_CONNECTION_FAST_FLAGS",
392         "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
393         "TCP_SMALL_TTL",
394         "TCP_NEEDS_FRAGMENTATION",
395         "TCP_FLAGS",
396         "TCP_SEQ_EXCEEDS_RIGHT_EDGE",
397         "TCP_SMALL_DATA_OFFS",
398         "TCP_BAD_SACK",
399         "TCP_BIG_DATA_OFFS",
400         "TCP_SEQ_BEFORE_LEFT_EDGE",
401         "TCP_ACK_EXCEEDS_RIGHT_EDGE",
402         "TCP_ACK_BEFORE_LEFT_EDGE",
403         "ICMP_HEADER_INCOMPLETE",
404         "ICMP_UNHANDLED_TYPE",
405         "ICMP_IPV6_HEADER_INCOMPLETE",
406         "ICMP_IPV6_NON_V6",
407         "ICMP_IPV6_IP_OPTIONS_INCOMPLETE",
408         "ICMP_IPV6_UDP_HEADER_INCOMPLETE",
409         "ICMP_IPV6_TCP_HEADER_INCOMPLETE",
410         "ICMP_IPV6_UNHANDLED_PROTOCOL",
411         "ICMP_NO_CONNECTION",
412         "ICMP_FLUSHED_CONNECTION",
413         "HEADER_INCOMPLETE",
414         "BAD_TOTAL_LENGTH",
415         "NON_V6",
416         "NON_INITIAL_FRAGMENT",
417         "DATAGRAM_INCOMPLETE",
418         "IP_OPTIONS_INCOMPLETE",
419         "UNHANDLED_PROTOCOL",
420         "FLOW_COOKIE_ADD_FAIL",
421         "CLONED_SKB_UNSHARE_ERROR"
422 };
423
424 /*
425  * Per-module structure.
426  */
427 struct sfe_ipv6 {
428         spinlock_t lock;                /* Lock for SMP correctness */
429         struct sfe_ipv6_connection_match *active_head;
430                                         /* Head of the list of recently active connections */
431         struct sfe_ipv6_connection_match *active_tail;
432                                         /* Tail of the list of recently active connections */
433         struct sfe_ipv6_connection *all_connections_head;
434                                         /* Head of the list of all connections */
435         struct sfe_ipv6_connection *all_connections_tail;
436                                         /* Tail of the list of all connections */
437         unsigned int num_connections;   /* Number of connections */
438         struct timer_list timer;        /* Timer used for periodic sync ops */
439         sfe_sync_rule_callback_t __rcu sync_rule_callback;
440                                         /* Callback function registered by a connection manager for stats syncing */
441         struct sfe_ipv6_connection *conn_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
442                                         /* Connection hash table */
443         struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
444                                         /* Connection match hash table */
445 #ifdef CONFIG_NF_FLOW_COOKIE
446         struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
447                                         /* Flow cookie table */
448         sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func;
449                                         /* Function used to configure a flow cookie in hardware */
450         int flow_cookie_enable;
451                                         /* Enable/disable flow cookie at runtime */
452 #endif
453
454         /*
455          * Stats recorded in a sync period. These stats will be added to
456          * connection_xxx64 after a sync period.
457          */
458         u32 connection_create_requests;
459                                         /* Number of IPv6 connection create requests */
460         u32 connection_create_collisions;
461                                         /* Number of IPv6 connection create requests that collided with existing hash table entries */
462         u32 connection_destroy_requests;
463                                         /* Number of IPv6 connection destroy requests */
464         u32 connection_destroy_misses;
465                                         /* Number of IPv6 connection destroy requests that missed our hash table */
466         u32 connection_match_hash_hits;
467                                         /* Number of IPv6 connection match hash hits */
468         u32 connection_match_hash_reorders;
469                                         /* Number of IPv6 connection match hash reorders */
470         u32 connection_flushes;         /* Number of IPv6 connection flushes */
471         u32 packets_forwarded;          /* Number of IPv6 packets forwarded */
472         u32 packets_not_forwarded;      /* Number of IPv6 packets not forwarded */
473         u32 exception_events[SFE_IPV6_EXCEPTION_EVENT_LAST];
474
475         /*
476          * Summary statistics.
477          */
478         u64 connection_create_requests64;
479                                         /* Number of IPv6 connection create requests */
480         u64 connection_create_collisions64;
481                                         /* Number of IPv6 connection create requests that collided with existing hash table entries */
482         u64 connection_destroy_requests64;
483                                         /* Number of IPv6 connection destroy requests */
484         u64 connection_destroy_misses64;
485                                         /* Number of IPv6 connection destroy requests that missed our hash table */
486         u64 connection_match_hash_hits64;
487                                         /* Number of IPv6 connection match hash hits */
488         u64 connection_match_hash_reorders64;
489                                         /* Number of IPv6 connection match hash reorders */
490         u64 connection_flushes64;       /* Number of IPv6 connection flushes */
491         u64 packets_forwarded64;        /* Number of IPv6 packets forwarded */
492         u64 packets_not_forwarded64;
493                                         /* Number of IPv6 packets not forwarded */
494         u64 exception_events64[SFE_IPV6_EXCEPTION_EVENT_LAST];
495
496         /*
497          * Control state.
498          */
499         struct kobject *sys_sfe_ipv6;   /* sysfs linkage */
500         int debug_dev;                  /* Major number of the debug char device */
501         u32 debug_read_seq;             /* sequence number for debug dump */
502 };
503
504 /*
505  * Enumeration of the XML output.
506  */
507 enum sfe_ipv6_debug_xml_states {
508         SFE_IPV6_DEBUG_XML_STATE_START,
509         SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_START,
510         SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
511         SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_END,
512         SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_START,
513         SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
514         SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_END,
515         SFE_IPV6_DEBUG_XML_STATE_STATS,
516         SFE_IPV6_DEBUG_XML_STATE_END,
517         SFE_IPV6_DEBUG_XML_STATE_DONE
518 };
519
520 /*
521  * XML write state.
522  */
523 struct sfe_ipv6_debug_xml_write_state {
524         enum sfe_ipv6_debug_xml_states state;
525                                         /* XML output file state machine state */
526         int iter_exception;             /* Next exception iterator */
527 };
528
529 typedef bool (*sfe_ipv6_debug_xml_write_method_t)(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
530                                                   int *total_read, struct sfe_ipv6_debug_xml_write_state *ws);
531
532 static struct sfe_ipv6 __si6;
533
534 /*
535  * sfe_ipv6_get_debug_dev()
536  */
537 static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf);
538
539 /*
540  * sysfs attributes.
541  */
542 static const struct device_attribute sfe_ipv6_debug_dev_attr =
543         __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv6_get_debug_dev, NULL);
544
545 /*
546  * sfe_ipv6_is_ext_hdr()
547  *      Check if we recognize the IPv6 extension header
548  */
549 static inline bool sfe_ipv6_is_ext_hdr(u8 hdr)
550 {
551         return (hdr == SFE_IPV6_EXT_HDR_HOP) ||
552                 (hdr == SFE_IPV6_EXT_HDR_ROUTING) ||
553                 (hdr == SFE_IPV6_EXT_HDR_FRAG) ||
554                 (hdr == SFE_IPV6_EXT_HDR_AH) ||
555                 (hdr == SFE_IPV6_EXT_HDR_DST) ||
556                 (hdr == SFE_IPV6_EXT_HDR_MH);
557 }
558
559 /*
560  * sfe_ipv6_change_dsfield()
561  *      Change the DSCP field in an IPv6 packet
562  */
563 static inline void sfe_ipv6_change_dsfield(struct sfe_ipv6_ip_hdr *iph, u8 dscp)
564 {
565         __be16 *p = (__be16 *)iph;
566
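        /*
         * Work on the first 16 bits of the header (version, traffic class and
         * the top of the flow label): clear the old DSCP bits and OR in the
         * new value, which is expected to arrive pre-shifted by
         * SFE_IPV6_DSCP_SHIFT so that the further shift by 4 lines it up with
         * the traffic class field.
         */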
567         *p = ((*p & htons(SFE_IPV6_DSCP_MASK)) | htons((u16)dscp << 4));
568 }
569
570 /*
571  * sfe_ipv6_get_connection_match_hash()
572  *      Generate the hash used in connection match lookups.
573  */
574 static inline unsigned int sfe_ipv6_get_connection_match_hash(struct net_device *dev, u8 protocol,
575                                                               struct sfe_ipv6_addr *src_ip, __be16 src_port,
576                                                               struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
577 {
578         u32 idx, hash = 0;
579         size_t dev_addr = (size_t)dev;
580
581         for (idx = 0; idx < 4; idx++) {
582                 hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx];
583         }
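        /*
         * Mix in the receive device pointer, the protocol and the ports, then
         * fold the upper bits down so the result fits the hash table index range.
         */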
584         hash = ((u32)dev_addr) ^ hash ^ protocol ^ ntohs(src_port ^ dest_port);
585         return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK;
586 }
587
588 /*
589  * sfe_ipv6_find_connection_match()
590  *      Get the IPv6 flow match info that corresponds to a particular 5-tuple.
591  *
592  * On entry we must be holding the lock that protects the hash table.
593  */
594 static struct sfe_ipv6_connection_match *
595 sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, u8 protocol,
596                                         struct sfe_ipv6_addr *src_ip, __be16 src_port,
597                                         struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
598 {
599         struct sfe_ipv6_connection_match *cm;
600         struct sfe_ipv6_connection_match *head;
601         unsigned int conn_match_idx;
602
603         conn_match_idx = sfe_ipv6_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
604         cm = si->conn_match_hash[conn_match_idx];
605
606         /*
607          * If we don't have anything in this chain then bail.
608          */
609         if (unlikely(!cm)) {
610                 return NULL;
611         }
612
613         /*
614          * Hopefully the first entry is the one we want.
615          */
616         if ((cm->match_src_port == src_port)
617             && (cm->match_dest_port == dest_port)
618             && (sfe_ipv6_addr_equal(cm->match_src_ip, src_ip))
619             && (sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip))
620             && (cm->match_protocol == protocol)
621             && (cm->match_dev == dev)) {
622                 si->connection_match_hash_hits++;
623                 return cm;
624         }
625
626         /*
627          * Unfortunately we didn't find it at the head, so we search the chain and,
628          * if we find a match, move that entry to the top of the hash chain.  We
629          * presume that it will be reused again very soon.
630          */
631         head = cm;
632         do {
633                 cm = cm->next;
634         } while (cm && (cm->match_src_port != src_port
635                  || cm->match_dest_port != dest_port
636                  || !sfe_ipv6_addr_equal(cm->match_src_ip, src_ip)
637                  || !sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip)
638                  || cm->match_protocol != protocol
639                  || cm->match_dev != dev));
640
641         /*
642          * Not found then we're done.
643          */
644         if (unlikely(!cm)) {
645                 return NULL;
646         }
647
648         /*
649          * We found a match so move it.
650          */
651         if (cm->next) {
652                 cm->next->prev = cm->prev;
653         }
654         cm->prev->next = cm->next;
655         cm->prev = NULL;
656         cm->next = head;
657         head->prev = cm;
658         si->conn_match_hash[conn_match_idx] = cm;
659         si->connection_match_hash_reorders++;
660
661         return cm;
662 }
663
664 /*
665  * sfe_ipv6_connection_match_update_summary_stats()
666  *      Update the summary stats for a connection match entry.
667  */
668 static inline void sfe_ipv6_connection_match_update_summary_stats(struct sfe_ipv6_connection_match *cm)
669 {
670         cm->rx_packet_count64 += cm->rx_packet_count;
671         cm->rx_packet_count = 0;
672         cm->rx_byte_count64 += cm->rx_byte_count;
673         cm->rx_byte_count = 0;
674 }
675
676 /*
677  * sfe_ipv6_connection_match_compute_translations()
678  *      Compute port and address translations for a connection match entry.
679  */
680 static void sfe_ipv6_connection_match_compute_translations(struct sfe_ipv6_connection_match *cm)
681 {
682         u32 diff[9];
683         u32 *idx_32;
684         u16 *idx_16;
685
686         /*
687          * Before we insert the entry look to see if this is tagged as doing address
688          * translations.  If it is then work out the adjustment that we need to apply
689          * to the transport checksum.
690          */
691         if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC) {
692                 u32 adj = 0;
693                 u32 carry = 0;
694
695                 /*
696                  * Precompute an incremental checksum adjustment so we can
697                  * edit packets in this stream very quickly.  The algorithm is from RFC1624.
698                  */
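                /*
                 * diff[] is loaded with the pre-translation address/port values
                 * and the bitwise complements of the post-translation values;
                 * their ones-complement sum is the adjustment that the fast path
                 * adds to the transport checksum.  The destination case below
                 * uses the same scheme.
                 */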
699                 idx_32 = diff;
700                 *(idx_32++) = cm->match_src_ip->addr[0];
701                 *(idx_32++) = cm->match_src_ip->addr[1];
702                 *(idx_32++) = cm->match_src_ip->addr[2];
703                 *(idx_32++) = cm->match_src_ip->addr[3];
704
705                 idx_16 = (u16 *)idx_32;
706                 *(idx_16++) = cm->match_src_port;
707                 *(idx_16++) = ~cm->xlate_src_port;
708                 idx_32 = (u32 *)idx_16;
709
710                 *(idx_32++) = ~cm->xlate_src_ip->addr[0];
711                 *(idx_32++) = ~cm->xlate_src_ip->addr[1];
712                 *(idx_32++) = ~cm->xlate_src_ip->addr[2];
713                 *(idx_32++) = ~cm->xlate_src_ip->addr[3];
714
715                 /*
716                  * When we compute this fold it down to a 16-bit offset
717                  * as that way we can avoid having to do a double
718                  * folding of the twos-complement result because the
719                  * addition of 2 16-bit values cannot cause a double
720                  * wrap-around!
721                  */
722                 for (idx_32 = diff; idx_32 < diff + 9; idx_32++) {
723                         u32 w = *idx_32;
724                         adj += carry;
725                         adj += w;
726                         carry = (w > adj);
727                 }
728                 adj += carry;
729                 adj = (adj & 0xffff) + (adj >> 16);
730                 adj = (adj & 0xffff) + (adj >> 16);
731                 cm->xlate_src_csum_adjustment = (u16)adj;
732         }
733
734         if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST) {
735                 u32 adj = 0;
736                 u32 carry = 0;
737
738                 /*
739                  * Precompute an incremental checksum adjustment so we can
740                  * edit packets in this stream very quickly.  The algorithm is from RFC1624.
741                  */
742                 idx_32 = diff;
743                 *(idx_32++) = cm->match_dest_ip->addr[0];
744                 *(idx_32++) = cm->match_dest_ip->addr[1];
745                 *(idx_32++) = cm->match_dest_ip->addr[2];
746                 *(idx_32++) = cm->match_dest_ip->addr[3];
747
748                 idx_16 = (u16 *)idx_32;
749                 *(idx_16++) = cm->match_dest_port;
750                 *(idx_16++) = ~cm->xlate_dest_port;
751                 idx_32 = (u32 *)idx_16;
752
753                 *(idx_32++) = ~cm->xlate_dest_ip->addr[0];
754                 *(idx_32++) = ~cm->xlate_dest_ip->addr[1];
755                 *(idx_32++) = ~cm->xlate_dest_ip->addr[2];
756                 *(idx_32++) = ~cm->xlate_dest_ip->addr[3];
757
758                 /*
759                  * When we compute this fold it down to a 16-bit offset
760                  * as that way we can avoid having to do a double
761                  * folding of the twos-complement result because the
762                  * addition of 2 16-bit values cannot cause a double
763                  * wrap-around!
764                  */
765                 for (idx_32 = diff; idx_32 < diff + 9; idx_32++) {
766                         u32 w = *idx_32;
767                         adj += carry;
768                         adj += w;
769                         carry = (w > adj);
770                 }
771                 adj += carry;
772                 adj = (adj & 0xffff) + (adj >> 16);
773                 adj = (adj & 0xffff) + (adj >> 16);
774                 cm->xlate_dest_csum_adjustment = (u16)adj;
775         }
776 }
777
778 /*
779  * sfe_ipv6_update_summary_stats()
780  *      Update the summary stats.
781  */
782 static void sfe_ipv6_update_summary_stats(struct sfe_ipv6 *si)
783 {
784         int i;
785
786         si->connection_create_requests64 += si->connection_create_requests;
787         si->connection_create_requests = 0;
788         si->connection_create_collisions64 += si->connection_create_collisions;
789         si->connection_create_collisions = 0;
790         si->connection_destroy_requests64 += si->connection_destroy_requests;
791         si->connection_destroy_requests = 0;
792         si->connection_destroy_misses64 += si->connection_destroy_misses;
793         si->connection_destroy_misses = 0;
794         si->connection_match_hash_hits64 += si->connection_match_hash_hits;
795         si->connection_match_hash_hits = 0;
796         si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
797         si->connection_match_hash_reorders = 0;
798         si->connection_flushes64 += si->connection_flushes;
799         si->connection_flushes = 0;
800         si->packets_forwarded64 += si->packets_forwarded;
801         si->packets_forwarded = 0;
802         si->packets_not_forwarded64 += si->packets_not_forwarded;
803         si->packets_not_forwarded = 0;
804
805         for (i = 0; i < SFE_IPV6_EXCEPTION_EVENT_LAST; i++) {
806                 si->exception_events64[i] += si->exception_events[i];
807                 si->exception_events[i] = 0;
808         }
809 }
810
811 /*
812  * sfe_ipv6_insert_connection_match()
813  *      Insert a connection match into the hash.
814  *
815  * On entry we must be holding the lock that protects the hash table.
816  */
817 static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si,
818                                                     struct sfe_ipv6_connection_match *cm)
819 {
820         struct sfe_ipv6_connection_match **hash_head;
821         struct sfe_ipv6_connection_match *prev_head;
822         unsigned int conn_match_idx
823                 = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol,
824                                                      cm->match_src_ip, cm->match_src_port,
825                                                      cm->match_dest_ip, cm->match_dest_port);
826
827         hash_head = &si->conn_match_hash[conn_match_idx];
828         prev_head = *hash_head;
829         cm->prev = NULL;
830         if (prev_head) {
831                 prev_head->prev = cm;
832         }
833
834         cm->next = prev_head;
835         *hash_head = cm;
836
837 #ifdef CONFIG_NF_FLOW_COOKIE
838         if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)))
839                 return;
840
841         /*
842          * Configure the hardware to put a flow cookie in packets of this flow,
843          * so that we can accelerate the lookup process when we receive them.
844          */
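        /*
         * Index 0 is skipped since a flow cookie of zero is used to mean "no
         * cookie"; look for a free slot that has been clean for at least HZ jiffies.
         */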
845         for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
846                 struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
847
848                 if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
849                         sfe_ipv6_flow_cookie_set_func_t func;
850
851                         rcu_read_lock();
852                         func = rcu_dereference(si->flow_cookie_set_func);
853                         if (func) {
854                                 if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
855                                          cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) {
856                                         entry->match = cm;
857                                         cm->flow_cookie = conn_match_idx;
858                                 } else {
859                                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++;
860                                 }
861                         }
862                         rcu_read_unlock();
863
864                         break;
865                 }
866         }
867 #endif
868 }
869
870 /*
871  * sfe_ipv6_remove_connection_match()
872  *      Remove a connection match object from the hash.
873  *
874  * On entry we must be holding the lock that protects the hash table.
875  */
876 static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm)
877 {
878 #ifdef CONFIG_NF_FLOW_COOKIE
879         if (si->flow_cookie_enable) {
880                 /*
881                  * Tell the hardware that we no longer need a flow cookie in packets of this flow
882                  */
883                 unsigned int conn_match_idx;
884
885                 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
886                         struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
887
888                         if (cm == entry->match) {
889                                 sfe_ipv6_flow_cookie_set_func_t func;
890
891                                 rcu_read_lock();
892                                 func = rcu_dereference(si->flow_cookie_set_func);
893                                 if (func) {
894                                         func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
895                                              cm->match_dest_ip->addr, cm->match_dest_port, 0);
896                                 }
897                                 rcu_read_unlock();
898
899                                 cm->flow_cookie = 0;
900                                 entry->match = NULL;
901                                 entry->last_clean_time = jiffies;
902                                 break;
903                         }
904                 }
905         }
906 #endif
907
908         /*
909          * Unlink the connection match entry from the hash.
910          */
911         if (cm->prev) {
912                 cm->prev->next = cm->next;
913         } else {
914                 unsigned int conn_match_idx
915                         = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol,
916                                                              cm->match_src_ip, cm->match_src_port,
917                                                              cm->match_dest_ip, cm->match_dest_port);
918                 si->conn_match_hash[conn_match_idx] = cm->next;
919         }
920
921         if (cm->next) {
922                 cm->next->prev = cm->prev;
923         }
924
925         /*
926          * If the connection match entry is in the active list remove it.
927          */
928         if (cm->active) {
929                 if (likely(cm->active_prev)) {
930                         cm->active_prev->active_next = cm->active_next;
931                 } else {
932                         si->active_head = cm->active_next;
933                 }
934
935                 if (likely(cm->active_next)) {
936                         cm->active_next->active_prev = cm->active_prev;
937                 } else {
938                         si->active_tail = cm->active_prev;
939                 }
940         }
941 }
942
943 /*
944  * sfe_ipv6_get_connection_hash()
945  *      Generate the hash used in connection lookups.
946  */
947 static inline unsigned int sfe_ipv6_get_connection_hash(u8 protocol, struct sfe_ipv6_addr *src_ip, __be16 src_port,
948                                                         struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
949 {
950         u32 idx, hash = 0;
951
952         for (idx = 0; idx < 4; idx++) {
953                 hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx];
954         }
955         hash = hash ^ protocol ^ ntohs(src_port ^ dest_port);
956         return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK;
957 }
958
959 /*
960  * sfe_ipv6_find_connection()
961  *      Get the IPv6 connection info that corresponds to a particular 5-tuple.
962  *
963  * On entry we must be holding the lock that protects the hash table.
964  */
965 static inline struct sfe_ipv6_connection *sfe_ipv6_find_connection(struct sfe_ipv6 *si, u32 protocol,
966                                                                    struct sfe_ipv6_addr *src_ip, __be16 src_port,
967                                                                    struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
968 {
969         struct sfe_ipv6_connection *c;
970         unsigned int conn_idx = sfe_ipv6_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
971         c = si->conn_hash[conn_idx];
972
973         /*
974          * If we don't have anything in this chain then bail.
975          */
976         if (unlikely(!c)) {
977                 return NULL;
978         }
979
980         /*
981          * Hopefully the first entry is the one we want.
982          */
983         if ((c->src_port == src_port)
984             && (c->dest_port == dest_port)
985             && (sfe_ipv6_addr_equal(c->src_ip, src_ip))
986             && (sfe_ipv6_addr_equal(c->dest_ip, dest_ip))
987             && (c->protocol == protocol)) {
988                 return c;
989         }
990
991         /*
992          * Unfortunately we didn't find it at the head, so we search the chain.
993          */
994         do {
995                 c = c->next;
996         } while (c && (c->src_port != src_port
997                  || c->dest_port != dest_port
998                  || !sfe_ipv6_addr_equal(c->src_ip, src_ip)
999                  || !sfe_ipv6_addr_equal(c->dest_ip, dest_ip)
1000                  || c->protocol != protocol));
1001
1002         /*
1003          * The connection entry found here is only needed for create/destroy metadata,
1004          * so there is no need to re-order the entry for these requests.
1005          */
1006         return c;
1007 }
1008
1009 /*
1010  * sfe_ipv6_mark_rule()
1011  *      Updates the mark for a current offloaded connection
1012  *
1013  * Will take hash lock upon entry
1014  */
1015 void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark)
1016 {
1017         struct sfe_ipv6 *si = &__si6;
1018         struct sfe_ipv6_connection *c;
1019
1020         spin_lock_bh(&si->lock);
1021         c = sfe_ipv6_find_connection(si, mark->protocol,
1022                                      mark->src_ip.ip6, mark->src_port,
1023                                      mark->dest_ip.ip6, mark->dest_port);
1024         if (c) {
1025                 WARN_ON((0 != c->mark) && (0 == mark->mark));
1026                 c->mark = mark->mark;
1027         }
1028         spin_unlock_bh(&si->lock);
1029
1030         if (c) {
1031                 DEBUG_TRACE("Matching connection found for mark, "
1032                             "setting from %08x to %08x\n",
1033                             c->mark, mark->mark);
1034         }
1035 }
1036
1037 /*
1038  * sfe_ipv6_insert_connection()
1039  *      Insert a connection into the hash.
1040  *
1041  * On entry we must be holding the lock that protects the hash table.
1042  */
1043 static void sfe_ipv6_insert_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c)
1044 {
1045         struct sfe_ipv6_connection **hash_head;
1046         struct sfe_ipv6_connection *prev_head;
1047         unsigned int conn_idx;
1048
1049         /*
1050          * Insert entry into the connection hash.
1051          */
1052         conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1053                                                 c->dest_ip, c->dest_port);
1054         hash_head = &si->conn_hash[conn_idx];
1055         prev_head = *hash_head;
1056         c->prev = NULL;
1057         if (prev_head) {
1058                 prev_head->prev = c;
1059         }
1060
1061         c->next = prev_head;
1062         *hash_head = c;
1063
1064         /*
1065          * Insert entry into the "all connections" list.
1066          */
1067         if (si->all_connections_tail) {
1068                 c->all_connections_prev = si->all_connections_tail;
1069                 si->all_connections_tail->all_connections_next = c;
1070         } else {
1071                 c->all_connections_prev = NULL;
1072                 si->all_connections_head = c;
1073         }
1074
1075         si->all_connections_tail = c;
1076         c->all_connections_next = NULL;
1077         si->num_connections++;
1078
1079         /*
1080          * Insert the connection match objects too.
1081          */
1082         sfe_ipv6_insert_connection_match(si, c->original_match);
1083         sfe_ipv6_insert_connection_match(si, c->reply_match);
1084 }
1085
1086 /*
1087  * sfe_ipv6_remove_connection()
1088  *      Remove a sfe_ipv6_connection object from the hash.
1089  *
1090  * On entry we must be holding the lock that protects the hash table.
1091  */
1092 static void sfe_ipv6_remove_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c)
1093 {
1094         /*
1095          * Remove the connection match objects.
1096          */
1097         sfe_ipv6_remove_connection_match(si, c->reply_match);
1098         sfe_ipv6_remove_connection_match(si, c->original_match);
1099
1100         /*
1101          * Unlink the connection.
1102          */
1103         if (c->prev) {
1104                 c->prev->next = c->next;
1105         } else {
1106                 unsigned int conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1107                                                                      c->dest_ip, c->dest_port);
1108                 si->conn_hash[conn_idx] = c->next;
1109         }
1110
1111         if (c->next) {
1112                 c->next->prev = c->prev;
1113         }
1114
1115         /*
1116          * Unlink connection from all_connections list
1117          */
1118         if (c->all_connections_prev) {
1119                 c->all_connections_prev->all_connections_next = c->all_connections_next;
1120         } else {
1121                 si->all_connections_head = c->all_connections_next;
1122         }
1123
1124         if (c->all_connections_next) {
1125                 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1126         } else {
1127                 si->all_connections_tail = c->all_connections_prev;
1128         }
1129
1130         si->num_connections--;
1131 }
1132
1133 /*
1134  * sfe_ipv6_gen_sync_connection()
1135  *      Sync a connection.
1136  *
1137  * On entry to this function we expect that the lock for the connection is either
1138  * already held or isn't required.
1139  */
1140 static void sfe_ipv6_gen_sync_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c,
1141                                         struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
1142                                         u64 now_jiffies)
1143 {
1144         struct sfe_ipv6_connection_match *original_cm;
1145         struct sfe_ipv6_connection_match *reply_cm;
1146
1147         /*
1148          * Fill in the update message.
1149          */
1150         sis->is_v6 = 1;
1151         sis->protocol = c->protocol;
1152         sis->src_ip.ip6[0] = c->src_ip[0];
1153         sis->src_ip_xlate.ip6[0] = c->src_ip_xlate[0];
1154         sis->dest_ip.ip6[0] = c->dest_ip[0];
1155         sis->dest_ip_xlate.ip6[0] = c->dest_ip_xlate[0];
1156         sis->src_port = c->src_port;
1157         sis->src_port_xlate = c->src_port_xlate;
1158         sis->dest_port = c->dest_port;
1159         sis->dest_port_xlate = c->dest_port_xlate;
1160
1161         original_cm = c->original_match;
1162         reply_cm = c->reply_match;
1163         sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
1164         sis->src_td_end = original_cm->protocol_state.tcp.end;
1165         sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
1166         sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
1167         sis->dest_td_end = reply_cm->protocol_state.tcp.end;
1168         sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
1169
1170         sis->src_new_packet_count = original_cm->rx_packet_count;
1171         sis->src_new_byte_count = original_cm->rx_byte_count;
1172         sis->dest_new_packet_count = reply_cm->rx_packet_count;
1173         sis->dest_new_byte_count = reply_cm->rx_byte_count;
1174
1175         sfe_ipv6_connection_match_update_summary_stats(original_cm);
1176         sfe_ipv6_connection_match_update_summary_stats(reply_cm);
1177
1178         sis->src_dev = original_cm->match_dev;
1179         sis->src_packet_count = original_cm->rx_packet_count64;
1180         sis->src_byte_count = original_cm->rx_byte_count64;
1181
1182         sis->dest_dev = reply_cm->match_dev;
1183         sis->dest_packet_count = reply_cm->rx_packet_count64;
1184         sis->dest_byte_count = reply_cm->rx_byte_count64;
1185
1186         sis->reason = reason;
1187
1188         /*
1189          * Get the time increment since our last sync.
1190          */
1191         sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
1192         c->last_sync_jiffies = now_jiffies;
1193 }
1194
1195 /*
1196  * sfe_ipv6_flush_connection()
1197  *      Flush a connection and free all associated resources.
1198  *
1199  * We need to be called with bottom halves disabled locally as we need to acquire
1200  * the connection hash lock and release it again.  In general we're actually called
1201  * from within a BH and so we're fine, but we're also called when connections are
1202  * torn down.
1203  */
1204 static void sfe_ipv6_flush_connection(struct sfe_ipv6 *si,
1205                                       struct sfe_ipv6_connection *c,
1206                                       sfe_sync_reason_t reason)
1207 {
1208         struct sfe_connection_sync sis;
1209         u64 now_jiffies;
1210         sfe_sync_rule_callback_t sync_rule_callback;
1211
1212         rcu_read_lock();
1213         spin_lock_bh(&si->lock);
1214         si->connection_flushes++;
1215         sync_rule_callback = rcu_dereference(si->sync_rule_callback);
1216         spin_unlock_bh(&si->lock);
1217
1218         if (sync_rule_callback) {
1219                 /*
1220                  * Generate a sync message and then sync.
1221                  */
1222                 now_jiffies = get_jiffies_64();
1223                 sfe_ipv6_gen_sync_connection(si, c, &sis, reason, now_jiffies);
1224                 sync_rule_callback(&sis);
1225         }
1226
1227         rcu_read_unlock();
1228
1229         /*
1230          * Release our hold of the source and dest devices and free the memory
1231          * for our connection objects.
1232          */
1233         dev_put(c->original_dev);
1234         dev_put(c->reply_dev);
1235         kfree(c->original_match);
1236         kfree(c->reply_match);
1237         kfree(c);
1238 }
1239
1240 /*
1241  * sfe_ipv6_recv_udp()
1242  *      Handle UDP packet receives and forwarding.
1243  */
1244 static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
1245                              unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
1246 {
1247         struct sfe_ipv6_udp_hdr *udph;
1248         struct sfe_ipv6_addr *src_ip;
1249         struct sfe_ipv6_addr *dest_ip;
1250         __be16 src_port;
1251         __be16 dest_port;
1252         struct sfe_ipv6_connection_match *cm;
1253         struct net_device *xmit_dev;
1254
1255         /*
1256          * Is our packet too short to contain a valid UDP header?
1257          */
1258         if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_udp_hdr) + ihl))) {
1259                 spin_lock_bh(&si->lock);
1260                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1261                 si->packets_not_forwarded++;
1262                 spin_unlock_bh(&si->lock);
1263
1264                 DEBUG_TRACE("packet too short for UDP header\n");
1265                 return 0;
1266         }
1267
1268         /*
1269          * Read the IP address and port information.  Read the IP header data first
1270          * because we've almost certainly got that in the cache.  We may not yet have
1271          * the UDP header cached though so allow more time for any prefetching.
1272          */
1273         src_ip = &iph->saddr;
1274         dest_ip = &iph->daddr;
1275
1276         udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
1277         src_port = udph->source;
1278         dest_port = udph->dest;
1279
1280         spin_lock_bh(&si->lock);
1281
1282         /*
1283          * Look for a connection match.
1284          */
1285 #ifdef CONFIG_NF_FLOW_COOKIE
1286         cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1287         if (unlikely(!cm)) {
1288                 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1289         }
1290 #else
1291         cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1292 #endif
1293         if (unlikely(!cm)) {
1294                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1295                 si->packets_not_forwarded++;
1296                 spin_unlock_bh(&si->lock);
1297
1298                 DEBUG_TRACE("no connection found\n");
1299                 return 0;
1300         }
1301
1302         /*
1303          * If our packet has been marked as "flush on find" we can't actually
1304          * forward it in the fast path, but now that we've found an associated
1305          * connection we can flush that out before we process the packet.
1306          */
1307         if (unlikely(flush_on_find)) {
1308                 struct sfe_ipv6_connection *c = cm->connection;
1309                 sfe_ipv6_remove_connection(si, c);
1310                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1311                 si->packets_not_forwarded++;
1312                 spin_unlock_bh(&si->lock);
1313
1314                 DEBUG_TRACE("flush on find\n");
1315                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1316                 return 0;
1317         }
1318
1319 #ifdef CONFIG_XFRM
1320         /*
1321          * We can't accelerate the flow in this direction, so just let it go
1322          * through the slow path.
1323          */
1324         if (unlikely(!cm->flow_accel)) {
1325                 si->packets_not_forwarded++;
1326                 spin_unlock_bh(&si->lock);
1327                 return 0;
1328         }
1329 #endif
1330
1331         /*
1332          * Does our hop_limit allow forwarding?
1333          */
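        /*
         * A hop_limit below 2 would reach zero once we decrement it, so hand
         * the packet back to the slow path and flush the connection.
         */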
1334         if (unlikely(iph->hop_limit < 2)) {
1335                 struct sfe_ipv6_connection *c = cm->connection;
1336                 sfe_ipv6_remove_connection(si, c);
1337                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1338                 si->packets_not_forwarded++;
1339                 spin_unlock_bh(&si->lock);
1340
1341                 DEBUG_TRACE("hop_limit too low\n");
1342                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1343                 return 0;
1344         }
1345
1346         /*
1347          * If our packet is larger than the MTU of the transmit interface then
1348          * we can't forward it easily.
1349          */
1350         if (unlikely(len > cm->xmit_dev_mtu)) {
1351                 struct sfe_ipv6_connection *c = cm->connection;
1352                 sfe_ipv6_remove_connection(si, c);
1353                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1354                 si->packets_not_forwarded++;
1355                 spin_unlock_bh(&si->lock);
1356
1357                 DEBUG_TRACE("larger than mtu\n");
1358                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1359                 return 0;
1360         }
1361
1362         /*
1363          * From this point on we're good to modify the packet.
1364          */
1365
1366         /*
1367          * Check if the skb was cloned and, if so, unshare it, because
1368          * the data area is going to be written in this path and we don't want to
1369          * modify the cloned skb's data section.
1370          */
1371         if (unlikely(skb_cloned(skb))) {
1372                 DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
1373                 skb = skb_unshare(skb, GFP_ATOMIC);
1374                 if (!skb) {
1375                         DEBUG_WARN("Failed to unshare the cloned skb\n");
1376                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
1377                         si->packets_not_forwarded++;
1378                         spin_unlock_bh(&si->lock);
1379
1380                         return 0;
1381                 }
1382
1383                 /*
1384                  * Update the iph and udph pointers with the unshared skb's data area.
1385                  */
1386                 iph = (struct sfe_ipv6_ip_hdr *)skb->data;
1387                 udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
1388         }
1389
1390         /*
1391          * Update DSCP
1392          */
1393         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1394                 sfe_ipv6_change_dsfield(iph, cm->dscp);
1395         }
1396
1397         /*
1398          * Decrement our hop_limit.
1399          */
1400         iph->hop_limit -= 1;
1401
1402         /*
1403          * Do we have to perform translations of the source address/port?
1404          */
1405         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1406                 u16 udp_csum;
1407
1408                 iph->saddr = cm->xlate_src_ip[0];
1409                 udph->source = cm->xlate_src_port;
1410
1411                 /*
1412                  * Do we have a non-zero UDP checksum?  If we do then we need
1413                  * to update it.
1414                  */
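                /*
                 * A zero checksum is left untouched; otherwise we add the
                 * precomputed one's-complement adjustment and fold any carry back
                 * into the low 16 bits (the standard incremental checksum update,
                 * cf. RFC 1624).
                 */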
1415                 udp_csum = udph->check;
1416                 if (likely(udp_csum)) {
1417                         u32 sum = udp_csum + cm->xlate_src_csum_adjustment;
1418                         sum = (sum & 0xffff) + (sum >> 16);
1419                         udph->check = (u16)sum;
1420                 }
1421         }
1422
1423         /*
1424          * Do we have to perform translations of the destination address/port?
1425          */
1426         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1427                 u16 udp_csum;
1428
1429                 iph->daddr = cm->xlate_dest_ip[0];
1430                 udph->dest = cm->xlate_dest_port;
1431
1432                 /*
1433                  * Do we have a non-zero UDP checksum?  If we do then we need
1434                  * to update it.
1435                  */
1436                 udp_csum = udph->check;
1437                 if (likely(udp_csum)) {
1438                         u32 sum = udp_csum + cm->xlate_dest_csum_adjustment;
1439                         sum = (sum & 0xffff) + (sum >> 16);
1440                         udph->check = (u16)sum;
1441                 }
1442         }
1443
1444         /*
1445          * Update traffic stats.
1446          */
1447         cm->rx_packet_count++;
1448         cm->rx_byte_count += len;
1449
1450         /*
1451          * If we're not already on the active list then insert ourselves at the tail
1452          * of the current list.
1453          */
1454         if (unlikely(!cm->active)) {
1455                 cm->active = true;
1456                 cm->active_prev = si->active_tail;
1457                 if (likely(si->active_tail)) {
1458                         si->active_tail->active_next = cm;
1459                 } else {
1460                         si->active_head = cm;
1461                 }
1462                 si->active_tail = cm;
1463         }
1464
1465         xmit_dev = cm->xmit_dev;
1466         skb->dev = xmit_dev;
1467
1468         /*
1469          * Check to see if we need to write a header.
1470          */
1471         if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1472                 if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
1473                         dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
1474                                         cm->xmit_dest_mac, cm->xmit_src_mac, len);
1475                 } else {
1476                         /*
1477                          * For the simple case we write this really fast.
1478                          */
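                        /*
                         * The MAC addresses are held as three 16-bit words each, so
                         * six half-word stores fill in the Ethernet header without
                         * the overhead of a dev_hard_header() call.
                         */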
1479                         struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
1480                         eth->h_proto = htons(ETH_P_IPV6);
1481                         eth->h_dest[0] = cm->xmit_dest_mac[0];
1482                         eth->h_dest[1] = cm->xmit_dest_mac[1];
1483                         eth->h_dest[2] = cm->xmit_dest_mac[2];
1484                         eth->h_source[0] = cm->xmit_src_mac[0];
1485                         eth->h_source[1] = cm->xmit_src_mac[1];
1486                         eth->h_source[2] = cm->xmit_src_mac[2];
1487                 }
1488         }
1489
1490         /*
1491          * Update priority of skb.
1492          */
1493         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
1494                 skb->priority = cm->priority;
1495         }
1496
1497         /*
1498          * Mark outgoing packet.
1499          */
1500         skb->mark = cm->connection->mark;
1501         if (skb->mark) {
1502                 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1503         }
1504
1505         si->packets_forwarded++;
1506         spin_unlock_bh(&si->lock);
1507
1508         /*
1509          * We're going to check for GSO flags when we transmit the packet so
1510          * start fetching the necessary cache line now.
1511          */
1512         prefetch(skb_shinfo(skb));
1513
1514         /*
1515          * Mark that this packet has been fast forwarded.
1516          */
1517         skb->fast_forwarded = 1;
1518
1519         /*
1520          * Send the packet on its way.
1521          */
1522         dev_queue_xmit(skb);
1523
1524         return 1;
1525 }
1526
1527 /*
1528  * sfe_ipv6_process_tcp_option_sack()
1529  *      Parse TCP SACK option and update ack accordingly
1530  */
1531 static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const u32 data_offs,
1532                                              u32 *ack)
1533 {
1534         u32 length = sizeof(struct sfe_ipv6_tcp_hdr);
1535         u8 *ptr = (u8 *)th + length;
1536
1537         /*
1538          * Ignore processing if TCP packet has only TIMESTAMP option.
1539          * Skip further parsing if the TCP packet carries only the TIMESTAMP option (preceded by two NOPs).
1540         if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1541             && likely(ptr[0] == TCPOPT_NOP)
1542             && likely(ptr[1] == TCPOPT_NOP)
1543             && likely(ptr[2] == TCPOPT_TIMESTAMP)
1544             && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1545                 return true;
1546         }
1547
1548         /*
1549          * TCP options. Parse SACK option.
1550          */
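        /*
         * Every option other than EOL and NOP is encoded as kind (1 byte),
         * length (1 byte, covering the kind and length fields themselves) and
         * data.  A SACK option carries one or more blocks of two 32-bit sequence
         * numbers (left and right edge); we track the largest right edge seen
         * and use it to advance the ack value if it is ahead of the cumulative
         * ACK.
         */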
1551         while (length < data_offs) {
1552                 u8 size;
1553                 u8 kind;
1554
1555                 ptr = (u8 *)th + length;
1556                 kind = *ptr;
1557
1558                 /*
1559                  * NOP, used for padding.
1560                  * Handled outside the main option parsing so we can advance quickly without reading a length byte.
1561                  */
1562                 if (kind == TCPOPT_NOP) {
1563                         length++;
1564                         continue;
1565                 }
1566
1567                 if (kind == TCPOPT_SACK) {
1568                         u32 sack = 0;
1569                         u8 re = 1 + 1;
1570
1571                         size = *(ptr + 1);
1572                         if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1573                             || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1574                             || (size > (data_offs - length))) {
1575                                 return false;
1576                         }
1577
1578                         re += 4;
1579                         while (re < size) {
1580                                 u32 sack_re;
1581                                 u8 *sptr = ptr + re;
1582                                 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1583                                 if (sack_re > sack) {
1584                                         sack = sack_re;
1585                                 }
1586                                 re += TCPOLEN_SACK_PERBLOCK;
1587                         }
1588                         if (sack > *ack) {
1589                                 *ack = sack;
1590                         }
1591                         length += size;
1592                         continue;
1593                 }
1594                 if (kind == TCPOPT_EOL) {
1595                         return true;
1596                 }
1597                 size = *(ptr + 1);
1598                 if (size < 2) {
1599                         return false;
1600                 }
1601                 length += size;
1602         }
1603
1604         return true;
1605 }
1606
1607 /*
1608  * sfe_ipv6_recv_tcp()
1609  *      Handle TCP packet receives and forwarding.
1610  */
1611 static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
1612                              unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
1613 {
1614         struct sfe_ipv6_tcp_hdr *tcph;
1615         struct sfe_ipv6_addr *src_ip;
1616         struct sfe_ipv6_addr *dest_ip;
1617         __be16 src_port;
1618         __be16 dest_port;
1619         struct sfe_ipv6_connection_match *cm;
1620         struct sfe_ipv6_connection_match *counter_cm;
1621         u32 flags;
1622         struct net_device *xmit_dev;
1623
1624         /*
1625          * Is our packet too short to contain a valid TCP header?
1626          */
1627         if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_tcp_hdr) + ihl))) {
1628                 spin_lock_bh(&si->lock);
1629                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1630                 si->packets_not_forwarded++;
1631                 spin_unlock_bh(&si->lock);
1632
1633                 DEBUG_TRACE("packet too short for TCP header\n");
1634                 return 0;
1635         }
1636
1637         /*
1638          * Read the IP address and port information.  Read the IP header data first
1639          * because we've almost certainly got that in the cache.  We may not yet have
1640          * the TCP header cached though so allow more time for any prefetching.
1641          */
1642         src_ip = &iph->saddr;
1643         dest_ip = &iph->daddr;
1644
1645         tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
1646         src_port = tcph->source;
1647         dest_port = tcph->dest;
1648         flags = tcp_flag_word(tcph);
1649
1650         spin_lock_bh(&si->lock);
1651
1652         /*
1653          * Look for a connection match.
1654          */
1655 #ifdef CONFIG_NF_FLOW_COOKIE
1656         cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1657         if (unlikely(!cm)) {
1658                 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1659         }
1660 #else
1661         cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1662 #endif
1663         if (unlikely(!cm)) {
1664                 /*
1665                  * We didn't get a connection but as TCP is connection-oriented that
1666          * may be because this is a non-fast connection (not yet established).
1667                  * For diagnostic purposes we differentiate this here.
1668                  */
1669                 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1670                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1671                         si->packets_not_forwarded++;
1672                         spin_unlock_bh(&si->lock);
1673
1674                         DEBUG_TRACE("no connection found - fast flags\n");
1675                         return 0;
1676                 }
1677                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1678                 si->packets_not_forwarded++;
1679                 spin_unlock_bh(&si->lock);
1680
1681                 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1682                             flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1683                 return 0;
1684         }
1685
1686         /*
1687          * If our packet has been marked as "flush on find" we can't actually
1688          * forward it in the fast path, but now that we've found an associated
1689          * connection we can flush that out before we process the packet.
1690          */
1691         if (unlikely(flush_on_find)) {
1692                 struct sfe_ipv6_connection *c = cm->connection;
1693                 sfe_ipv6_remove_connection(si, c);
1694                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1695                 si->packets_not_forwarded++;
1696                 spin_unlock_bh(&si->lock);
1697
1698                 DEBUG_TRACE("flush on find\n");
1699                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1700                 return 0;
1701         }
1702
1703 #ifdef CONFIG_XFRM
1704         /*
1705          * We can't accelerate the flow in this direction, so just let it go
1706          * through the slow path.
1707          */
1708         if (unlikely(!cm->flow_accel)) {
1709                 si->packets_not_forwarded++;
1710                 spin_unlock_bh(&si->lock);
1711                 return 0;
1712         }
1713 #endif
1714
1715         /*
1716          * Does our hop_limit allow forwarding?
1717          */
1718         if (unlikely(iph->hop_limit < 2)) {
1719                 struct sfe_ipv6_connection *c = cm->connection;
1720                 sfe_ipv6_remove_connection(si, c);
1721                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1722                 si->packets_not_forwarded++;
1723                 spin_unlock_bh(&si->lock);
1724
1725                 DEBUG_TRACE("hop_limit too low\n");
1726                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1727                 return 0;
1728         }
1729
1730         /*
1731          * If our packet is larger than the MTU of the transmit interface then
1732          * we can't forward it easily.
1733          */
1734         if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
1735                 struct sfe_ipv6_connection *c = cm->connection;
1736                 sfe_ipv6_remove_connection(si, c);
1737                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1738                 si->packets_not_forwarded++;
1739                 spin_unlock_bh(&si->lock);
1740
1741                 DEBUG_TRACE("larger than mtu\n");
1742                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1743                 return 0;
1744         }
1745
1746         /*
1747          * Look at our TCP flags.  Anything missing an ACK or that has RST, SYN or FIN
1748          * set is not a fast path packet.
1749          */
1750         if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1751                 struct sfe_ipv6_connection *c = cm->connection;
1752                 sfe_ipv6_remove_connection(si, c);
1753                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS]++;
1754                 si->packets_not_forwarded++;
1755                 spin_unlock_bh(&si->lock);
1756
1757                 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1758                             flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1759                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1760                 return 0;
1761         }
1762
1763         counter_cm = cm->counter_match;
1764
1765         /*
1766          * Are we doing sequence number checking?
1767          */
1768         if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1769                 u32 seq;
1770                 u32 ack;
1771                 u32 sack;
1772                 u32 data_offs;
1773                 u32 end;
1774                 u32 left_edge;
1775                 u32 scaled_win;
1776                 u32 max_end;
1777
1778                 /*
1779                  * Is our sequence fully past the right hand edge of the window?
1780                  */
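                /*
                 * The sequence-space comparisons below are made on the signed
                 * difference of two unsigned 32-bit values, which stays correct
                 * when the sequence numbers wrap around.
                 */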
1781                 seq = ntohl(tcph->seq);
1782                 if (unlikely((s32)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1783                         struct sfe_ipv6_connection *c = cm->connection;
1784                         sfe_ipv6_remove_connection(si, c);
1785                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1786                         si->packets_not_forwarded++;
1787                         spin_unlock_bh(&si->lock);
1788
1789                         DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1790                                     seq, cm->protocol_state.tcp.max_end + 1);
1791                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1792                         return 0;
1793                 }
1794
1795                 /*
1796                  * Check that our TCP data offset isn't too short.
1797                  */
1798                 data_offs = tcph->doff << 2;
1799                 if (unlikely(data_offs < sizeof(struct sfe_ipv6_tcp_hdr))) {
1800                         struct sfe_ipv6_connection *c = cm->connection;
1801                         sfe_ipv6_remove_connection(si, c);
1802                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1803                         si->packets_not_forwarded++;
1804                         spin_unlock_bh(&si->lock);
1805
1806                         DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
1807                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1808                         return 0;
1809                 }
1810
1811                 /*
1812                  * Update ACK according to any SACK option.
1813                  */
1814                 ack = ntohl(tcph->ack_seq);
1815                 sack = ack;
1816                 if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) {
1817                         struct sfe_ipv6_connection *c = cm->connection;
1818                         sfe_ipv6_remove_connection(si, c);
1819                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1820                         si->packets_not_forwarded++;
1821                         spin_unlock_bh(&si->lock);
1822
1823                         DEBUG_TRACE("TCP option SACK size is wrong\n");
1824                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1825                         return 0;
1826                 }
1827
1828                 /*
1829                  * Check that our TCP data offset isn't past the end of the packet.
1830                  */
1831                 data_offs += sizeof(struct sfe_ipv6_ip_hdr);
1832                 if (unlikely(len < data_offs)) {
1833                         struct sfe_ipv6_connection *c = cm->connection;
1834                         sfe_ipv6_remove_connection(si, c);
1835                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1836                         si->packets_not_forwarded++;
1837                         spin_unlock_bh(&si->lock);
1838
1839                         DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1840                                     data_offs, len);
1841                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1842                         return 0;
1843                 }
1844
1845                 end = seq + len - data_offs;
1846
1847                 /*
1848                  * Is our sequence fully before the left hand edge of the window?
1849                  */
1850                 if (unlikely((s32)(end - (cm->protocol_state.tcp.end
1851                                                 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1852                         struct sfe_ipv6_connection *c = cm->connection;
1853                         sfe_ipv6_remove_connection(si, c);
1854                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1855                         si->packets_not_forwarded++;
1856                         spin_unlock_bh(&si->lock);
1857
1858                         DEBUG_TRACE("seq: %u before left edge: %u\n",
1859                                     end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
1860                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1861                         return 0;
1862                 }
1863
1864                 /*
1865                  * Are we acking data that is to the right of what has been sent?
1866                  */
1867                 if (unlikely((s32)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1868                         struct sfe_ipv6_connection *c = cm->connection;
1869                         sfe_ipv6_remove_connection(si, c);
1870                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1871                         si->packets_not_forwarded++;
1872                         spin_unlock_bh(&si->lock);
1873
1874                         DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1875                                     sack, counter_cm->protocol_state.tcp.end + 1);
1876                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1877                         return 0;
1878                 }
1879
1880                 /*
1881                  * Is our ack too far before the left hand edge of the window?
1882                  */
1883                 left_edge = counter_cm->protocol_state.tcp.end
1884                             - cm->protocol_state.tcp.max_win
1885                             - SFE_IPV6_TCP_MAX_ACK_WINDOW
1886                             - 1;
1887                 if (unlikely((s32)(sack - left_edge) < 0)) {
1888                         struct sfe_ipv6_connection *c = cm->connection;
1889                         sfe_ipv6_remove_connection(si, c);
1890                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1891                         si->packets_not_forwarded++;
1892                         spin_unlock_bh(&si->lock);
1893
1894                         DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
1895                         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
1896                         return 0;
1897                 }
1898
1899                 /*
1900                  * Have we just seen the largest window size yet for this connection?  If yes
1901                  * then we need to record the new value.
1902                  */
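                /*
                 * The advertised window is scaled by the window-scale factor
                 * recorded for this direction and widened by any data SACKed
                 * beyond the cumulative ACK.
                 */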
1903                 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
1904                 scaled_win += (sack - ack);
1905                 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1906                         cm->protocol_state.tcp.max_win = scaled_win;
1907                 }
1908
1909                 /*
1910                  * If our sequence and/or ack numbers have advanced then record the new state.
1911                  */
1912                 if (likely((s32)(end - cm->protocol_state.tcp.end) >= 0)) {
1913                         cm->protocol_state.tcp.end = end;
1914                 }
1915
1916                 max_end = sack + scaled_win;
1917                 if (likely((s32)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1918                         counter_cm->protocol_state.tcp.max_end = max_end;
1919                 }
1920         }
1921
1922         /*
1923          * From this point on we're good to modify the packet.
1924          */
1925
1926         /*
1927          * Check if the skb was cloned and, if so, unshare it, because
1928          * the data area is going to be written in this path and we don't want to
1929          * modify the cloned skb's data section.
1930          */
1931         if (unlikely(skb_cloned(skb))) {
1932                 DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
1933                 skb = skb_unshare(skb, GFP_ATOMIC);
1934                 if (!skb) {
1935                         DEBUG_WARN("Failed to unshare the cloned skb\n");
1936                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
1937                         si->packets_not_forwarded++;
1938                         spin_unlock_bh(&si->lock);
1939
1940                         return 0;
1941                 }
1942
1943                 /*
1944                  * Update the iph and tcph pointers with the unshared skb's data area.
1945                  */
1946                 iph = (struct sfe_ipv6_ip_hdr *)skb->data;
1947                 tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
1948         }
1949
1950         /*
1951          * Update DSCP
1952          */
1953         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1954                 sfe_ipv6_change_dsfield(iph, cm->dscp);
1955         }
1956
1957         /*
1958          * Decrement our hop_limit.
1959          */
1960         iph->hop_limit -= 1;
1961
1962         /*
1963          * Do we have to perform translations of the source address/port?
1964          */
1965         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1966                 u16 tcp_csum;
1967                 u32 sum;
1968
1969                 iph->saddr = cm->xlate_src_ip[0];
1970                 tcph->source = cm->xlate_src_port;
1971
1972                 /*
1973                  * The TCP checksum is mandatory, so apply the precomputed
1974                  * adjustment for the translated source address/port.
1975                  */
1976                 tcp_csum = tcph->check;
1977                 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1978                 sum = (sum & 0xffff) + (sum >> 16);
1979                 tcph->check = (u16)sum;
1980         }
1981
1982         /*
1983          * Do we have to perform translations of the destination address/port?
1984          */
1985         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1986                 u16 tcp_csum;
1987                 u32 sum;
1988
1989                 iph->daddr = cm->xlate_dest_ip[0];
1990                 tcph->dest = cm->xlate_dest_port;
1991
1992                 /*
1993                  * The TCP checksum is mandatory, so apply the precomputed
1994                  * adjustment for the translated destination address/port.
1995                  */
1996                 tcp_csum = tcph->check;
1997                 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1998                 sum = (sum & 0xffff) + (sum >> 16);
1999                 tcph->check = (u16)sum;
2000         }
2001
2002         /*
2003          * Update traffic stats.
2004          */
2005         cm->rx_packet_count++;
2006         cm->rx_byte_count += len;
2007
2008         /*
2009          * If we're not already on the active list then insert ourselves at the tail
2010          * of the current list.
2011          */
2012         if (unlikely(!cm->active)) {
2013                 cm->active = true;
2014                 cm->active_prev = si->active_tail;
2015                 if (likely(si->active_tail)) {
2016                         si->active_tail->active_next = cm;
2017                 } else {
2018                         si->active_head = cm;
2019                 }
2020                 si->active_tail = cm;
2021         }
2022
2023         xmit_dev = cm->xmit_dev;
2024         skb->dev = xmit_dev;
2025
2026         /*
2027          * Check to see if we need to write a header.
2028          */
2029         if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
2030                 if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
2031                         dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
2032                                         cm->xmit_dest_mac, cm->xmit_src_mac, len);
2033                 } else {
2034                         /*
2035                          * For the simple case we write this really fast.
2036                          */
2037                         struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
2038                         eth->h_proto = htons(ETH_P_IPV6);
2039                         eth->h_dest[0] = cm->xmit_dest_mac[0];
2040                         eth->h_dest[1] = cm->xmit_dest_mac[1];
2041                         eth->h_dest[2] = cm->xmit_dest_mac[2];
2042                         eth->h_source[0] = cm->xmit_src_mac[0];
2043                         eth->h_source[1] = cm->xmit_src_mac[1];
2044                         eth->h_source[2] = cm->xmit_src_mac[2];
2045                 }
2046         }
2047
2048         /*
2049          * Update priority of skb.
2050          */
2051         if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
2052                 skb->priority = cm->priority;
2053         }
2054
2055         /*
2056          * Mark outgoing packet
2057          */
2058         skb->mark = cm->connection->mark;
2059         if (skb->mark) {
2060                 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
2061         }
2062
2063         si->packets_forwarded++;
2064         spin_unlock_bh(&si->lock);
2065
2066         /*
2067          * We're going to check for GSO flags when we transmit the packet so
2068          * start fetching the necessary cache line now.
2069          */
2070         prefetch(skb_shinfo(skb));
2071
2072         /*
2073          * Mark that this packet has been fast forwarded.
2074          */
2075         skb->fast_forwarded = 1;
2076
2077         /*
2078          * Send the packet on its way.
2079          */
2080         dev_queue_xmit(skb);
2081
2082         return 1;
2083 }
2084
2085 /*
2086  * sfe_ipv6_recv_icmp()
2087  *      Handle ICMP packet receives.
2088  *
2089  * ICMP packets aren't handled in the fast path; they are always processed by
2090  * the default Linux stack.  What we do need to do is look for any errors
2091  * about connections we are handling in the fast path.  If we find any such
2092  * connections then we want to flush their state so that the ICMP error path
2093  * within Linux has all of the correct state should it need it.
2094  */
2095 static int sfe_ipv6_recv_icmp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
2096                               unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl)
2097 {
2098         struct icmp6hdr *icmph;
2099         struct sfe_ipv6_ip_hdr *icmp_iph;
2100         struct sfe_ipv6_udp_hdr *icmp_udph;
2101         struct sfe_ipv6_tcp_hdr *icmp_tcph;
2102         struct sfe_ipv6_addr *src_ip;
2103         struct sfe_ipv6_addr *dest_ip;
2104         __be16 src_port;
2105         __be16 dest_port;
2106         struct sfe_ipv6_connection_match *cm;
2107         struct sfe_ipv6_connection *c;
2108         u8 next_hdr;
2109
2110         /*
2111          * Is our packet too short to contain a valid ICMP header?
2112          */
2113         len -= ihl;
2114         if (!pskb_may_pull(skb, ihl + sizeof(struct icmp6hdr))) {
2115                 spin_lock_bh(&si->lock);
2116                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2117                 si->packets_not_forwarded++;
2118                 spin_unlock_bh(&si->lock);
2119
2120                 DEBUG_TRACE("packet too short for ICMP header\n");
2121                 return 0;
2122         }
2123
2124         /*
2125          * We only handle "destination unreachable" and "time exceeded" messages.
2126          */
2127         icmph = (struct icmp6hdr *)(skb->data + ihl);
2128         if ((icmph->icmp6_type != ICMPV6_DEST_UNREACH)
2129             && (icmph->icmp6_type != ICMPV6_TIME_EXCEED)) {
2130                 spin_lock_bh(&si->lock);
2131                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2132                 si->packets_not_forwarded++;
2133                 spin_unlock_bh(&si->lock);
2134
2135                 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->icmp6_type);
2136                 return 0;
2137         }
2138
2139         /*
2140          * Do we have the full embedded IP header?
2141          * We should have 8 bytes of the next L4 header - that's enough to identify
2142          * the connection.
2143          */
2144         len -= sizeof(struct icmp6hdr);
2145         ihl += sizeof(struct icmp6hdr);
2146         if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ip_hdr) + sizeof(struct sfe_ipv6_ext_hdr))) {
2147                 spin_lock_bh(&si->lock);
2148                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE]++;
2149                 si->packets_not_forwarded++;
2150                 spin_unlock_bh(&si->lock);
2151
2152                 DEBUG_TRACE("Embedded IP header not complete\n");
2153                 return 0;
2154         }
2155
2156         /*
2157          * Is our embedded IP version wrong?
2158          */
2159         icmp_iph = (struct sfe_ipv6_ip_hdr *)(icmph + 1);
2160         if (unlikely(icmp_iph->version != 6)) {
2161                 spin_lock_bh(&si->lock);
2162                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6]++;
2163                 si->packets_not_forwarded++;
2164                 spin_unlock_bh(&si->lock);
2165
2166                 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2167                 return 0;
2168         }
2169
2170         len -= sizeof(struct sfe_ipv6_ip_hdr);
2171         ihl += sizeof(struct sfe_ipv6_ip_hdr);
2172         next_hdr = icmp_iph->nexthdr;
2173         while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
2174                 struct sfe_ipv6_ext_hdr *ext_hdr;
2175                 unsigned int ext_hdr_len;
2176
2177                 ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
2178                 if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
2179                         struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
2180                         unsigned int frag_off = ntohs(frag_hdr->frag_off);
2181
2182                         if (frag_off & SFE_IPV6_FRAG_OFFSET) {
2183                                 spin_lock_bh(&si->lock);
2184                                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2185                                 si->packets_not_forwarded++;
2186                                 spin_unlock_bh(&si->lock);
2187
2188                                 DEBUG_TRACE("non-initial fragment\n");
2189                                 return 0;
2190                         }
2191                 }
2192
2193                 ext_hdr_len = ext_hdr->hdr_len;
2194                 ext_hdr_len <<= 3;
2195                 ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
2196                 len -= ext_hdr_len;
2197                 ihl += ext_hdr_len;
2198                 /*
2199                  * We should have 8 bytes of the next header - that's enough to identify
2200                  * the connection.
2201                  */
2202                 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
2203                         spin_lock_bh(&si->lock);
2204                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2205                         si->packets_not_forwarded++;
2206                         spin_unlock_bh(&si->lock);
2207
2208                         DEBUG_TRACE("extension header %d not completed\n", next_hdr);
2209                         return 0;
2210                 }
2211
2212                 next_hdr = ext_hdr->next_hdr;
2213         }
2214
2215         /*
2216          * Handle the embedded transport layer header.
2217          */
2218         switch (next_hdr) {
2219         case IPPROTO_UDP:
2220                 icmp_udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
2221                 src_port = icmp_udph->source;
2222                 dest_port = icmp_udph->dest;
2223                 break;
2224
2225         case IPPROTO_TCP:
2226                 icmp_tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
2227                 src_port = icmp_tcph->source;
2228                 dest_port = icmp_tcph->dest;
2229                 break;
2230
2231         default:
2232                 spin_lock_bh(&si->lock);
2233                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL]++;
2234                 si->packets_not_forwarded++;
2235                 spin_unlock_bh(&si->lock);
2236
2237                 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", next_hdr);
2238                 return 0;
2239         }
2240
2241         src_ip = &icmp_iph->saddr;
2242         dest_ip = &icmp_iph->daddr;
2243
2244         spin_lock_bh(&si->lock);
2245
2246         /*
2247          * Look for a connection match.  Note that we reverse the source and destination
2248          * here because our embedded message contains a packet that was sent in the
2249          * opposite direction to the one in which we just received it.  It will have
2250          * been sent on the interface from which we received it though so that's still
2251          * ok to use.
2252          */
2253         cm = sfe_ipv6_find_connection_match(si, dev, next_hdr, dest_ip, dest_port, src_ip, src_port);
2254         if (unlikely(!cm)) {
2255                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2256                 si->packets_not_forwarded++;
2257                 spin_unlock_bh(&si->lock);
2258
2259                 DEBUG_TRACE("no connection found\n");
2260                 return 0;
2261         }
2262
2263         /*
2264          * We found a connection so now remove it from the connection list and flush
2265          * its state.
2266          */
2267         c = cm->connection;
2268         sfe_ipv6_remove_connection(si, c);
2269         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2270         si->packets_not_forwarded++;
2271         spin_unlock_bh(&si->lock);
2272
2273         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
2274         return 0;
2275 }
2276
2277 /*
2278  * sfe_ipv6_recv()
2279  *      Handle packet receives and forwarding.
2280  *
2281  * Returns 1 if the packet is forwarded or 0 if it isn't.
2282  */
2283 int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb)
2284 {
2285         struct sfe_ipv6 *si = &__si6;
2286         unsigned int len;
2287         unsigned int payload_len;
2288         unsigned int ihl = sizeof(struct sfe_ipv6_ip_hdr);
2289         bool flush_on_find = false;
2290         struct sfe_ipv6_ip_hdr *iph;
2291         u8 next_hdr;
2292
2293         /*
2294          * Check that we have space for an IP header and an upper-layer header here.
2295          */
2296         len = skb->len;
2297         if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
2298                 spin_lock_bh(&si->lock);
2299                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2300                 si->packets_not_forwarded++;
2301                 spin_unlock_bh(&si->lock);
2302
2303                 DEBUG_TRACE("len: %u is too short\n", len);
2304                 return 0;
2305         }
2306
2307         /*
2308          * Is our IP version wrong?
2309          */
2310         iph = (struct sfe_ipv6_ip_hdr *)skb->data;
2311         if (unlikely(iph->version != 6)) {
2312                 spin_lock_bh(&si->lock);
2313                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_V6]++;
2314                 si->packets_not_forwarded++;
2315                 spin_unlock_bh(&si->lock);
2316
2317                 DEBUG_TRACE("IP version: %u\n", iph->version);
2318                 return 0;
2319         }
2320
2321         /*
2322          * Does our datagram fit inside the skb?
2323          */
2324         payload_len = ntohs(iph->payload_len);
2325         if (unlikely(payload_len > (len - ihl))) {
2326                 spin_lock_bh(&si->lock);
2327                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2328                 si->packets_not_forwarded++;
2329                 spin_unlock_bh(&si->lock);
2330
2331                 DEBUG_TRACE("payload_len: %u, exceeds len: %u\n", payload_len, (len - sizeof(struct sfe_ipv6_ip_hdr)));
2332                 return 0;
2333         }
2334
2335         next_hdr = iph->nexthdr;
2336         while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
2337                 struct sfe_ipv6_ext_hdr *ext_hdr;
2338                 unsigned int ext_hdr_len;
2339
2340                 ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
2341                 if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
2342                         struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
2343                         unsigned int frag_off = ntohs(frag_hdr->frag_off);
2344
2345                         if (frag_off & SFE_IPV6_FRAG_OFFSET) {
2346                                 spin_lock_bh(&si->lock);
2347                                 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2348                                 si->packets_not_forwarded++;
2349                                 spin_unlock_bh(&si->lock);
2350
2351                                 DEBUG_TRACE("non-initial fragment\n");
2352                                 return 0;
2353                         }
2354                 }
2355
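                /*
                 * hdr_len counts 8-octet units beyond the first 8 octets of the
                 * extension header, so the total length is (hdr_len << 3) plus
                 * the fixed part.
                 */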
2356                 ext_hdr_len = ext_hdr->hdr_len;
2357                 ext_hdr_len <<= 3;
2358                 ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
2359                 ihl += ext_hdr_len;
2360                 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
2361                         spin_lock_bh(&si->lock);
2362                         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2363                         si->packets_not_forwarded++;
2364                         spin_unlock_bh(&si->lock);
2365
2366                         DEBUG_TRACE("extension header %d not completed\n", next_hdr);
2367                         return 0;
2368                 }
2369
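                /*
                 * Extension headers keep the packet out of the fast path: flag it
                 * "flush on find" so any matching connection is flushed and the
                 * packet falls through to the slow path.
                 */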
2370                 flush_on_find = true;
2371                 next_hdr = ext_hdr->next_hdr;
2372         }
2373
2374         if (IPPROTO_UDP == next_hdr) {
2375                 return sfe_ipv6_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2376         }
2377
2378         if (IPPROTO_TCP == next_hdr) {
2379                 return sfe_ipv6_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2380         }
2381
2382         if (IPPROTO_ICMPV6 == next_hdr) {
2383                 return sfe_ipv6_recv_icmp(si, skb, dev, len, iph, ihl);
2384         }
2385
2386         spin_lock_bh(&si->lock);
2387         si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2388         si->packets_not_forwarded++;
2389         spin_unlock_bh(&si->lock);
2390
2391         DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", next_hdr);
2392         return 0;
2393 }
2394
2395 /*
2396  * sfe_ipv6_update_tcp_state()
2397  *      update TCP window variables.
2398  */
2399 static void
2400 sfe_ipv6_update_tcp_state(struct sfe_ipv6_connection *c,
2401                           struct sfe_connection_create *sic)
2402 {
2403         struct sfe_ipv6_connection_match *orig_cm;
2404         struct sfe_ipv6_connection_match *repl_cm;
2405         struct sfe_ipv6_tcp_connection_match *orig_tcp;
2406         struct sfe_ipv6_tcp_connection_match *repl_tcp;
2407
2408         orig_cm = c->original_match;
2409         repl_cm = c->reply_match;
2410         orig_tcp = &orig_cm->protocol_state.tcp;
2411         repl_tcp = &repl_cm->protocol_state.tcp;
2412
2413         /* update orig */
2414         if (orig_tcp->max_win < sic->src_td_max_window) {
2415                 orig_tcp->max_win = sic->src_td_max_window;
2416         }
2417         if ((s32)(orig_tcp->end - sic->src_td_end) < 0) {
2418                 orig_tcp->end = sic->src_td_end;
2419         }
2420         if ((s32)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2421                 orig_tcp->max_end = sic->src_td_max_end;
2422         }
2423
2424         /* update reply */
2425         if (repl_tcp->max_win < sic->dest_td_max_window) {
2426                 repl_tcp->max_win = sic->dest_td_max_window;
2427         }
2428         if ((s32)(repl_tcp->end - sic->dest_td_end) < 0) {
2429                 repl_tcp->end = sic->dest_td_end;
2430         }
2431         if ((s32)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2432                 repl_tcp->max_end = sic->dest_td_max_end;
2433         }
2434
2435         /* update match flags */
2436         orig_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2437         repl_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2438         if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2439                 orig_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2440                 repl_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2441         }
2442 }
2443
2444 /*
2445  * sfe_ipv6_update_protocol_state()
2446  *      update the protocol-specific state machine.
2447  */
2448 static void
2449 sfe_ipv6_update_protocol_state(struct sfe_ipv6_connection *c,
2450                                struct sfe_connection_create *sic)
2451 {
2452         switch (sic->protocol) {
2453         case IPPROTO_TCP:
2454                 sfe_ipv6_update_tcp_state(c, sic);
2455                 break;
2456         }
2457 }
2458
2459 /*
2460  * sfe_ipv6_update_rule()
2461  *      update forwarding rule after rule is created.
2462  */
2463 void sfe_ipv6_update_rule(struct sfe_connection_create *sic)
2464 {
2465         struct sfe_ipv6_connection *c;
2466         struct sfe_ipv6 *si = &__si6;
2467
2468         spin_lock_bh(&si->lock);
2469
2470         c = sfe_ipv6_find_connection(si,
2471                                      sic->protocol,
2472                                      sic->src_ip.ip6,
2473                                      sic->src_port,
2474                                      sic->dest_ip.ip6,
2475                                      sic->dest_port);
2476         if (c != NULL) {
2477                 sfe_ipv6_update_protocol_state(c, sic);
2478         }
2479
2480         spin_unlock_bh(&si->lock);
2481 }
2482
2483 /*
2484  * sfe_ipv6_create_rule()
2485  *      Create a forwarding rule.
2486  */
2487 int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
2488 {
2489         struct sfe_ipv6 *si = &__si6;
2490         struct sfe_ipv6_connection *c;
2491         struct sfe_ipv6_connection_match *original_cm;
2492         struct sfe_ipv6_connection_match *reply_cm;
2493         struct net_device *dest_dev;
2494         struct net_device *src_dev;
2495
2496         dest_dev = sic->dest_dev;
2497         src_dev = sic->src_dev;
2498
2499         if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2500                      (src_dev->reg_state != NETREG_REGISTERED))) {
2501                 return -EINVAL;
2502         }
2503
2504         spin_lock_bh(&si->lock);
2505         si->connection_create_requests++;
2506
2507         /*
2508          * Check to see if there is already a flow that matches the rule we're
2509          * trying to create.  If there is then we can't create a new one.
2510          */
2511         c = sfe_ipv6_find_connection(si,
2512                                      sic->protocol,
2513                                      sic->src_ip.ip6,
2514                                      sic->src_port,
2515                                      sic->dest_ip.ip6,
2516                                      sic->dest_port);
2517         if (c != NULL) {
2518                 si->connection_create_collisions++;
2519
2520                 /*
2521                  * If we already have the flow then it's likely that this
2522                  * request to create the connection rule contains more
2523                  * up-to-date information. Check and update accordingly.
2524                  */
2525                 sfe_ipv6_update_protocol_state(c, sic);
2526                 spin_unlock_bh(&si->lock);
2527
2528                 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
2529                             "  s: %s:%pM:%pI6:%u, d: %s:%pM:%pI6:%u\n",
2530                             sic->mark, sic->protocol,
2531                             sic->src_dev->name, sic->src_mac, sic->src_ip.ip6, ntohs(sic->src_port),
2532                             sic->dest_dev->name, sic->dest_mac, sic->dest_ip.ip6, ntohs(sic->dest_port));
2533                 return -EADDRINUSE;
2534         }
2535
2536         /*
2537          * Allocate the various connection tracking objects.
2538          */
2539         c = (struct sfe_ipv6_connection *)kmalloc(sizeof(struct sfe_ipv6_connection), GFP_ATOMIC);
2540         if (unlikely(!c)) {
2541                 spin_unlock_bh(&si->lock);
2542                 return -ENOMEM;
2543         }
2544
2545         original_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
2546         if (unlikely(!original_cm)) {
2547                 spin_unlock_bh(&si->lock);
2548                 kfree(c);
2549                 return -ENOMEM;
2550         }
2551
2552         reply_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
2553         if (unlikely(!reply_cm)) {
2554                 spin_unlock_bh(&si->lock);
2555                 kfree(original_cm);
2556                 kfree(c);
2557                 return -ENOMEM;
2558         }
2559
2560         /*
2561          * Fill in the "original" direction connection matching object.
2562          * Note that the transmit MAC address is "dest_mac_xlate" because
2563          * we always know both ends of a connection by their translated
2564          * addresses and not their public addresses.
2565          */
2566         original_cm->match_dev = src_dev;
2567         original_cm->match_protocol = sic->protocol;
2568         original_cm->match_src_ip[0] = sic->src_ip.ip6[0];
2569         original_cm->match_src_port = sic->src_port;
2570         original_cm->match_dest_ip[0] = sic->dest_ip.ip6[0];
2571         original_cm->match_dest_port = sic->dest_port;
2572         original_cm->xlate_src_ip[0] = sic->src_ip_xlate.ip6[0];
2573         original_cm->xlate_src_port = sic->src_port_xlate;
2574         original_cm->xlate_dest_ip[0] = sic->dest_ip_xlate.ip6[0];
2575         original_cm->xlate_dest_port = sic->dest_port_xlate;
2576         original_cm->rx_packet_count = 0;
2577         original_cm->rx_packet_count64 = 0;
2578         original_cm->rx_byte_count = 0;
2579         original_cm->rx_byte_count64 = 0;
2580         original_cm->xmit_dev = dest_dev;
2581         original_cm->xmit_dev_mtu = sic->dest_mtu;
2582         memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
2583         memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2584         original_cm->connection = c;
2585         original_cm->counter_match = reply_cm;
2586         original_cm->flags = 0;
2587         if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2588                 original_cm->priority = sic->src_priority;
2589                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2590         }
2591         if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2592                 original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT;
2593                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2594         }
2595 #ifdef CONFIG_NF_FLOW_COOKIE
2596         original_cm->flow_cookie = 0;
2597 #endif
2598 #ifdef CONFIG_XFRM
2599         original_cm->flow_accel = sic->original_accel;
2600 #endif
2601         original_cm->active_next = NULL;
2602         original_cm->active_prev = NULL;
2603         original_cm->active = false;
2604
2605         /*
2606          * For PPP links we don't write an L2 header.  For everything else we do.
2607          */
2608         if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2609                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2610
2611                 /*
2612                  * If our dev writes Ethernet headers then we can write a really fast
2613                  * version.
2614                  */
2615                 if (dest_dev->header_ops) {
2616                         if (dest_dev->header_ops->create == eth_header) {
2617                                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2618                         }
2619                 }
2620         }
2621
2622         /*
2623          * Fill in the "reply" direction connection matching object.
2624          */
2625         reply_cm->match_dev = dest_dev;
2626         reply_cm->match_protocol = sic->protocol;
2627         reply_cm->match_src_ip[0] = sic->dest_ip_xlate.ip6[0];
2628         reply_cm->match_src_port = sic->dest_port_xlate;
2629         reply_cm->match_dest_ip[0] = sic->src_ip_xlate.ip6[0];
2630         reply_cm->match_dest_port = sic->src_port_xlate;
2631         reply_cm->xlate_src_ip[0] = sic->dest_ip.ip6[0];
2632         reply_cm->xlate_src_port = sic->dest_port;
2633         reply_cm->xlate_dest_ip[0] = sic->src_ip.ip6[0];
2634         reply_cm->xlate_dest_port = sic->src_port;
2635         reply_cm->rx_packet_count = 0;
2636         reply_cm->rx_packet_count64 = 0;
2637         reply_cm->rx_byte_count = 0;
2638         reply_cm->rx_byte_count64 = 0;
2639         reply_cm->xmit_dev = src_dev;
2640         reply_cm->xmit_dev_mtu = sic->src_mtu;
2641         memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
2642         memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2643         reply_cm->connection = c;
2644         reply_cm->counter_match = original_cm;
2645         reply_cm->flags = 0;
2646         if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2647                 reply_cm->priority = sic->dest_priority;
2648                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2649         }
2650         if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2651                 reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT;
2652                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2653         }
2654 #ifdef CONFIG_NF_FLOW_COOKIE
2655         reply_cm->flow_cookie = 0;
2656 #endif
2657 #ifdef CONFIG_XFRM
2658         reply_cm->flow_accel = sic->reply_accel;
2659 #endif
2660         reply_cm->active_next = NULL;
2661         reply_cm->active_prev = NULL;
2662         reply_cm->active = false;
2663
2664         /*
2665          * For PPP links we don't write an L2 header.  For everything else we do.
2666          */
2667         if (!(src_dev->flags & IFF_POINTOPOINT)) {
2668                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2669
2670                 /*
2671                  * If our dev writes Ethernet headers then we can write a really fast
2672                  * version.
2673                  */
2674                 if (src_dev->header_ops) {
2675                         if (src_dev->header_ops->create == eth_header) {
2676                                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2677                         }
2678                 }
2679         }
2680
2681
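        /*
         * If the original direction translates the destination address or port,
         * the reply direction must translate its source back again, and vice
         * versa, so the two flags below are always set as a pair.
         */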
2682         if (!sfe_ipv6_addr_equal(sic->dest_ip.ip6, sic->dest_ip_xlate.ip6) || sic->dest_port != sic->dest_port_xlate) {
2683                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
2684                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
2685         }
2686
2687         if (!sfe_ipv6_addr_equal(sic->src_ip.ip6, sic->src_ip_xlate.ip6) || sic->src_port != sic->src_port_xlate) {
2688                 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
2689                 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
2690         }
2691
2692         c->protocol = sic->protocol;
2693         c->src_ip[0] = sic->src_ip.ip6[0];
2694         c->src_ip_xlate[0] = sic->src_ip_xlate.ip6[0];
2695         c->src_port = sic->src_port;
2696         c->src_port_xlate = sic->src_port_xlate;
2697         c->original_dev = src_dev;
2698         c->original_match = original_cm;
2699         c->dest_ip[0] = sic->dest_ip.ip6[0];
2700         c->dest_ip_xlate[0] = sic->dest_ip_xlate.ip6[0];
2701         c->dest_port = sic->dest_port;
2702         c->dest_port_xlate = sic->dest_port_xlate;
2703         c->reply_dev = dest_dev;
2704         c->reply_match = reply_cm;
2705         c->mark = sic->mark;
2706         c->debug_read_seq = 0;
2707         c->last_sync_jiffies = get_jiffies_64();
2708
2709         /*
2710          * Take hold of our source and dest devices for the duration of the connection.
2711          */
2712         dev_hold(c->original_dev);
2713         dev_hold(c->reply_dev);
2714
2715         /*
2716          * Initialize the protocol-specific information that we track.
2717          */
2718         switch (sic->protocol) {
2719         case IPPROTO_TCP:
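                /*
                 * max_win is clamped to a minimum of 1 so that the TCP
                 * window checks never operate on a zero-sized window.
                 */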
2720                 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2721                 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2722                 original_cm->protocol_state.tcp.end = sic->src_td_end;
2723                 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2724                 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2725                 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2726                 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2727                 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2728                 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2729                         original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2730                         reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2731                 }
2732                 break;
2733         }
2734
2735         sfe_ipv6_connection_match_compute_translations(original_cm);
2736         sfe_ipv6_connection_match_compute_translations(reply_cm);
2737         sfe_ipv6_insert_connection(si, c);
2738
2739         spin_unlock_bh(&si->lock);
2740
2741         /*
2742          * We have everything we need!
2743          */
2744         DEBUG_INFO("new connection - mark: %08x, p: %d\n"
2745                    "  s: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n"
2746                    "  d: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n",
2747                    sic->mark, sic->protocol,
2748                    sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
2749                    sic->src_ip.ip6, sic->src_ip_xlate.ip6, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
2750                    dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
2751                    sic->dest_ip.ip6, sic->dest_ip_xlate.ip6, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
2752
2753         return 0;
2754 }
2755
2756 /*
2757  * sfe_ipv6_destroy_rule()
2758  *      Destroy a forwarding rule.
2759  */
2760 void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid)
2761 {
2762         struct sfe_ipv6 *si = &__si6;
2763         struct sfe_ipv6_connection *c;
2764
2765         spin_lock_bh(&si->lock);
2766         si->connection_destroy_requests++;
2767
2768         /*
2769          * Check to see if we have a flow that matches the rule we're trying
2770          * to destroy.  If there isn't one then there's nothing to destroy.
2771          */
2772         c = sfe_ipv6_find_connection(si, sid->protocol, sid->src_ip.ip6, sid->src_port,
2773                                      sid->dest_ip.ip6, sid->dest_port);
2774         if (!c) {
2775                 si->connection_destroy_misses++;
2776                 spin_unlock_bh(&si->lock);
2777
2778                 DEBUG_TRACE("connection does not exist - p: %d, s: %pI6:%u, d: %pI6:%u\n",
2779                             sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
2780                             sid->dest_ip.ip6, ntohs(sid->dest_port));
2781                 return;
2782         }
2783
2784         /*
2785          * Remove our connection details from the hash tables.
2786          */
2787         sfe_ipv6_remove_connection(si, c);
2788         spin_unlock_bh(&si->lock);
2789
2790         sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
2791
2792         DEBUG_INFO("connection destroyed - p: %d, s: %pI6:%u, d: %pI6:%u\n",
2793                    sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
2794                    sid->dest_ip.ip6, ntohs(sid->dest_port));
2795 }
2796
2797 /*
2798  * sfe_ipv6_register_sync_rule_callback()
2799  *      Register a callback for rule synchronization.
2800  */
2801 void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
2802 {
2803         struct sfe_ipv6 *si = &__si6;
2804
2805         spin_lock_bh(&si->lock);
2806         rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
2807         spin_unlock_bh(&si->lock);
2808 }
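
/*
 * Example usage (illustrative only - the handler name below is a placeholder
 * and is not defined in this file):
 *
 *	static void my_sync_handler(struct sfe_connection_sync *sis)
 *	{
 *		... consume the sync message ...
 *	}
 *
 *	sfe_ipv6_register_sync_rule_callback(my_sync_handler);
 *
 * Passing NULL clears the callback, after which the periodic sync skips its scan.
 */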
2809
2810 /*
2811  * sfe_ipv6_get_debug_dev()
2812  */
2813 static ssize_t sfe_ipv6_get_debug_dev(struct device *dev,
2814                                       struct device_attribute *attr,
2815                                       char *buf)
2816 {
2817         struct sfe_ipv6 *si = &__si6;
2818         ssize_t count;
2819         int num;
2820
2821         spin_lock_bh(&si->lock);
2822         num = si->debug_dev;
2823         spin_unlock_bh(&si->lock);
2824
2825         count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2826         return count;
2827 }
2828
2829 /*
2830  * sfe_ipv6_destroy_all_rules_for_dev()
2831  *      Destroy all connections that match a particular device.
2832  *
2833  * If we pass dev as NULL then this destroys all connections.
2834  */
2835 void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev)
2836 {
2837         struct sfe_ipv6 *si = &__si6;
2838         struct sfe_ipv6_connection *c;
2839
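        /*
         * Each pass removes at most one matching connection and flushes it with
         * the lock dropped, so we restart the scan from the head of the list
         * until no further matches are found.
         */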
2840 another_round:
2841         spin_lock_bh(&si->lock);
2842
2843         for (c = si->all_connections_head; c; c = c->all_connections_next) {
2844                 /*
2845                  * Does this connection relate to the device we are destroying?
2846                  */
2847                 if (!dev
2848                     || (dev == c->original_dev)
2849                     || (dev == c->reply_dev)) {
2850                         break;
2851                 }
2852         }
2853
2854         if (c) {
2855                 sfe_ipv6_remove_connection(si, c);
2856         }
2857
2858         spin_unlock_bh(&si->lock);
2859
2860         if (c) {
2861                 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
2862                 goto another_round;
2863         }
2864 }
2865
2866 /*
2867  * sfe_ipv6_periodic_sync()
2868  */
2869 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
2870 static void sfe_ipv6_periodic_sync(struct timer_list *arg)
2871 #else
2872 static void sfe_ipv6_periodic_sync(unsigned long arg)
2873 #endif /*KERNEL_VERSION(4, 15, 0)*/
2874 {
2875 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
2876         struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg->cust_data;
2877 #else
2878         struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg;
2879 #endif /*KERNEL_VERSION(4, 15, 0)*/
2880         u64 now_jiffies;
2881         int quota;
2882         sfe_sync_rule_callback_t sync_rule_callback;
2883
2884         now_jiffies = get_jiffies_64();
2885
2886         rcu_read_lock();
2887         sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2888         if (!sync_rule_callback) {
2889                 rcu_read_unlock();
2890                 goto done;
2891         }
2892
2893         spin_lock_bh(&si->lock);
2894         sfe_ipv6_update_summary_stats(si);
2895
2896         /*
2897          * Get an estimate of the number of connections to parse in this sync.
2898          */
2899         quota = (si->num_connections + 63) / 64;
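        /*
         * Processing at most 1/64th of the connection table per pass, with the
         * timer below re-armed about every 10ms, means even a fully active
         * table is walked in roughly 640ms.
         */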
2900
2901         /*
2902          * Walk the "active" list and sync the connection state.
2903          */
2904         while (quota--) {
2905                 struct sfe_ipv6_connection_match *cm;
2906                 struct sfe_ipv6_connection_match *counter_cm;
2907                 struct sfe_ipv6_connection *c;
2908                 struct sfe_connection_sync sis;
2909
2910                 cm = si->active_head;
2911                 if (!cm) {
2912                         break;
2913                 }
2914
2915                 /*
2916                  * There's a possibility that our counter match is in the active list too.
2917                  * If it is then remove it.
2918                  */
2919                 counter_cm = cm->counter_match;
2920                 if (counter_cm->active) {
2921                         counter_cm->active = false;
2922
2923                         /*
2924                          * We must have a connection preceding this counter match
2925                          * because that's the one that got us to this point, so we don't have
2926                          * to worry about removing the head of the list.
2927                          */
2928                         counter_cm->active_prev->active_next = counter_cm->active_next;
2929
2930                         if (likely(counter_cm->active_next)) {
2931                                 counter_cm->active_next->active_prev = counter_cm->active_prev;
2932                         } else {
2933                                 si->active_tail = counter_cm->active_prev;
2934                         }
2935
2936                         counter_cm->active_next = NULL;
2937                         counter_cm->active_prev = NULL;
2938                 }
2939
2940                 /*
2941                  * Now remove the head of the active scan list.
2942                  */
2943                 cm->active = false;
2944                 si->active_head = cm->active_next;
2945                 if (likely(cm->active_next)) {
2946                         cm->active_next->active_prev = NULL;
2947                 } else {
2948                         si->active_tail = NULL;
2949                 }
2950                 cm->active_next = NULL;
2951
2952                 /*
2953                  * Sync the connection state.
2954                  */
2955                 c = cm->connection;
2956                 sfe_ipv6_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);
2957
2958                 /*
2959                  * We don't want to be holding the lock when we sync!
2960                  */
2961                 spin_unlock_bh(&si->lock);
2962                 sync_rule_callback(&sis);
2963                 spin_lock_bh(&si->lock);
2964         }
2965
2966         spin_unlock_bh(&si->lock);
2967         rcu_read_unlock();
2968
2969 done:
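        /*
         * (HZ + 99) / 100 rounds HZ / 100 up, so the timer fires roughly every
         * 10ms even when HZ is not a multiple of 100.
         */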
2970         mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
2971 }
2972
2973 /*
2974  * sfe_ipv6_debug_dev_read_start()
2975  *      Generate part of the XML output.
2976  */
2977 static bool sfe_ipv6_debug_dev_read_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
2978                                           int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
2979 {
2980         int bytes_read;
2981
2982         si->debug_read_seq++;
2983
2984         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv6>\n");
2985         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2986                 return false;
2987         }
2988
2989         *length -= bytes_read;
2990         *total_read += bytes_read;
2991
2992         ws->state++;
2993         return true;
2994 }
2995
2996 /*
2997  * sfe_ipv6_debug_dev_read_connections_start()
2998  *      Generate part of the XML output.
2999  */
3000 static bool sfe_ipv6_debug_dev_read_connections_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3001                                                       int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3002 {
3003         int bytes_read;
3004
3005         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
3006         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3007                 return false;
3008         }
3009
3010         *length -= bytes_read;
3011         *total_read += bytes_read;
3012
3013         ws->state++;
3014         return true;
3015 }
3016
3017 /*
3018  * sfe_ipv6_debug_dev_read_connections_connection()
3019  *      Generate part of the XML output.
3020  */
3021 static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3022                                                            int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3023 {
3024         struct sfe_ipv6_connection *c;
3025         struct sfe_ipv6_connection_match *original_cm;
3026         struct sfe_ipv6_connection_match *reply_cm;
3027         int bytes_read;
3028         int protocol;
3029         struct net_device *src_dev;
3030         struct sfe_ipv6_addr src_ip;
3031         struct sfe_ipv6_addr src_ip_xlate;
3032         __be16 src_port;
3033         __be16 src_port_xlate;
3034         u64 src_rx_packets;
3035         u64 src_rx_bytes;
3036         struct net_device *dest_dev;
3037         struct sfe_ipv6_addr dest_ip;
3038         struct sfe_ipv6_addr dest_ip_xlate;
3039         __be16 dest_port;
3040         __be16 dest_port_xlate;
3041         u64 dest_rx_packets;
3042         u64 dest_rx_bytes;
3043         u64 last_sync_jiffies;
3044         u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
3045 #ifdef CONFIG_NF_FLOW_COOKIE
3046         int src_flow_cookie, dst_flow_cookie;
3047 #endif
3048
3049         spin_lock_bh(&si->lock);
3050
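        /*
         * Find the next connection that has not yet been reported in this read
         * sequence.  Each reported connection is stamped with the current
         * debug_read_seq so successive calls walk the list one connection at
         * a time.
         */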
3051         for (c = si->all_connections_head; c; c = c->all_connections_next) {
3052                 if (c->debug_read_seq < si->debug_read_seq) {
3053                         c->debug_read_seq = si->debug_read_seq;
3054                         break;
3055                 }
3056         }
3057
3058         /*
3059          * If there are no more connections to report then move to the next state.
3060          */
3061         if (!c) {
3062                 spin_unlock_bh(&si->lock);
3063                 ws->state++;
3064                 return true;
3065         }
3066
3067         original_cm = c->original_match;
3068         reply_cm = c->reply_match;
3069
3070         protocol = c->protocol;
3071         src_dev = c->original_dev;
3072         src_ip = c->src_ip[0];
3073         src_ip_xlate = c->src_ip_xlate[0];
3074         src_port = c->src_port;
3075         src_port_xlate = c->src_port_xlate;
3076         src_priority = original_cm->priority;
3077         src_dscp = original_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
3078
3079         sfe_ipv6_connection_match_update_summary_stats(original_cm);
3080         sfe_ipv6_connection_match_update_summary_stats(reply_cm);
3081
3082         src_rx_packets = original_cm->rx_packet_count64;
3083         src_rx_bytes = original_cm->rx_byte_count64;
3084         dest_dev = c->reply_dev;
3085         dest_ip = c->dest_ip[0];
3086         dest_ip_xlate = c->dest_ip_xlate[0];
3087         dest_port = c->dest_port;
3088         dest_port_xlate = c->dest_port_xlate;
3089         dest_priority = reply_cm->priority;
3090         dest_dscp = reply_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
3091         dest_rx_packets = reply_cm->rx_packet_count64;
3092         dest_rx_bytes = reply_cm->rx_byte_count64;
3093         last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
3094         mark = c->mark;
3095 #ifdef CONFIG_NF_FLOW_COOKIE
3096         src_flow_cookie = original_cm->flow_cookie;
3097         dst_flow_cookie = reply_cm->flow_cookie;
3098 #endif
3099         spin_unlock_bh(&si->lock);
3100
3101         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3102                                 "protocol=\"%u\" "
3103                                 "src_dev=\"%s\" "
3104                                 "src_ip=\"%pI6\" src_ip_xlate=\"%pI6\" "
3105                                 "src_port=\"%u\" src_port_xlate=\"%u\" "
3106                                 "src_priority=\"%u\" src_dscp=\"%u\" "
3107                                 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3108                                 "dest_dev=\"%s\" "
3109                                 "dest_ip=\"%pI6\" dest_ip_xlate=\"%pI6\" "
3110                                 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
3111                                 "dest_priority=\"%u\" dest_dscp=\"%u\" "
3112                                 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
3113 #ifdef CONFIG_NF_FLOW_COOKIE
3114                                 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3115 #endif
3116                                 "last_sync=\"%llu\" "
3117                                 "mark=\"%08x\" />\n",
3118                                 protocol,
3119                                 src_dev->name,
3120                                 &src_ip, &src_ip_xlate,
3121                                 ntohs(src_port), ntohs(src_port_xlate),
3122                                 src_priority, src_dscp,
3123                                 src_rx_packets, src_rx_bytes,
3124                                 dest_dev->name,
3125                                 &dest_ip, &dest_ip_xlate,
3126                                 ntohs(dest_port), ntohs(dest_port_xlate),
3127                                 dest_priority, dest_dscp,
3128                                 dest_rx_packets, dest_rx_bytes,
3129 #ifdef CONFIG_NF_FLOW_COOKIE
3130                                 src_flow_cookie, dst_flow_cookie,
3131 #endif
3132                                 last_sync_jiffies, mark);
3133
3134         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3135                 return false;
3136         }
3137
3138         *length -= bytes_read;
3139         *total_read += bytes_read;
3140
3141         return true;
3142 }
3143
3144 /*
3145  * sfe_ipv6_debug_dev_read_connections_end()
3146  *      Generate part of the XML output.
3147  */
3148 static bool sfe_ipv6_debug_dev_read_connections_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3149                                                     int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3150 {
3151         int bytes_read;
3152
3153         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3154         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3155                 return false;
3156         }
3157
3158         *length -= bytes_read;
3159         *total_read += bytes_read;
3160
3161         ws->state++;
3162         return true;
3163 }
3164
3165 /*
3166  * sfe_ipv6_debug_dev_read_exceptions_start()
3167  *      Generate part of the XML output.
3168  */
3169 static bool sfe_ipv6_debug_dev_read_exceptions_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3170                                                      int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3171 {
3172         int bytes_read;
3173
3174         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3175         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3176                 return false;
3177         }
3178
3179         *length -= bytes_read;
3180         *total_read += bytes_read;
3181
3182         ws->state++;
3183         return true;
3184 }
3185
3186 /*
3187  * sfe_ipv6_debug_dev_read_exceptions_exception()
3188  *      Generate part of the XML output.
3189  */
3190 static bool sfe_ipv6_debug_dev_read_exceptions_exception(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3191                                                          int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3192 {
3193         u64 ct;
3194
3195         spin_lock_bh(&si->lock);
3196         ct = si->exception_events64[ws->iter_exception];
3197         spin_unlock_bh(&si->lock);
3198
3199         if (ct) {
3200                 int bytes_read;
3201
3202                 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3203                                       "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3204                                       sfe_ipv6_exception_events_string[ws->iter_exception],
3205                                       ct);
3206                 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3207                         return false;
3208                 }
3209
3210                 *length -= bytes_read;
3211                 *total_read += bytes_read;
3212         }
3213
3214         ws->iter_exception++;
3215         if (ws->iter_exception >= SFE_IPV6_EXCEPTION_EVENT_LAST) {
3216                 ws->iter_exception = 0;
3217                 ws->state++;
3218         }
3219
3220         return true;
3221 }
3222
3223 /*
3224  * sfe_ipv6_debug_dev_read_exceptions_end()
3225  *      Generate part of the XML output.
3226  */
3227 static bool sfe_ipv6_debug_dev_read_exceptions_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3228                                                    int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3229 {
3230         int bytes_read;
3231
3232         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3233         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3234                 return false;
3235         }
3236
3237         *length -= bytes_read;
3238         *total_read += bytes_read;
3239
3240         ws->state++;
3241         return true;
3242 }
3243
3244 /*
3245  * sfe_ipv6_debug_dev_read_stats()
3246  *      Generate part of the XML output.
3247  */
3248 static bool sfe_ipv6_debug_dev_read_stats(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3249                                           int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3250 {
3251         int bytes_read;
3252         unsigned int num_connections;
3253         u64 packets_forwarded;
3254         u64 packets_not_forwarded;
3255         u64 connection_create_requests;
3256         u64 connection_create_collisions;
3257         u64 connection_destroy_requests;
3258         u64 connection_destroy_misses;
3259         u64 connection_flushes;
3260         u64 connection_match_hash_hits;
3261         u64 connection_match_hash_reorders;
3262
3263         spin_lock_bh(&si->lock);
3264         sfe_ipv6_update_summary_stats(si);
3265
3266         num_connections = si->num_connections;
3267         packets_forwarded = si->packets_forwarded64;
3268         packets_not_forwarded = si->packets_not_forwarded64;
3269         connection_create_requests = si->connection_create_requests64;
3270         connection_create_collisions = si->connection_create_collisions64;
3271         connection_destroy_requests = si->connection_destroy_requests64;
3272         connection_destroy_misses = si->connection_destroy_misses64;
3273         connection_flushes = si->connection_flushes64;
3274         connection_match_hash_hits = si->connection_match_hash_hits64;
3275         connection_match_hash_reorders = si->connection_match_hash_reorders64;
3276         spin_unlock_bh(&si->lock);
3277
3278         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3279                               "num_connections=\"%u\" "
3280                               "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3281                               "create_requests=\"%llu\" create_collisions=\"%llu\" "
3282                               "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3283                               "flushes=\"%llu\" "
3284                               "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3285                               num_connections,
3286                               packets_forwarded,
3287                               packets_not_forwarded,
3288                               connection_create_requests,
3289                               connection_create_collisions,
3290                               connection_destroy_requests,
3291                               connection_destroy_misses,
3292                               connection_flushes,
3293                               connection_match_hash_hits,
3294                               connection_match_hash_reorders);
3295         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3296                 return false;
3297         }
3298
3299         *length -= bytes_read;
3300         *total_read += bytes_read;
3301
3302         ws->state++;
3303         return true;
3304 }
3305
3306 /*
3307  * sfe_ipv6_debug_dev_read_end()
3308  *      Generate part of the XML output.
3309  */
3310 static bool sfe_ipv6_debug_dev_read_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3311                                         int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3312 {
3313         int bytes_read;
3314
3315         bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv6>\n");
3316         if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3317                 return false;
3318         }
3319
3320         *length -= bytes_read;
3321         *total_read += bytes_read;
3322
3323         ws->state++;
3324         return true;
3325 }
3326
3327 /*
3328  * Array of write methods, one for each state of our XML output state machine,
3329  * each emitting the corresponding XML element.
3330  */
3331 static sfe_ipv6_debug_xml_write_method_t sfe_ipv6_debug_xml_write_methods[SFE_IPV6_DEBUG_XML_STATE_DONE] = {
3332         sfe_ipv6_debug_dev_read_start,
3333         sfe_ipv6_debug_dev_read_connections_start,
3334         sfe_ipv6_debug_dev_read_connections_connection,
3335         sfe_ipv6_debug_dev_read_connections_end,
3336         sfe_ipv6_debug_dev_read_exceptions_start,
3337         sfe_ipv6_debug_dev_read_exceptions_exception,
3338         sfe_ipv6_debug_dev_read_exceptions_end,
3339         sfe_ipv6_debug_dev_read_stats,
3340         sfe_ipv6_debug_dev_read_end,
3341 };
3342
3343 /*
3344  * sfe_ipv6_debug_dev_read()
3345  *      Send info to userspace in response to a read request
3346  */
3347 static ssize_t sfe_ipv6_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3348 {
3349         char msg[CHAR_DEV_MSG_SIZE];
3350         int total_read = 0;
3351         struct sfe_ipv6_debug_xml_write_state *ws;
3352         struct sfe_ipv6 *si = &__si6;
3353
3354         ws = (struct sfe_ipv6_debug_xml_write_state *)filp->private_data;
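        /*
         * Run the XML output state machine: each write method emits one
         * fragment and advances ws->state once its section is complete.  We
         * keep going while there is at least CHAR_DEV_MSG_SIZE space left in
         * the user buffer.
         */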
3355         while ((ws->state != SFE_IPV6_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3356                 if ((sfe_ipv6_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3357                         continue;
3358                 }
3359         }
3360
3361         return total_read;
3362 }
3363
3364 /*
3365  * sfe_ipv6_debug_dev_write()
3366  *      Writing to the char device resets the summary statistics
3367  */
3368 static ssize_t sfe_ipv6_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3369 {
3370         struct sfe_ipv6 *si = &__si6;
3371
3372         spin_lock_bh(&si->lock);
3373         sfe_ipv6_update_summary_stats(si);
3374
3375         si->packets_forwarded64 = 0;
3376         si->packets_not_forwarded64 = 0;
3377         si->connection_create_requests64 = 0;
3378         si->connection_create_collisions64 = 0;
3379         si->connection_destroy_requests64 = 0;
3380         si->connection_destroy_misses64 = 0;
3381         si->connection_flushes64 = 0;
3382         si->connection_match_hash_hits64 = 0;
3383         si->connection_match_hash_reorders64 = 0;
3384         spin_unlock_bh(&si->lock);
3385
3386         return length;
3387 }
3388
3389 /*
3390  * sfe_ipv6_debug_dev_open()
3391  */
3392 static int sfe_ipv6_debug_dev_open(struct inode *inode, struct file *file)
3393 {
3394         struct sfe_ipv6_debug_xml_write_state *ws;
3395
3396         ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
3397         if (ws) {
3398                 return 0;
3399         }
3400
3401         ws = kzalloc(sizeof(struct sfe_ipv6_debug_xml_write_state), GFP_KERNEL);
3402         if (!ws) {
3403                 return -ENOMEM;
3404         }
3405
3406         ws->state = SFE_IPV6_DEBUG_XML_STATE_START;
3407         file->private_data = ws;
3408
3409         return 0;
3410 }
3411
3412 /*
3413  * sfe_ipv6_debug_dev_release()
3414  */
3415 static int sfe_ipv6_debug_dev_release(struct inode *inode, struct file *file)
3416 {
3417         struct sfe_ipv6_debug_xml_write_state *ws;
3418
3419         ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
3420         if (ws) {
3421       &n