OSDN Git Service

b88fae233a625d34c532b2bf8d4be4bd451aa5c5
[tomoyo/tomoyo-test1.git] / net / mptcp / options.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Multipath TCP
3  *
4  * Copyright (c) 2017 - 2019, Intel Corporation.
5  */
6
7 #define pr_fmt(fmt) "MPTCP: " fmt
8
9 #include <linux/kernel.h>
10 #include <net/tcp.h>
11 #include <net/mptcp.h>
12 #include "protocol.h"
13
14 static bool mptcp_cap_flag_sha256(u8 flags)
15 {
16         return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
17 }
18
19 static void mptcp_parse_option(const struct sk_buff *skb,
20                                const unsigned char *ptr, int opsize,
21                                struct mptcp_options_received *mp_opt)
22 {
23         u8 subtype = *ptr >> 4;
24         int expected_opsize;
25         u8 version;
26         u8 flags;
27
28         switch (subtype) {
29         case MPTCPOPT_MP_CAPABLE:
30                 /* strict size checking */
31                 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
32                         if (skb->len > tcp_hdr(skb)->doff << 2)
33                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
34                         else
35                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
36                 } else {
37                         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
38                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
39                         else
40                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
41                 }
42                 if (opsize != expected_opsize)
43                         break;
44
45                 /* try to be gentle vs future versions on the initial syn */
46                 version = *ptr++ & MPTCP_VERSION_MASK;
47                 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
48                         if (version != MPTCP_SUPPORTED_VERSION)
49                                 break;
50                 } else if (version < MPTCP_SUPPORTED_VERSION) {
51                         break;
52                 }
53
54                 flags = *ptr++;
55                 if (!mptcp_cap_flag_sha256(flags) ||
56                     (flags & MPTCP_CAP_EXTENSIBILITY))
57                         break;
58
59                 /* RFC 6824, Section 3.1:
60                  * "For the Checksum Required bit (labeled "A"), if either
61                  * host requires the use of checksums, checksums MUST be used.
62                  * In other words, the only way for checksums not to be used
63                  * is if both hosts in their SYNs set A=0."
64                  *
65                  * Section 3.3.0:
66                  * "If a checksum is not present when its use has been
67                  * negotiated, the receiver MUST close the subflow with a RST as
68                  * it is considered broken."
69                  *
70                  * We don't implement DSS checksum - fall back to TCP.
71                  */
72                 if (flags & MPTCP_CAP_CHECKSUM_REQD)
73                         break;
74
75                 mp_opt->mp_capable = 1;
76                 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
77                         mp_opt->sndr_key = get_unaligned_be64(ptr);
78                         ptr += 8;
79                 }
80                 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
81                         mp_opt->rcvr_key = get_unaligned_be64(ptr);
82                         ptr += 8;
83                 }
84                 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
85                         /* Section 3.1.:
86                          * "the data parameters in a MP_CAPABLE are semantically
87                          * equivalent to those in a DSS option and can be used
88                          * interchangeably."
89                          */
90                         mp_opt->dss = 1;
91                         mp_opt->use_map = 1;
92                         mp_opt->mpc_map = 1;
93                         mp_opt->data_len = get_unaligned_be16(ptr);
94                         ptr += 2;
95                 }
96                 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
97                          version, flags, opsize, mp_opt->sndr_key,
98                          mp_opt->rcvr_key, mp_opt->data_len);
99                 break;
100
101         case MPTCPOPT_MP_JOIN:
102                 mp_opt->mp_join = 1;
103                 if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
104                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
105                         mp_opt->join_id = *ptr++;
106                         mp_opt->token = get_unaligned_be32(ptr);
107                         ptr += 4;
108                         mp_opt->nonce = get_unaligned_be32(ptr);
109                         ptr += 4;
110                         pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
111                                  mp_opt->backup, mp_opt->join_id,
112                                  mp_opt->token, mp_opt->nonce);
113                 } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
114                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
115                         mp_opt->join_id = *ptr++;
116                         mp_opt->thmac = get_unaligned_be64(ptr);
117                         ptr += 8;
118                         mp_opt->nonce = get_unaligned_be32(ptr);
119                         ptr += 4;
120                         pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
121                                  mp_opt->backup, mp_opt->join_id,
122                                  mp_opt->thmac, mp_opt->nonce);
123                 } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
124                         ptr += 2;
125                         memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
126                         pr_debug("MP_JOIN hmac");
127                 } else {
128                         pr_warn("MP_JOIN bad option size");
129                         mp_opt->mp_join = 0;
130                 }
131                 break;
132
133         case MPTCPOPT_DSS:
134                 pr_debug("DSS");
135                 ptr++;
136
137                 /* we must clear 'mpc_map' be able to detect MP_CAPABLE
138                  * map vs DSS map in mptcp_incoming_options(), and reconstruct
139                  * map info accordingly
140                  */
141                 mp_opt->mpc_map = 0;
142                 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
143                 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
144                 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
145                 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
146                 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
147                 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
148
149                 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
150                          mp_opt->data_fin, mp_opt->dsn64,
151                          mp_opt->use_map, mp_opt->ack64,
152                          mp_opt->use_ack);
153
154                 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
155
156                 if (mp_opt->use_ack) {
157                         if (mp_opt->ack64)
158                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
159                         else
160                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
161                 }
162
163                 if (mp_opt->use_map) {
164                         if (mp_opt->dsn64)
165                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
166                         else
167                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
168                 }
169
170                 /* RFC 6824, Section 3.3:
171                  * If a checksum is present, but its use had
172                  * not been negotiated in the MP_CAPABLE handshake,
173                  * the checksum field MUST be ignored.
174                  */
175                 if (opsize != expected_opsize &&
176                     opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
177                         break;
178
179                 mp_opt->dss = 1;
180
181                 if (mp_opt->use_ack) {
182                         if (mp_opt->ack64) {
183                                 mp_opt->data_ack = get_unaligned_be64(ptr);
184                                 ptr += 8;
185                         } else {
186                                 mp_opt->data_ack = get_unaligned_be32(ptr);
187                                 ptr += 4;
188                         }
189
190                         pr_debug("data_ack=%llu", mp_opt->data_ack);
191                 }
192
193                 if (mp_opt->use_map) {
194                         if (mp_opt->dsn64) {
195                                 mp_opt->data_seq = get_unaligned_be64(ptr);
196                                 ptr += 8;
197                         } else {
198                                 mp_opt->data_seq = get_unaligned_be32(ptr);
199                                 ptr += 4;
200                         }
201
202                         mp_opt->subflow_seq = get_unaligned_be32(ptr);
203                         ptr += 4;
204
205                         mp_opt->data_len = get_unaligned_be16(ptr);
206                         ptr += 2;
207
208                         pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
209                                  mp_opt->data_seq, mp_opt->subflow_seq,
210                                  mp_opt->data_len);
211                 }
212
213                 break;
214
215         case MPTCPOPT_ADD_ADDR:
216                 mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
217                 if (!mp_opt->echo) {
218                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
219                             opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
220                                 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
221 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
222                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
223                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
224                                 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
225 #endif
226                         else
227                                 break;
228                 } else {
229                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
230                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
231                                 mp_opt->family = MPTCP_ADDR_IPVERSION_4;
232 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
233                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
234                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
235                                 mp_opt->family = MPTCP_ADDR_IPVERSION_6;
236 #endif
237                         else
238                                 break;
239                 }
240
241                 mp_opt->add_addr = 1;
242                 mp_opt->port = 0;
243                 mp_opt->addr_id = *ptr++;
244                 pr_debug("ADD_ADDR: id=%d", mp_opt->addr_id);
245                 if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
246                         memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
247                         ptr += 4;
248                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
249                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
250                                 mp_opt->port = get_unaligned_be16(ptr);
251                                 ptr += 2;
252                         }
253                 }
254 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
255                 else {
256                         memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
257                         ptr += 16;
258                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
259                             opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
260                                 mp_opt->port = get_unaligned_be16(ptr);
261                                 ptr += 2;
262                         }
263                 }
264 #endif
265                 if (!mp_opt->echo) {
266                         mp_opt->ahmac = get_unaligned_be64(ptr);
267                         ptr += 8;
268                 }
269                 break;
270
271         case MPTCPOPT_RM_ADDR:
272                 if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
273                         break;
274
275                 mp_opt->rm_addr = 1;
276                 mp_opt->rm_id = *ptr++;
277                 pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
278                 break;
279
280         default:
281                 break;
282         }
283 }
284
285 void mptcp_get_options(const struct sk_buff *skb,
286                        struct mptcp_options_received *mp_opt)
287 {
288         const struct tcphdr *th = tcp_hdr(skb);
289         const unsigned char *ptr;
290         int length;
291
292         /* initialize option status */
293         mp_opt->mp_capable = 0;
294         mp_opt->mp_join = 0;
295         mp_opt->add_addr = 0;
296         mp_opt->rm_addr = 0;
297         mp_opt->dss = 0;
298
299         length = (th->doff * 4) - sizeof(struct tcphdr);
300         ptr = (const unsigned char *)(th + 1);
301
302         while (length > 0) {
303                 int opcode = *ptr++;
304                 int opsize;
305
306                 switch (opcode) {
307                 case TCPOPT_EOL:
308                         return;
309                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
310                         length--;
311                         continue;
312                 default:
313                         opsize = *ptr++;
314                         if (opsize < 2) /* "silly options" */
315                                 return;
316                         if (opsize > length)
317                                 return; /* don't parse partial options */
318                         if (opcode == TCPOPT_MPTCP)
319                                 mptcp_parse_option(skb, ptr, opsize, mp_opt);
320                         ptr += opsize - 2;
321                         length -= opsize;
322                 }
323         }
324 }
325
326 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
327                        unsigned int *size, struct mptcp_out_options *opts)
328 {
329         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
330
331         /* we will use snd_isn to detect first pkt [re]transmission
332          * in mptcp_established_options_mp()
333          */
334         subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
335         if (subflow->request_mptcp) {
336                 pr_debug("local_key=%llu", subflow->local_key);
337                 opts->suboptions = OPTION_MPTCP_MPC_SYN;
338                 opts->sndr_key = subflow->local_key;
339                 *size = TCPOLEN_MPTCP_MPC_SYN;
340                 return true;
341         } else if (subflow->request_join) {
342                 pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
343                          subflow->local_nonce);
344                 opts->suboptions = OPTION_MPTCP_MPJ_SYN;
345                 opts->join_id = subflow->local_id;
346                 opts->token = subflow->remote_token;
347                 opts->nonce = subflow->local_nonce;
348                 opts->backup = subflow->request_bkup;
349                 *size = TCPOLEN_MPTCP_MPJ_SYN;
350                 return true;
351         }
352         return false;
353 }
354
355 /* MP_JOIN client subflow must wait for 4th ack before sending any data:
356  * TCP can't schedule delack timer before the subflow is fully established.
357  * MPTCP uses the delack timer to do 3rd ack retransmissions
358  */
359 static void schedule_3rdack_retransmission(struct sock *sk)
360 {
361         struct inet_connection_sock *icsk = inet_csk(sk);
362         struct tcp_sock *tp = tcp_sk(sk);
363         unsigned long timeout;
364
365         /* reschedule with a timeout above RTT, as we must look only for drop */
366         if (tp->srtt_us)
367                 timeout = tp->srtt_us << 1;
368         else
369                 timeout = TCP_TIMEOUT_INIT;
370
371         WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
372         icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
373         icsk->icsk_ack.timeout = timeout;
374         sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
375 }
376
377 static void clear_3rdack_retransmission(struct sock *sk)
378 {
379         struct inet_connection_sock *icsk = inet_csk(sk);
380
381         sk_stop_timer(sk, &icsk->icsk_delack_timer);
382         icsk->icsk_ack.timeout = 0;
383         icsk->icsk_ack.ato = 0;
384         icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
385 }
386
387 static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
388                                          unsigned int *size,
389                                          unsigned int remaining,
390                                          struct mptcp_out_options *opts)
391 {
392         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
393         struct mptcp_ext *mpext;
394         unsigned int data_len;
395
396         /* When skb is not available, we better over-estimate the emitted
397          * options len. A full DSS option (28 bytes) is longer than
398          * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
399          * tell the caller to defer the estimate to
400          * mptcp_established_options_dss(), which will reserve enough space.
401          */
402         if (!skb)
403                 return false;
404
405         /* MPC/MPJ needed only on 3rd ack packet */
406         if (subflow->fully_established ||
407             subflow->snd_isn != TCP_SKB_CB(skb)->seq)
408                 return false;
409
410         if (subflow->mp_capable) {
411                 mpext = mptcp_get_ext(skb);
412                 data_len = mpext ? mpext->data_len : 0;
413
414                 /* we will check ext_copy.data_len in mptcp_write_options() to
415                  * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
416                  * TCPOLEN_MPTCP_MPC_ACK
417                  */
418                 opts->ext_copy.data_len = data_len;
419                 opts->suboptions = OPTION_MPTCP_MPC_ACK;
420                 opts->sndr_key = subflow->local_key;
421                 opts->rcvr_key = subflow->remote_key;
422
423                 /* Section 3.1.
424                  * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
425                  * packets that start the first subflow of an MPTCP connection,
426                  * as well as the first packet that carries data
427                  */
428                 if (data_len > 0)
429                         *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
430                 else
431                         *size = TCPOLEN_MPTCP_MPC_ACK;
432
433                 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
434                          subflow, subflow->local_key, subflow->remote_key,
435                          data_len);
436
437                 return true;
438         } else if (subflow->mp_join) {
439                 opts->suboptions = OPTION_MPTCP_MPJ_ACK;
440                 memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
441                 *size = TCPOLEN_MPTCP_MPJ_ACK;
442                 pr_debug("subflow=%p", subflow);
443
444                 schedule_3rdack_retransmission(sk);
445                 return true;
446         }
447         return false;
448 }
449
450 static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
451                                  struct mptcp_ext *ext)
452 {
453         if (!ext->use_map) {
454                 /* RFC6824 requires a DSS mapping with specific values
455                  * if DATA_FIN is set but no data payload is mapped
456                  */
457                 ext->data_fin = 1;
458                 ext->use_map = 1;
459                 ext->dsn64 = 1;
460                 ext->data_seq = subflow->data_fin_tx_seq;
461                 ext->subflow_seq = 0;
462                 ext->data_len = 1;
463         } else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) {
464                 /* If there's an existing DSS mapping and it is the
465                  * final mapping, DATA_FIN consumes 1 additional byte of
466                  * mapping space.
467                  */
468                 ext->data_fin = 1;
469                 ext->data_len++;
470         }
471 }
472
473 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
474                                           unsigned int *size,
475                                           unsigned int remaining,
476                                           struct mptcp_out_options *opts)
477 {
478         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
479         unsigned int dss_size = 0;
480         struct mptcp_ext *mpext;
481         struct mptcp_sock *msk;
482         unsigned int ack_size;
483         bool ret = false;
484         u8 tcp_fin;
485
486         if (skb) {
487                 mpext = mptcp_get_ext(skb);
488                 tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
489         } else {
490                 mpext = NULL;
491                 tcp_fin = 0;
492         }
493
494         if (!skb || (mpext && mpext->use_map) || tcp_fin) {
495                 unsigned int map_size;
496
497                 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
498
499                 remaining -= map_size;
500                 dss_size = map_size;
501                 if (mpext)
502                         opts->ext_copy = *mpext;
503
504                 if (skb && tcp_fin && subflow->data_fin_tx_enable)
505                         mptcp_write_data_fin(subflow, &opts->ext_copy);
506                 ret = true;
507         }
508
509         /* passive sockets msk will set the 'can_ack' after accept(), even
510          * if the first subflow may have the already the remote key handy
511          */
512         opts->ext_copy.use_ack = 0;
513         msk = mptcp_sk(subflow->conn);
514         if (!READ_ONCE(msk->can_ack)) {
515                 *size = ALIGN(dss_size, 4);
516                 return ret;
517         }
518
519         ack_size = TCPOLEN_MPTCP_DSS_ACK64;
520
521         /* Add kind/length/subtype/flag overhead if mapping is not populated */
522         if (dss_size == 0)
523                 ack_size += TCPOLEN_MPTCP_DSS_BASE;
524
525         dss_size += ack_size;
526
527         opts->ext_copy.data_ack = msk->ack_seq;
528         opts->ext_copy.ack64 = 1;
529         opts->ext_copy.use_ack = 1;
530
531         *size = ALIGN(dss_size, 4);
532         return true;
533 }
534
535 static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
536                                   struct in_addr *addr)
537 {
538         u8 hmac[MPTCP_ADDR_HMAC_LEN];
539         u8 msg[7];
540
541         msg[0] = addr_id;
542         memcpy(&msg[1], &addr->s_addr, 4);
543         msg[5] = 0;
544         msg[6] = 0;
545
546         mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
547
548         return get_unaligned_be64(&hmac[MPTCP_ADDR_HMAC_LEN - sizeof(u64)]);
549 }
550
551 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
552 static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
553                                    struct in6_addr *addr)
554 {
555         u8 hmac[MPTCP_ADDR_HMAC_LEN];
556         u8 msg[19];
557
558         msg[0] = addr_id;
559         memcpy(&msg[1], &addr->s6_addr, 16);
560         msg[17] = 0;
561         msg[18] = 0;
562
563         mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
564
565         return get_unaligned_be64(&hmac[MPTCP_ADDR_HMAC_LEN - sizeof(u64)]);
566 }
567 #endif
568
569 static bool mptcp_established_options_addr(struct sock *sk,
570                                            unsigned int *size,
571                                            unsigned int remaining,
572                                            struct mptcp_out_options *opts)
573 {
574         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
575         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
576         struct mptcp_addr_info saddr;
577         int len;
578
579         if (!mptcp_pm_should_signal(msk) ||
580             !(mptcp_pm_addr_signal(msk, remaining, &saddr)))
581                 return false;
582
583         len = mptcp_add_addr_len(saddr.family);
584         if (remaining < len)
585                 return false;
586
587         *size = len;
588         opts->addr_id = saddr.id;
589         if (saddr.family == AF_INET) {
590                 opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
591                 opts->addr = saddr.addr;
592                 opts->ahmac = add_addr_generate_hmac(msk->local_key,
593                                                      msk->remote_key,
594                                                      opts->addr_id,
595                                                      &opts->addr);
596         }
597 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
598         else if (saddr.family == AF_INET6) {
599                 opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
600                 opts->addr6 = saddr.addr6;
601                 opts->ahmac = add_addr6_generate_hmac(msk->local_key,
602                                                       msk->remote_key,
603                                                       opts->addr_id,
604                                                       &opts->addr6);
605         }
606 #endif
607         pr_debug("addr_id=%d, ahmac=%llu", opts->addr_id, opts->ahmac);
608
609         return true;
610 }
611
612 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
613                                unsigned int *size, unsigned int remaining,
614                                struct mptcp_out_options *opts)
615 {
616         unsigned int opt_size = 0;
617         bool ret = false;
618
619         opts->suboptions = 0;
620
621         if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
622                 ret = true;
623         else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
624                                                opts))
625                 ret = true;
626
627         /* we reserved enough space for the above options, and exceeding the
628          * TCP option space would be fatal
629          */
630         if (WARN_ON_ONCE(opt_size > remaining))
631                 return false;
632
633         *size += opt_size;
634         remaining -= opt_size;
635         if (mptcp_established_options_addr(sk, &opt_size, remaining, opts)) {
636                 *size += opt_size;
637                 remaining -= opt_size;
638                 ret = true;
639         }
640
641         return ret;
642 }
643
644 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
645                           struct mptcp_out_options *opts)
646 {
647         struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
648
649         if (subflow_req->mp_capable) {
650                 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
651                 opts->sndr_key = subflow_req->local_key;
652                 *size = TCPOLEN_MPTCP_MPC_SYNACK;
653                 pr_debug("subflow_req=%p, local_key=%llu",
654                          subflow_req, subflow_req->local_key);
655                 return true;
656         } else if (subflow_req->mp_join) {
657                 opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
658                 opts->backup = subflow_req->backup;
659                 opts->join_id = subflow_req->local_id;
660                 opts->thmac = subflow_req->thmac;
661                 opts->nonce = subflow_req->local_nonce;
662                 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
663                          subflow_req, opts->backup, opts->join_id,
664                          opts->thmac, opts->nonce);
665                 *size = TCPOLEN_MPTCP_MPJ_SYNACK;
666                 return true;
667         }
668         return false;
669 }
670
671 static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
672                                     struct mptcp_subflow_context *subflow,
673                                     struct sk_buff *skb,
674                                     struct mptcp_options_received *mp_opt)
675 {
676         /* here we can process OoO, in-window pkts, only in-sequence 4th ack
677          * will make the subflow fully established
678          */
679         if (likely(subflow->fully_established)) {
680                 /* on passive sockets, check for 3rd ack retransmission
681                  * note that msk is always set by subflow_syn_recv_sock()
682                  * for mp_join subflows
683                  */
684                 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
685                     TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
686                     subflow->mp_join && mp_opt->mp_join &&
687                     READ_ONCE(msk->pm.server_side))
688                         tcp_send_ack(sk);
689                 goto fully_established;
690         }
691
692         /* we should process OoO packets before the first subflow is fully
693          * established, but not expected for MP_JOIN subflows
694          */
695         if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
696                 return subflow->mp_capable;
697
698         if (mp_opt->dss && mp_opt->use_ack) {
699                 /* subflows are fully established as soon as we get any
700                  * additional ack.
701                  */
702                 subflow->fully_established = 1;
703                 goto fully_established;
704         }
705
706         /* If the first established packet does not contain MP_CAPABLE + data
707          * then fallback to TCP
708          */
709         if (!mp_opt->mp_capable) {
710                 subflow->mp_capable = 0;
711                 tcp_sk(sk)->is_mptcp = 0;
712                 return false;
713         }
714
715         if (unlikely(!READ_ONCE(msk->pm.server_side)))
716                 pr_warn_once("bogus mpc option on established client sk");
717         subflow->fully_established = 1;
718         subflow->remote_key = mp_opt->sndr_key;
719         subflow->can_ack = 1;
720
721 fully_established:
722         if (likely(subflow->pm_notified))
723                 return true;
724
725         subflow->pm_notified = 1;
726         if (subflow->mp_join) {
727                 clear_3rdack_retransmission(sk);
728                 mptcp_pm_subflow_established(msk, subflow);
729         } else {
730                 mptcp_pm_fully_established(msk);
731         }
732         return true;
733 }
734
735 static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
736 {
737         u32 old_ack32, cur_ack32;
738
739         if (use_64bit)
740                 return cur_ack;
741
742         old_ack32 = (u32)old_ack;
743         cur_ack32 = (u32)cur_ack;
744         cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
745         if (unlikely(before(cur_ack32, old_ack32)))
746                 return cur_ack + (1LL << 32);
747         return cur_ack;
748 }
749
750 static void update_una(struct mptcp_sock *msk,
751                        struct mptcp_options_received *mp_opt)
752 {
753         u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una);
754         u64 write_seq = READ_ONCE(msk->write_seq);
755
756         /* avoid ack expansion on update conflict, to reduce the risk of
757          * wrongly expanding to a future ack sequence number, which is way
758          * more dangerous than missing an ack
759          */
760         new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
761
762         /* ACK for data not even sent yet? Ignore. */
763         if (after64(new_snd_una, write_seq))
764                 new_snd_una = old_snd_una;
765
766         while (after64(new_snd_una, old_snd_una)) {
767                 snd_una = old_snd_una;
768                 old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
769                                                new_snd_una);
770                 if (old_snd_una == snd_una) {
771                         mptcp_data_acked((struct sock *)msk);
772                         break;
773                 }
774         }
775 }
776
777 static bool add_addr_hmac_valid(struct mptcp_sock *msk,
778                                 struct mptcp_options_received *mp_opt)
779 {
780         u64 hmac = 0;
781
782         if (mp_opt->echo)
783                 return true;
784
785         if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
786                 hmac = add_addr_generate_hmac(msk->remote_key,
787                                               msk->local_key,
788                                               mp_opt->addr_id, &mp_opt->addr);
789 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
790         else
791                 hmac = add_addr6_generate_hmac(msk->remote_key,
792                                                msk->local_key,
793                                                mp_opt->addr_id, &mp_opt->addr6);
794 #endif
795
796         pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
797                  msk, (unsigned long long)hmac,
798                  (unsigned long long)mp_opt->ahmac);
799
800         return hmac == mp_opt->ahmac;
801 }
802
803 void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
804                             struct tcp_options_received *opt_rx)
805 {
806         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
807         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
808         struct mptcp_options_received mp_opt;
809         struct mptcp_ext *mpext;
810
811         mptcp_get_options(skb, &mp_opt);
812         if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
813                 return;
814
815         if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
816                 struct mptcp_addr_info addr;
817
818                 addr.port = htons(mp_opt.port);
819                 addr.id = mp_opt.addr_id;
820                 if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
821                         addr.family = AF_INET;
822                         addr.addr = mp_opt.addr;
823                 }
824 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
825                 else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
826                         addr.family = AF_INET6;
827                         addr.addr6 = mp_opt.addr6;
828                 }
829 #endif
830                 if (!mp_opt.echo)
831                         mptcp_pm_add_addr_received(msk, &addr);
832                 mp_opt.add_addr = 0;
833         }
834
835         if (!mp_opt.dss)
836                 return;
837
838         /* we can't wait for recvmsg() to update the ack_seq, otherwise
839          * monodirectional flows will stuck
840          */
841         if (mp_opt.use_ack)
842                 update_una(msk, &mp_opt);
843
844         mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
845         if (!mpext)
846                 return;
847
848         memset(mpext, 0, sizeof(*mpext));
849
850         if (mp_opt.use_map) {
851                 if (mp_opt.mpc_map) {
852                         /* this is an MP_CAPABLE carrying MPTCP data
853                          * we know this map the first chunk of data
854                          */
855                         mptcp_crypto_key_sha(subflow->remote_key, NULL,
856                                              &mpext->data_seq);
857                         mpext->data_seq++;
858                         mpext->subflow_seq = 1;
859                         mpext->dsn64 = 1;
860                         mpext->mpc_map = 1;
861                         mpext->data_fin = 0;
862                 } else {
863                         mpext->data_seq = mp_opt.data_seq;
864                         mpext->subflow_seq = mp_opt.subflow_seq;
865                         mpext->dsn64 = mp_opt.dsn64;
866                         mpext->data_fin = mp_opt.data_fin;
867                 }
868                 mpext->data_len = mp_opt.data_len;
869                 mpext->use_map = 1;
870         }
871 }
872
873 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
874 {
875         if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
876              OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
877                 u8 len;
878
879                 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
880                         len = TCPOLEN_MPTCP_MPC_SYN;
881                 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
882                         len = TCPOLEN_MPTCP_MPC_SYNACK;
883                 else if (opts->ext_copy.data_len)
884                         len = TCPOLEN_MPTCP_MPC_ACK_DATA;
885                 else
886                         len = TCPOLEN_MPTCP_MPC_ACK;
887
888                 *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
889                                       MPTCP_SUPPORTED_VERSION,
890                                       MPTCP_CAP_HMAC_SHA256);
891
892                 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
893                     opts->suboptions))
894                         goto mp_capable_done;
895
896                 put_unaligned_be64(opts->sndr_key, ptr);
897                 ptr += 2;
898                 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
899                         goto mp_capable_done;
900
901                 put_unaligned_be64(opts->rcvr_key, ptr);
902                 ptr += 2;
903                 if (!opts->ext_copy.data_len)
904                         goto mp_capable_done;
905
906                 put_unaligned_be32(opts->ext_copy.data_len << 16 |
907                                    TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
908                 ptr += 1;
909         }
910
911 mp_capable_done:
912         if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
913                 if (opts->ahmac)
914                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
915                                               TCPOLEN_MPTCP_ADD_ADDR, 0,
916                                               opts->addr_id);
917                 else
918                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
919                                               TCPOLEN_MPTCP_ADD_ADDR_BASE,
920                                               MPTCP_ADDR_ECHO,
921                                               opts->addr_id);
922                 memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
923                 ptr += 1;
924                 if (opts->ahmac) {
925                         put_unaligned_be64(opts->ahmac, ptr);
926                         ptr += 2;
927                 }
928         }
929
930 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
931         if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
932                 if (opts->ahmac)
933                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
934                                               TCPOLEN_MPTCP_ADD_ADDR6, 0,
935                                               opts->addr_id);
936                 else
937                         *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
938                                               TCPOLEN_MPTCP_ADD_ADDR6_BASE,
939                                               MPTCP_ADDR_ECHO,
940                                               opts->addr_id);
941                 memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
942                 ptr += 4;
943                 if (opts->ahmac) {
944                         put_unaligned_be64(opts->ahmac, ptr);
945                         ptr += 2;
946                 }
947         }
948 #endif
949
950         if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
951                 *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
952                                       TCPOLEN_MPTCP_RM_ADDR_BASE,
953                                       0, opts->rm_id);
954         }
955
956         if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
957                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
958                                       TCPOLEN_MPTCP_MPJ_SYN,
959                                       opts->backup, opts->join_id);
960                 put_unaligned_be32(opts->token, ptr);
961                 ptr += 1;
962                 put_unaligned_be32(opts->nonce, ptr);
963                 ptr += 1;
964         }
965
966         if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
967                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
968                                       TCPOLEN_MPTCP_MPJ_SYNACK,
969                                       opts->backup, opts->join_id);
970                 put_unaligned_be64(opts->thmac, ptr);
971                 ptr += 2;
972                 put_unaligned_be32(opts->nonce, ptr);
973                 ptr += 1;
974         }
975
976         if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
977                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
978                                       TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
979                 memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
980                 ptr += 5;
981         }
982
983         if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
984                 struct mptcp_ext *mpext = &opts->ext_copy;
985                 u8 len = TCPOLEN_MPTCP_DSS_BASE;
986                 u8 flags = 0;
987
988                 if (mpext->use_ack) {
989                         len += TCPOLEN_MPTCP_DSS_ACK64;
990                         flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64;
991                 }
992
993                 if (mpext->use_map) {
994                         len += TCPOLEN_MPTCP_DSS_MAP64;
995
996                         /* Use only 64-bit mapping flags for now, add
997                          * support for optional 32-bit mappings later.
998                          */
999                         flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
1000                         if (mpext->data_fin)
1001                                 flags |= MPTCP_DSS_DATA_FIN;
1002                 }
1003
1004                 *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
1005
1006                 if (mpext->use_ack) {
1007                         put_unaligned_be64(mpext->data_ack, ptr);
1008                         ptr += 2;
1009                 }
1010
1011                 if (mpext->use_map) {
1012                         put_unaligned_be64(mpext->data_seq, ptr);
1013                         ptr += 2;
1014                         put_unaligned_be32(mpext->subflow_seq, ptr);
1015                         ptr += 1;
1016                         put_unaligned_be32(mpext->data_len << 16 |
1017                                            TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1018                 }
1019         }
1020 }