OSDN Git Service

Merge tag 'perf-urgent-2023-09-10' of git://git.kernel.org/pub/scm/linux/kernel/git...
[tomoyo/tomoyo-test1.git] / net / ceph / messenger_v2.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Ceph msgr2 protocol implementation
4  *
5  * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
6  */
7
8 #include <linux/ceph/ceph_debug.h>
9
10 #include <crypto/aead.h>
11 #include <crypto/hash.h>
12 #include <crypto/sha2.h>
13 #include <crypto/utils.h>
14 #include <linux/bvec.h>
15 #include <linux/crc32c.h>
16 #include <linux/net.h>
17 #include <linux/scatterlist.h>
18 #include <linux/socket.h>
19 #include <linux/sched/mm.h>
20 #include <net/sock.h>
21 #include <net/tcp.h>
22
23 #include <linux/ceph/ceph_features.h>
24 #include <linux/ceph/decode.h>
25 #include <linux/ceph/libceph.h>
26 #include <linux/ceph/messenger.h>
27
28 #include "crypto.h"  /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */
29
/*
 * Frame tags.
 *
 * init_frame_desc() stores a tag in ceph_frame_desc::fd_tag and
 * encode_preamble() emits fd_tag as the first byte of the preamble.
 * NOTE(review): presumably these are the values passed as that tag --
 * confirm against the callers, which are outside this section.
 */
#define FRAME_TAG_HELLO                 1
#define FRAME_TAG_AUTH_REQUEST          2
#define FRAME_TAG_AUTH_BAD_METHOD       3
#define FRAME_TAG_AUTH_REPLY_MORE       4
#define FRAME_TAG_AUTH_REQUEST_MORE     5
#define FRAME_TAG_AUTH_DONE             6
#define FRAME_TAG_AUTH_SIGNATURE        7
#define FRAME_TAG_CLIENT_IDENT          8
#define FRAME_TAG_SERVER_IDENT          9
#define FRAME_TAG_IDENT_MISSING_FEATURES 10
#define FRAME_TAG_SESSION_RECONNECT     11
#define FRAME_TAG_SESSION_RESET         12
#define FRAME_TAG_SESSION_RETRY         13
#define FRAME_TAG_SESSION_RETRY_GLOBAL  14
#define FRAME_TAG_SESSION_RECONNECT_OK  15
#define FRAME_TAG_WAIT                  16
#define FRAME_TAG_MESSAGE               17
#define FRAME_TAG_KEEPALIVE2            18
#define FRAME_TAG_KEEPALIVE2_ACK        19
#define FRAME_TAG_ACK                   20
50
/*
 * Values of the epilogue late_status byte.  encode_epilogue_plain() and
 * encode_epilogue_secure() store either ABORTED or COMPLETE;
 * decode_epilogue() applies ABORTED_MASK and accepts only COMPLETE.
 */
#define FRAME_LATE_STATUS_ABORTED       0x1
#define FRAME_LATE_STATUS_COMPLETE      0xe
#define FRAME_LATE_STATUS_ABORTED_MASK  0xf
54
/*
 * IN_S_* constants.
 * NOTE(review): presumably the states of the receive-side state machine;
 * their users are outside this section -- confirm.
 */
#define IN_S_HANDLE_PREAMBLE                    1
#define IN_S_HANDLE_CONTROL                     2
#define IN_S_HANDLE_CONTROL_REMAINDER           3
#define IN_S_PREPARE_READ_DATA                  4
#define IN_S_PREPARE_READ_DATA_CONT             5
#define IN_S_PREPARE_READ_ENC_PAGE              6
#define IN_S_PREPARE_SPARSE_DATA                7
#define IN_S_PREPARE_SPARSE_DATA_CONT           8
#define IN_S_HANDLE_EPILOGUE                    9
#define IN_S_FINISH_SKIP                        10
65
/*
 * OUT_S_* constants.
 * NOTE(review): presumably the states of the send-side state machine;
 * their users are outside this section -- confirm.
 */
#define OUT_S_QUEUE_DATA                1
#define OUT_S_QUEUE_DATA_CONT           2
#define OUT_S_QUEUE_ENC_PAGE            3
#define OUT_S_QUEUE_ZEROS               4
#define OUT_S_FINISH_MESSAGE            5
#define OUT_S_GET_NEXT                  6
72
/*
 * Byte-offset helpers.  They rely on arithmetic on void pointers (a GNU C
 * extension in which the pointee size is taken to be one byte).
 *
 * CTRL_BODY(p)  - address CEPH_PREAMBLE_LEN bytes past @p
 * FRONT_PAD(p)  - address CEPH_EPILOGUE_SECURE_LEN bytes past @p
 * MIDDLE_PAD(p) - CEPH_GCM_BLOCK_LEN bytes past FRONT_PAD(p)
 * DATA_PAD(p)   - CEPH_GCM_BLOCK_LEN bytes past MIDDLE_PAD(p)
 */
#define CTRL_BODY(p)    ((void *)(p) + CEPH_PREAMBLE_LEN)
#define FRONT_PAD(p)    ((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
#define MIDDLE_PAD(p)   (FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
#define DATA_PAD(p)     (MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)
77
/*
 * Initial msghdr::msg_flags for do_recvmsg(), do_sendmsg() and
 * do_try_sendpage().  Those helpers translate -EAGAIN from the socket
 * calls into a return value of 0.
 */
#define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
79
80 static int do_recvmsg(struct socket *sock, struct iov_iter *it)
81 {
82         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
83         int ret;
84
85         msg.msg_iter = *it;
86         while (iov_iter_count(it)) {
87                 ret = sock_recvmsg(sock, &msg, msg.msg_flags);
88                 if (ret <= 0) {
89                         if (ret == -EAGAIN)
90                                 ret = 0;
91                         return ret;
92                 }
93
94                 iov_iter_advance(it, ret);
95         }
96
97         WARN_ON(msg_data_left(&msg));
98         return 1;
99 }
100
101 /*
102  * Read as much as possible.
103  *
104  * Return:
105  *   1 - done, nothing (else) to read
106  *   0 - socket is empty, need to wait
107  *  <0 - error
108  */
109 static int ceph_tcp_recv(struct ceph_connection *con)
110 {
111         int ret;
112
113         dout("%s con %p %s %zu\n", __func__, con,
114              iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
115              iov_iter_count(&con->v2.in_iter));
116         ret = do_recvmsg(con->sock, &con->v2.in_iter);
117         dout("%s con %p ret %d left %zu\n", __func__, con, ret,
118              iov_iter_count(&con->v2.in_iter));
119         return ret;
120 }
121
122 static int do_sendmsg(struct socket *sock, struct iov_iter *it)
123 {
124         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
125         int ret;
126
127         msg.msg_iter = *it;
128         while (iov_iter_count(it)) {
129                 ret = sock_sendmsg(sock, &msg);
130                 if (ret <= 0) {
131                         if (ret == -EAGAIN)
132                                 ret = 0;
133                         return ret;
134                 }
135
136                 iov_iter_advance(it, ret);
137         }
138
139         WARN_ON(msg_data_left(&msg));
140         return 1;
141 }
142
143 static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
144 {
145         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
146         struct bio_vec bv;
147         int ret;
148
149         if (WARN_ON(!iov_iter_is_bvec(it)))
150                 return -EINVAL;
151
152         while (iov_iter_count(it)) {
153                 /* iov_iter_iovec() for ITER_BVEC */
154                 bvec_set_page(&bv, it->bvec->bv_page,
155                               min(iov_iter_count(it),
156                                   it->bvec->bv_len - it->iov_offset),
157                               it->bvec->bv_offset + it->iov_offset);
158
159                 /*
160                  * MSG_SPLICE_PAGES cannot properly handle pages with
161                  * page_count == 0, we need to fall back to sendmsg if
162                  * that's the case.
163                  *
164                  * Same goes for slab pages: skb_can_coalesce() allows
165                  * coalescing neighboring slab objects into a single frag
166                  * which triggers one of hardened usercopy checks.
167                  */
168                 if (sendpage_ok(bv.bv_page))
169                         msg.msg_flags |= MSG_SPLICE_PAGES;
170                 else
171                         msg.msg_flags &= ~MSG_SPLICE_PAGES;
172
173                 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
174                 ret = sock_sendmsg(sock, &msg);
175                 if (ret <= 0) {
176                         if (ret == -EAGAIN)
177                                 ret = 0;
178                         return ret;
179                 }
180
181                 iov_iter_advance(it, ret);
182         }
183
184         return 1;
185 }
186
187 /*
188  * Write as much as possible.  The socket is expected to be corked,
189  * so we don't bother with MSG_MORE here.
190  *
191  * Return:
192  *   1 - done, nothing (else) to write
193  *   0 - socket is full, need to wait
194  *  <0 - error
195  */
196 static int ceph_tcp_send(struct ceph_connection *con)
197 {
198         int ret;
199
200         dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
201              iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
202         if (con->v2.out_iter_sendpage)
203                 ret = do_try_sendpage(con->sock, &con->v2.out_iter);
204         else
205                 ret = do_sendmsg(con->sock, &con->v2.out_iter);
206         dout("%s con %p ret %d left %zu\n", __func__, con, ret,
207              iov_iter_count(&con->v2.out_iter));
208         return ret;
209 }
210
211 static void add_in_kvec(struct ceph_connection *con, void *buf, int len)
212 {
213         BUG_ON(con->v2.in_kvec_cnt >= ARRAY_SIZE(con->v2.in_kvecs));
214         WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
215
216         con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_base = buf;
217         con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_len = len;
218         con->v2.in_kvec_cnt++;
219
220         con->v2.in_iter.nr_segs++;
221         con->v2.in_iter.count += len;
222 }
223
224 static void reset_in_kvecs(struct ceph_connection *con)
225 {
226         WARN_ON(iov_iter_count(&con->v2.in_iter));
227
228         con->v2.in_kvec_cnt = 0;
229         iov_iter_kvec(&con->v2.in_iter, ITER_DEST, con->v2.in_kvecs, 0, 0);
230 }
231
232 static void set_in_bvec(struct ceph_connection *con, const struct bio_vec *bv)
233 {
234         WARN_ON(iov_iter_count(&con->v2.in_iter));
235
236         con->v2.in_bvec = *bv;
237         iov_iter_bvec(&con->v2.in_iter, ITER_DEST, &con->v2.in_bvec, 1, bv->bv_len);
238 }
239
240 static void set_in_skip(struct ceph_connection *con, int len)
241 {
242         WARN_ON(iov_iter_count(&con->v2.in_iter));
243
244         dout("%s con %p len %d\n", __func__, con, len);
245         iov_iter_discard(&con->v2.in_iter, ITER_DEST, len);
246 }
247
248 static void add_out_kvec(struct ceph_connection *con, void *buf, int len)
249 {
250         BUG_ON(con->v2.out_kvec_cnt >= ARRAY_SIZE(con->v2.out_kvecs));
251         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
252         WARN_ON(con->v2.out_zero);
253
254         con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_base = buf;
255         con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_len = len;
256         con->v2.out_kvec_cnt++;
257
258         con->v2.out_iter.nr_segs++;
259         con->v2.out_iter.count += len;
260 }
261
262 static void reset_out_kvecs(struct ceph_connection *con)
263 {
264         WARN_ON(iov_iter_count(&con->v2.out_iter));
265         WARN_ON(con->v2.out_zero);
266
267         con->v2.out_kvec_cnt = 0;
268
269         iov_iter_kvec(&con->v2.out_iter, ITER_SOURCE, con->v2.out_kvecs, 0, 0);
270         con->v2.out_iter_sendpage = false;
271 }
272
273 static void set_out_bvec(struct ceph_connection *con, const struct bio_vec *bv,
274                          bool zerocopy)
275 {
276         WARN_ON(iov_iter_count(&con->v2.out_iter));
277         WARN_ON(con->v2.out_zero);
278
279         con->v2.out_bvec = *bv;
280         con->v2.out_iter_sendpage = zerocopy;
281         iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
282                       con->v2.out_bvec.bv_len);
283 }
284
285 static void set_out_bvec_zero(struct ceph_connection *con)
286 {
287         WARN_ON(iov_iter_count(&con->v2.out_iter));
288         WARN_ON(!con->v2.out_zero);
289
290         bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
291                       min(con->v2.out_zero, (int)PAGE_SIZE), 0);
292         con->v2.out_iter_sendpage = true;
293         iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
294                       con->v2.out_bvec.bv_len);
295 }
296
297 static void out_zero_add(struct ceph_connection *con, int len)
298 {
299         dout("%s con %p len %d\n", __func__, con, len);
300         con->v2.out_zero += len;
301 }
302
303 static void *alloc_conn_buf(struct ceph_connection *con, int len)
304 {
305         void *buf;
306
307         dout("%s con %p len %d\n", __func__, con, len);
308
309         if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
310                 return NULL;
311
312         buf = kvmalloc(len, GFP_NOIO);
313         if (!buf)
314                 return NULL;
315
316         con->v2.conn_bufs[con->v2.conn_buf_cnt++] = buf;
317         return buf;
318 }
319
320 static void free_conn_bufs(struct ceph_connection *con)
321 {
322         while (con->v2.conn_buf_cnt)
323                 kvfree(con->v2.conn_bufs[--con->v2.conn_buf_cnt]);
324 }
325
326 static void add_in_sign_kvec(struct ceph_connection *con, void *buf, int len)
327 {
328         BUG_ON(con->v2.in_sign_kvec_cnt >= ARRAY_SIZE(con->v2.in_sign_kvecs));
329
330         con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_base = buf;
331         con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_len = len;
332         con->v2.in_sign_kvec_cnt++;
333 }
334
335 static void clear_in_sign_kvecs(struct ceph_connection *con)
336 {
337         con->v2.in_sign_kvec_cnt = 0;
338 }
339
340 static void add_out_sign_kvec(struct ceph_connection *con, void *buf, int len)
341 {
342         BUG_ON(con->v2.out_sign_kvec_cnt >= ARRAY_SIZE(con->v2.out_sign_kvecs));
343
344         con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_base = buf;
345         con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_len = len;
346         con->v2.out_sign_kvec_cnt++;
347 }
348
349 static void clear_out_sign_kvecs(struct ceph_connection *con)
350 {
351         con->v2.out_sign_kvec_cnt = 0;
352 }
353
354 static bool con_secure(struct ceph_connection *con)
355 {
356         return con->v2.con_mode == CEPH_CON_MODE_SECURE;
357 }
358
359 static int front_len(const struct ceph_msg *msg)
360 {
361         return le32_to_cpu(msg->hdr.front_len);
362 }
363
364 static int middle_len(const struct ceph_msg *msg)
365 {
366         return le32_to_cpu(msg->hdr.middle_len);
367 }
368
369 static int data_len(const struct ceph_msg *msg)
370 {
371         return le32_to_cpu(msg->hdr.data_len);
372 }
373
374 static bool need_padding(int len)
375 {
376         return !IS_ALIGNED(len, CEPH_GCM_BLOCK_LEN);
377 }
378
379 static int padded_len(int len)
380 {
381         return ALIGN(len, CEPH_GCM_BLOCK_LEN);
382 }
383
/*
 * Number of bytes that have to follow @len bytes to reach the next
 * multiple of CEPH_GCM_BLOCK_LEN (zero if @len is already aligned).
 */
static int padding_len(int len)
{
	int total = padded_len(len);

	return total - len;
}
388
389 /* preamble + control segment */
390 static int head_onwire_len(int ctrl_len, bool secure)
391 {
392         int head_len;
393         int rem_len;
394
395         BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
396
397         if (secure) {
398                 head_len = CEPH_PREAMBLE_SECURE_LEN;
399                 if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
400                         rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
401                         head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
402                 }
403         } else {
404                 head_len = CEPH_PREAMBLE_PLAIN_LEN;
405                 if (ctrl_len)
406                         head_len += ctrl_len + CEPH_CRC_LEN;
407         }
408         return head_len;
409 }
410
411 /* front, middle and data segments + epilogue */
412 static int __tail_onwire_len(int front_len, int middle_len, int data_len,
413                              bool secure)
414 {
415         BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
416                middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
417                data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
418
419         if (!front_len && !middle_len && !data_len)
420                 return 0;
421
422         if (!secure)
423                 return front_len + middle_len + data_len +
424                        CEPH_EPILOGUE_PLAIN_LEN;
425
426         return padded_len(front_len) + padded_len(middle_len) +
427                padded_len(data_len) + CEPH_EPILOGUE_SECURE_LEN;
428 }
429
430 static int tail_onwire_len(const struct ceph_msg *msg, bool secure)
431 {
432         return __tail_onwire_len(front_len(msg), middle_len(msg),
433                                  data_len(msg), secure);
434 }
435
/*
 * head_onwire_len(sizeof(struct ceph_msg_header2), false)
 *
 * Constant-expression form of the plain-mode branch of
 * head_onwire_len() for a non-empty control segment: plain preamble +
 * control segment + crc.
 */
#define MESSAGE_HEAD_PLAIN_LEN  (CEPH_PREAMBLE_PLAIN_LEN +              \
                                 sizeof(struct ceph_msg_header2) +      \
                                 CEPH_CRC_LEN)
440
/*
 * Per-segment alignment values, indexed by segment number.
 * init_frame_desc() copies entry i into fd_aligns[i] for every segment
 * of a frame it describes.
 */
static const int frame_aligns[] = {
        sizeof(void *),
        sizeof(void *),
        sizeof(void *),
        PAGE_SIZE
};
447
/*
 * Number of segments once trailing empty ones are dropped.  A frame
 * always has at least one (possibly empty) segment, so the result is
 * never less than 1.
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int cnt = len_cnt;

	/* walk back over the empty segments at the end */
	while (cnt > 0 && !lens[cnt - 1])
		cnt--;

	return cnt > 0 ? cnt : 1;
}
463
464 static void init_frame_desc(struct ceph_frame_desc *desc, int tag,
465                             const int *lens, int len_cnt)
466 {
467         int i;
468
469         memset(desc, 0, sizeof(*desc));
470
471         desc->fd_tag = tag;
472         desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
473         BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT);
474         for (i = 0; i < desc->fd_seg_cnt; i++) {
475                 desc->fd_lens[i] = lens[i];
476                 desc->fd_aligns[i] = frame_aligns[i];
477         }
478 }
479
480 /*
481  * Preamble crc covers everything up to itself (28 bytes) and
482  * is calculated and verified irrespective of the connection mode
483  * (i.e. even if the frame is encrypted).
484  */
485 static void encode_preamble(const struct ceph_frame_desc *desc, void *p)
486 {
487         void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
488         void *start = p;
489         int i;
490
491         memset(p, 0, CEPH_PREAMBLE_LEN);
492
493         ceph_encode_8(&p, desc->fd_tag);
494         ceph_encode_8(&p, desc->fd_seg_cnt);
495         for (i = 0; i < desc->fd_seg_cnt; i++) {
496                 ceph_encode_32(&p, desc->fd_lens[i]);
497                 ceph_encode_16(&p, desc->fd_aligns[i]);
498         }
499
500         put_unaligned_le32(crc32c(0, start, crcp - start), crcp);
501 }
502
503 static int decode_preamble(void *p, struct ceph_frame_desc *desc)
504 {
505         void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
506         u32 crc, expected_crc;
507         int i;
508
509         crc = crc32c(0, p, crcp - p);
510         expected_crc = get_unaligned_le32(crcp);
511         if (crc != expected_crc) {
512                 pr_err("bad preamble crc, calculated %u, expected %u\n",
513                        crc, expected_crc);
514                 return -EBADMSG;
515         }
516
517         memset(desc, 0, sizeof(*desc));
518
519         desc->fd_tag = ceph_decode_8(&p);
520         desc->fd_seg_cnt = ceph_decode_8(&p);
521         if (desc->fd_seg_cnt < 1 ||
522             desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT) {
523                 pr_err("bad segment count %d\n", desc->fd_seg_cnt);
524                 return -EINVAL;
525         }
526         for (i = 0; i < desc->fd_seg_cnt; i++) {
527                 desc->fd_lens[i] = ceph_decode_32(&p);
528                 desc->fd_aligns[i] = ceph_decode_16(&p);
529         }
530
531         if (desc->fd_lens[0] < 0 ||
532             desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
533                 pr_err("bad control segment length %d\n", desc->fd_lens[0]);
534                 return -EINVAL;
535         }
536         if (desc->fd_lens[1] < 0 ||
537             desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
538                 pr_err("bad front segment length %d\n", desc->fd_lens[1]);
539                 return -EINVAL;
540         }
541         if (desc->fd_lens[2] < 0 ||
542             desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
543                 pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
544                 return -EINVAL;
545         }
546         if (desc->fd_lens[3] < 0 ||
547             desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
548                 pr_err("bad data segment length %d\n", desc->fd_lens[3]);
549                 return -EINVAL;
550         }
551
552         /*
553          * This would fire for FRAME_TAG_WAIT (it has one empty
554          * segment), but we should never get it as client.
555          */
556         if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
557                 pr_err("last segment empty, segment count %d\n",
558                        desc->fd_seg_cnt);
559                 return -EINVAL;
560         }
561
562         return 0;
563 }
564
565 static void encode_epilogue_plain(struct ceph_connection *con, bool aborted)
566 {
567         con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
568                                                  FRAME_LATE_STATUS_COMPLETE;
569         cpu_to_le32s(&con->v2.out_epil.front_crc);
570         cpu_to_le32s(&con->v2.out_epil.middle_crc);
571         cpu_to_le32s(&con->v2.out_epil.data_crc);
572 }
573
574 static void encode_epilogue_secure(struct ceph_connection *con, bool aborted)
575 {
576         memset(&con->v2.out_epil, 0, sizeof(con->v2.out_epil));
577         con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
578                                                  FRAME_LATE_STATUS_COMPLETE;
579 }
580
581 static int decode_epilogue(void *p, u32 *front_crc, u32 *middle_crc,
582                            u32 *data_crc)
583 {
584         u8 late_status;
585
586         late_status = ceph_decode_8(&p);
587         if ((late_status & FRAME_LATE_STATUS_ABORTED_MASK) !=
588                         FRAME_LATE_STATUS_COMPLETE) {
589                 /* we should never get an aborted message as client */
590                 pr_err("bad late_status 0x%x\n", late_status);
591                 return -EINVAL;
592         }
593
594         if (front_crc && middle_crc && data_crc) {
595                 *front_crc = ceph_decode_32(&p);
596                 *middle_crc = ceph_decode_32(&p);
597                 *data_crc = ceph_decode_32(&p);
598         }
599
600         return 0;
601 }
602
603 static void fill_header(struct ceph_msg_header *hdr,
604                         const struct ceph_msg_header2 *hdr2,
605                         int front_len, int middle_len, int data_len,
606                         const struct ceph_entity_name *peer_name)
607 {
608         hdr->seq = hdr2->seq;
609         hdr->tid = hdr2->tid;
610         hdr->type = hdr2->type;
611         hdr->priority = hdr2->priority;
612         hdr->version = hdr2->version;
613         hdr->front_len = cpu_to_le32(front_len);
614         hdr->middle_len = cpu_to_le32(middle_len);
615         hdr->data_len = cpu_to_le32(data_len);
616         hdr->data_off = hdr2->data_off;
617         hdr->src = *peer_name;
618         hdr->compat_version = hdr2->compat_version;
619         hdr->reserved = 0;
620         hdr->crc = 0;
621 }
622
623 static void fill_header2(struct ceph_msg_header2 *hdr2,
624                          const struct ceph_msg_header *hdr, u64 ack_seq)
625 {
626         hdr2->seq = hdr->seq;
627         hdr2->tid = hdr->tid;
628         hdr2->type = hdr->type;
629         hdr2->priority = hdr->priority;
630         hdr2->version = hdr->version;
631         hdr2->data_pre_padding_len = 0;
632         hdr2->data_off = hdr->data_off;
633         hdr2->ack_seq = cpu_to_le64(ack_seq);
634         hdr2->flags = 0;
635         hdr2->compat_version = hdr->compat_version;
636         hdr2->reserved = 0;
637 }
638
639 static int verify_control_crc(struct ceph_connection *con)
640 {
641         int ctrl_len = con->v2.in_desc.fd_lens[0];
642         u32 crc, expected_crc;
643
644         WARN_ON(con->v2.in_kvecs[0].iov_len != ctrl_len);
645         WARN_ON(con->v2.in_kvecs[1].iov_len != CEPH_CRC_LEN);
646
647         crc = crc32c(-1, con->v2.in_kvecs[0].iov_base, ctrl_len);
648         expected_crc = get_unaligned_le32(con->v2.in_kvecs[1].iov_base);
649         if (crc != expected_crc) {
650                 pr_err("bad control crc, calculated %u, expected %u\n",
651                        crc, expected_crc);
652                 return -EBADMSG;
653         }
654
655         return 0;
656 }
657
658 static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
659                                 u32 middle_crc, u32 data_crc)
660 {
661         if (front_len(con->in_msg)) {
662                 con->in_front_crc = crc32c(-1, con->in_msg->front.iov_base,
663                                            front_len(con->in_msg));
664         } else {
665                 WARN_ON(!middle_len(con->in_msg) && !data_len(con->in_msg));
666                 con->in_front_crc = -1;
667         }
668
669         if (middle_len(con->in_msg))
670                 con->in_middle_crc = crc32c(-1,
671                                             con->in_msg->middle->vec.iov_base,
672                                             middle_len(con->in_msg));
673         else if (data_len(con->in_msg))
674                 con->in_middle_crc = -1;
675         else
676                 con->in_middle_crc = 0;
677
678         if (!data_len(con->in_msg))
679                 con->in_data_crc = 0;
680
681         dout("%s con %p msg %p crcs %u %u %u\n", __func__, con, con->in_msg,
682              con->in_front_crc, con->in_middle_crc, con->in_data_crc);
683
684         if (con->in_front_crc != front_crc) {
685                 pr_err("bad front crc, calculated %u, expected %u\n",
686                        con->in_front_crc, front_crc);
687                 return -EBADMSG;
688         }
689         if (con->in_middle_crc != middle_crc) {
690                 pr_err("bad middle crc, calculated %u, expected %u\n",
691                        con->in_middle_crc, middle_crc);
692                 return -EBADMSG;
693         }
694         if (con->in_data_crc != data_crc) {
695                 pr_err("bad data crc, calculated %u, expected %u\n",
696                        con->in_data_crc, data_crc);
697                 return -EBADMSG;
698         }
699
700         return 0;
701 }
702
703 static int setup_crypto(struct ceph_connection *con,
704                         const u8 *session_key, int session_key_len,
705                         const u8 *con_secret, int con_secret_len)
706 {
707         unsigned int noio_flag;
708         int ret;
709
710         dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
711              __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
712         WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req);
713
714         if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
715             con->v2.con_mode != CEPH_CON_MODE_SECURE) {
716                 pr_err("bad con_mode %d\n", con->v2.con_mode);
717                 return -EINVAL;
718         }
719
720         if (!session_key_len) {
721                 WARN_ON(con->v2.con_mode != CEPH_CON_MODE_CRC);
722                 WARN_ON(con_secret_len);
723                 return 0;  /* auth_none */
724         }
725
726         noio_flag = memalloc_noio_save();
727         con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
728         memalloc_noio_restore(noio_flag);
729         if (IS_ERR(con->v2.hmac_tfm)) {
730                 ret = PTR_ERR(con->v2.hmac_tfm);
731                 con->v2.hmac_tfm = NULL;
732                 pr_err("failed to allocate hmac tfm context: %d\n", ret);
733                 return ret;
734         }
735
736         WARN_ON((unsigned long)session_key &
737                 crypto_shash_alignmask(con->v2.hmac_tfm));
738         ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
739                                   session_key_len);
740         if (ret) {
741                 pr_err("failed to set hmac key: %d\n", ret);
742                 return ret;
743         }
744
745         if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
746                 WARN_ON(con_secret_len);
747                 return 0;  /* auth_x, plain mode */
748         }
749
750         if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
751                 pr_err("con_secret too small %d\n", con_secret_len);
752                 return -EINVAL;
753         }
754
755         noio_flag = memalloc_noio_save();
756         con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
757         memalloc_noio_restore(noio_flag);
758         if (IS_ERR(con->v2.gcm_tfm)) {
759                 ret = PTR_ERR(con->v2.gcm_tfm);
760                 con->v2.gcm_tfm = NULL;
761                 pr_err("failed to allocate gcm tfm context: %d\n", ret);
762                 return ret;
763         }
764
765         WARN_ON((unsigned long)con_secret &
766                 crypto_aead_alignmask(con->v2.gcm_tfm));
767         ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
768         if (ret) {
769                 pr_err("failed to set gcm key: %d\n", ret);
770                 return ret;
771         }
772
773         WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
774         ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
775         if (ret) {
776                 pr_err("failed to set gcm tag size: %d\n", ret);
777                 return ret;
778         }
779
780         con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO);
781         if (!con->v2.gcm_req) {
782                 pr_err("failed to allocate gcm request\n");
783                 return -ENOMEM;
784         }
785
786         crypto_init_wait(&con->v2.gcm_wait);
787         aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
788                                   crypto_req_done, &con->v2.gcm_wait);
789
790         memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
791                CEPH_GCM_IV_LEN);
792         memcpy(&con->v2.out_gcm_nonce,
793                con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
794                CEPH_GCM_IV_LEN);
795         return 0;  /* auth_x, secure mode */
796 }
797
798 static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
799                        int kvec_cnt, u8 *hmac)
800 {
801         SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm);  /* tfm arg is ignored */
802         int ret;
803         int i;
804
805         dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con,
806              con->v2.hmac_tfm, kvec_cnt);
807
808         if (!con->v2.hmac_tfm) {
809                 memset(hmac, 0, SHA256_DIGEST_SIZE);
810                 return 0;  /* auth_none */
811         }
812
813         desc->tfm = con->v2.hmac_tfm;
814         ret = crypto_shash_init(desc);
815         if (ret)
816                 goto out;
817
818         for (i = 0; i < kvec_cnt; i++) {
819                 WARN_ON((unsigned long)kvecs[i].iov_base &
820                         crypto_shash_alignmask(con->v2.hmac_tfm));
821                 ret = crypto_shash_update(desc, kvecs[i].iov_base,
822                                           kvecs[i].iov_len);
823                 if (ret)
824                         goto out;
825         }
826
827         ret = crypto_shash_final(desc, hmac);
828
829 out:
830         shash_desc_zero(desc);
831         return ret;  /* auth_x, both plain and secure modes */
832 }
833
834 static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
835 {
836         u64 counter;
837
838         counter = le64_to_cpu(nonce->counter);
839         nonce->counter = cpu_to_le64(counter + 1);
840 }
841
842 static int gcm_crypt(struct ceph_connection *con, bool encrypt,
843                      struct scatterlist *src, struct scatterlist *dst,
844                      int src_len)
845 {
846         struct ceph_gcm_nonce *nonce;
847         int ret;
848
849         nonce = encrypt ? &con->v2.out_gcm_nonce : &con->v2.in_gcm_nonce;
850
851         aead_request_set_ad(con->v2.gcm_req, 0);  /* no AAD */
852         aead_request_set_crypt(con->v2.gcm_req, src, dst, src_len, (u8 *)nonce);
853         ret = crypto_wait_req(encrypt ? crypto_aead_encrypt(con->v2.gcm_req) :
854                                         crypto_aead_decrypt(con->v2.gcm_req),
855                               &con->v2.gcm_wait);
856         if (ret)
857                 return ret;
858
859         gcm_inc_nonce(nonce);
860         return 0;
861 }
862
863 static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
864                         struct bio_vec *bv)
865 {
866         struct page *page;
867         size_t off, len;
868
869         WARN_ON(!cursor->total_resid);
870
871         /* skip zero-length data items */
872         while (!cursor->resid)
873                 ceph_msg_data_advance(cursor, 0);
874
875         /* get a piece of data, cursor isn't advanced */
876         page = ceph_msg_data_next(cursor, &off, &len);
877         bvec_set_page(bv, page, len, off);
878 }
879
880 static int calc_sg_cnt(void *buf, int buf_len)
881 {
882         int sg_cnt;
883
884         if (!buf_len)
885                 return 0;
886
887         sg_cnt = need_padding(buf_len) ? 1 : 0;
888         if (is_vmalloc_addr(buf)) {
889                 WARN_ON(offset_in_page(buf));
890                 sg_cnt += PAGE_ALIGN(buf_len) >> PAGE_SHIFT;
891         } else {
892                 sg_cnt++;
893         }
894
895         return sg_cnt;
896 }
897
898 static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor *cursor)
899 {
900         int data_len = cursor->total_resid;
901         struct bio_vec bv;
902         int sg_cnt;
903
904         if (!data_len)
905                 return 0;
906
907         sg_cnt = need_padding(data_len) ? 1 : 0;
908         do {
909                 get_bvec_at(cursor, &bv);
910                 sg_cnt++;
911
912                 ceph_msg_data_advance(cursor, bv.bv_len);
913         } while (cursor->total_resid);
914
915         return sg_cnt;
916 }
917
918 static void init_sgs(struct scatterlist **sg, void *buf, int buf_len, u8 *pad)
919 {
920         void *end = buf + buf_len;
921         struct page *page;
922         int len;
923         void *p;
924
925         if (!buf_len)
926                 return;
927
928         if (is_vmalloc_addr(buf)) {
929                 p = buf;
930                 do {
931                         page = vmalloc_to_page(p);
932                         len = min_t(int, end - p, PAGE_SIZE);
933                         WARN_ON(!page || !len || offset_in_page(p));
934                         sg_set_page(*sg, page, len, 0);
935                         *sg = sg_next(*sg);
936                         p += len;
937                 } while (p != end);
938         } else {
939                 sg_set_buf(*sg, buf, buf_len);
940                 *sg = sg_next(*sg);
941         }
942
943         if (need_padding(buf_len)) {
944                 sg_set_buf(*sg, pad, padding_len(buf_len));
945                 *sg = sg_next(*sg);
946         }
947 }
948
949 static void init_sgs_cursor(struct scatterlist **sg,
950                             struct ceph_msg_data_cursor *cursor, u8 *pad)
951 {
952         int data_len = cursor->total_resid;
953         struct bio_vec bv;
954
955         if (!data_len)
956                 return;
957
958         do {
959                 get_bvec_at(cursor, &bv);
960                 sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
961                 *sg = sg_next(*sg);
962
963                 ceph_msg_data_advance(cursor, bv.bv_len);
964         } while (cursor->total_resid);
965
966         if (need_padding(data_len)) {
967                 sg_set_buf(*sg, pad, padding_len(data_len));
968                 *sg = sg_next(*sg);
969         }
970 }
971
972 /**
973  * init_sgs_pages: set up scatterlist on an array of page pointers
974  * @sg:         scatterlist to populate
975  * @pages:      pointer to page array
976  * @dpos:       position in the array to start (bytes)
977  * @dlen:       len to add to sg (bytes)
978  * @pad:        pointer to pad destination (if any)
979  *
980  * Populate the scatterlist from the page array, starting at an arbitrary
981  * byte in the array and running for a specified length.
982  */
983 static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
984                            int dpos, int dlen, u8 *pad)
985 {
986         int idx = dpos >> PAGE_SHIFT;
987         int off = offset_in_page(dpos);
988         int resid = dlen;
989
990         do {
991                 int len = min(resid, (int)PAGE_SIZE - off);
992
993                 sg_set_page(*sg, pages[idx], len, off);
994                 *sg = sg_next(*sg);
995                 off = 0;
996                 ++idx;
997                 resid -= len;
998         } while (resid);
999
1000         if (need_padding(dlen)) {
1001                 sg_set_buf(*sg, pad, padding_len(dlen));
1002                 *sg = sg_next(*sg);
1003         }
1004 }
1005
1006 static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
1007                              u8 *front_pad, u8 *middle_pad, u8 *data_pad,
1008                              void *epilogue, struct page **pages, int dpos,
1009                              bool add_tag)
1010 {
1011         struct ceph_msg_data_cursor cursor;
1012         struct scatterlist *cur_sg;
1013         int dlen = data_len(msg);
1014         int sg_cnt;
1015         int ret;
1016
1017         if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
1018                 return 0;
1019
1020         sg_cnt = 1;  /* epilogue + [auth tag] */
1021         if (front_len(msg))
1022                 sg_cnt += calc_sg_cnt(msg->front.iov_base,
1023                                       front_len(msg));
1024         if (middle_len(msg))
1025                 sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
1026                                       middle_len(msg));
1027         if (dlen) {
1028                 if (pages) {
1029                         sg_cnt += calc_pages_for(dpos, dlen);
1030                         if (need_padding(dlen))
1031                                 sg_cnt++;
1032                 } else {
1033                         ceph_msg_data_cursor_init(&cursor, msg, dlen);
1034                         sg_cnt += calc_sg_cnt_cursor(&cursor);
1035                 }
1036         }
1037
1038         ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
1039         if (ret)
1040                 return ret;
1041
1042         cur_sg = sgt->sgl;
1043         if (front_len(msg))
1044                 init_sgs(&cur_sg, msg->front.iov_base, front_len(msg),
1045                          front_pad);
1046         if (middle_len(msg))
1047                 init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
1048                          middle_pad);
1049         if (dlen) {
1050                 if (pages) {
1051                         init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
1052                 } else {
1053                         ceph_msg_data_cursor_init(&cursor, msg, dlen);
1054                         init_sgs_cursor(&cur_sg, &cursor, data_pad);
1055                 }
1056         }
1057
1058         WARN_ON(!sg_is_last(cur_sg));
1059         sg_set_buf(cur_sg, epilogue,
1060                    CEPH_GCM_BLOCK_LEN + (add_tag ? CEPH_GCM_TAG_LEN : 0));
1061         return 0;
1062 }
1063
1064 static int decrypt_preamble(struct ceph_connection *con)
1065 {
1066         struct scatterlist sg;
1067
1068         sg_init_one(&sg, con->v2.in_buf, CEPH_PREAMBLE_SECURE_LEN);
1069         return gcm_crypt(con, false, &sg, &sg, CEPH_PREAMBLE_SECURE_LEN);
1070 }
1071
1072 static int decrypt_control_remainder(struct ceph_connection *con)
1073 {
1074         int ctrl_len = con->v2.in_desc.fd_lens[0];
1075         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1076         int pt_len = padding_len(rem_len) + CEPH_GCM_TAG_LEN;
1077         struct scatterlist sgs[2];
1078
1079         WARN_ON(con->v2.in_kvecs[0].iov_len != rem_len);
1080         WARN_ON(con->v2.in_kvecs[1].iov_len != pt_len);
1081
1082         sg_init_table(sgs, 2);
1083         sg_set_buf(&sgs[0], con->v2.in_kvecs[0].iov_base, rem_len);
1084         sg_set_buf(&sgs[1], con->v2.in_buf, pt_len);
1085
1086         return gcm_crypt(con, false, sgs, sgs,
1087                          padded_len(rem_len) + CEPH_GCM_TAG_LEN);
1088 }
1089
1090 /* Process sparse read data that lives in a buffer */
1091 static int process_v2_sparse_read(struct ceph_connection *con,
1092                                   struct page **pages, int spos)
1093 {
1094         struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
1095         int ret;
1096
1097         for (;;) {
1098                 char *buf = NULL;
1099
1100                 ret = con->ops->sparse_read(con, cursor, &buf);
1101                 if (ret <= 0)
1102                         return ret;
1103
1104                 dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
1105
1106                 do {
1107                         int idx = spos >> PAGE_SHIFT;
1108                         int soff = offset_in_page(spos);
1109                         struct page *spage = con->v2.in_enc_pages[idx];
1110                         int len = min_t(int, ret, PAGE_SIZE - soff);
1111
1112                         if (buf) {
1113                                 memcpy_from_page(buf, spage, soff, len);
1114                                 buf += len;
1115                         } else {
1116                                 struct bio_vec bv;
1117
1118                                 get_bvec_at(cursor, &bv);
1119                                 len = min_t(int, len, bv.bv_len);
1120                                 memcpy_page(bv.bv_page, bv.bv_offset,
1121                                             spage, soff, len);
1122                                 ceph_msg_data_advance(cursor, len);
1123                         }
1124                         spos += len;
1125                         ret -= len;
1126                 } while (ret);
1127         }
1128 }
1129
1130 static int decrypt_tail(struct ceph_connection *con)
1131 {
1132         struct sg_table enc_sgt = {};
1133         struct sg_table sgt = {};
1134         struct page **pages = NULL;
1135         bool sparse = con->in_msg->sparse_read;
1136         int dpos = 0;
1137         int tail_len;
1138         int ret;
1139
1140         tail_len = tail_onwire_len(con->in_msg, true);
1141         ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
1142                                         con->v2.in_enc_page_cnt, 0, tail_len,
1143                                         GFP_NOIO);
1144         if (ret)
1145                 goto out;
1146
1147         if (sparse) {
1148                 dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
1149                 pages = con->v2.in_enc_pages;
1150         }
1151
1152         ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
1153                                 MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
1154                                 con->v2.in_buf, pages, dpos, true);
1155         if (ret)
1156                 goto out;
1157
1158         dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
1159              con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
1160         ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
1161         if (ret)
1162                 goto out;
1163
1164         if (sparse && data_len(con->in_msg)) {
1165                 ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
1166                 if (ret)
1167                         goto out;
1168         }
1169
1170         WARN_ON(!con->v2.in_enc_page_cnt);
1171         ceph_release_page_vector(con->v2.in_enc_pages,
1172                                  con->v2.in_enc_page_cnt);
1173         con->v2.in_enc_pages = NULL;
1174         con->v2.in_enc_page_cnt = 0;
1175
1176 out:
1177         sg_free_table(&sgt);
1178         sg_free_table(&enc_sgt);
1179         return ret;
1180 }
1181
1182 static int prepare_banner(struct ceph_connection *con)
1183 {
1184         int buf_len = CEPH_BANNER_V2_LEN + 2 + 8 + 8;
1185         void *buf, *p;
1186
1187         buf = alloc_conn_buf(con, buf_len);
1188         if (!buf)
1189                 return -ENOMEM;
1190
1191         p = buf;
1192         ceph_encode_copy(&p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN);
1193         ceph_encode_16(&p, sizeof(u64) + sizeof(u64));
1194         ceph_encode_64(&p, CEPH_MSGR2_SUPPORTED_FEATURES);
1195         ceph_encode_64(&p, CEPH_MSGR2_REQUIRED_FEATURES);
1196         WARN_ON(p != buf + buf_len);
1197
1198         add_out_kvec(con, buf, buf_len);
1199         add_out_sign_kvec(con, buf, buf_len);
1200         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1201         return 0;
1202 }
1203
1204 /*
1205  * base:
1206  *   preamble
1207  *   control body (ctrl_len bytes)
1208  *   space for control crc
1209  *
1210  * extdata (optional):
1211  *   control body (extdata_len bytes)
1212  *
1213  * Compute control crc and gather base and extdata into:
1214  *
1215  *   preamble
1216  *   control body (ctrl_len + extdata_len bytes)
1217  *   control crc
1218  *
1219  * Preamble should already be encoded at the start of base.
1220  */
1221 static void prepare_head_plain(struct ceph_connection *con, void *base,
1222                                int ctrl_len, void *extdata, int extdata_len,
1223                                bool to_be_signed)
1224 {
1225         int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN;
1226         void *crcp = base + base_len - CEPH_CRC_LEN;
1227         u32 crc;
1228
1229         crc = crc32c(-1, CTRL_BODY(base), ctrl_len);
1230         if (extdata_len)
1231                 crc = crc32c(crc, extdata, extdata_len);
1232         put_unaligned_le32(crc, crcp);
1233
1234         if (!extdata_len) {
1235                 add_out_kvec(con, base, base_len);
1236                 if (to_be_signed)
1237                         add_out_sign_kvec(con, base, base_len);
1238                 return;
1239         }
1240
1241         add_out_kvec(con, base, crcp - base);
1242         add_out_kvec(con, extdata, extdata_len);
1243         add_out_kvec(con, crcp, CEPH_CRC_LEN);
1244         if (to_be_signed) {
1245                 add_out_sign_kvec(con, base, crcp - base);
1246                 add_out_sign_kvec(con, extdata, extdata_len);
1247                 add_out_sign_kvec(con, crcp, CEPH_CRC_LEN);
1248         }
1249 }
1250
1251 static int prepare_head_secure_small(struct ceph_connection *con,
1252                                      void *base, int ctrl_len)
1253 {
1254         struct scatterlist sg;
1255         int ret;
1256
1257         /* inline buffer padding? */
1258         if (ctrl_len < CEPH_PREAMBLE_INLINE_LEN)
1259                 memset(CTRL_BODY(base) + ctrl_len, 0,
1260                        CEPH_PREAMBLE_INLINE_LEN - ctrl_len);
1261
1262         sg_init_one(&sg, base, CEPH_PREAMBLE_SECURE_LEN);
1263         ret = gcm_crypt(con, true, &sg, &sg,
1264                         CEPH_PREAMBLE_SECURE_LEN - CEPH_GCM_TAG_LEN);
1265         if (ret)
1266                 return ret;
1267
1268         add_out_kvec(con, base, CEPH_PREAMBLE_SECURE_LEN);
1269         return 0;
1270 }
1271
1272 /*
1273  * base:
1274  *   preamble
1275  *   control body (ctrl_len bytes)
1276  *   space for padding, if needed
1277  *   space for control remainder auth tag
1278  *   space for preamble auth tag
1279  *
1280  * Encrypt preamble and the inline portion, then encrypt the remainder
1281  * and gather into:
1282  *
1283  *   preamble
1284  *   control body (48 bytes)
1285  *   preamble auth tag
1286  *   control body (ctrl_len - 48 bytes)
1287  *   zero padding, if needed
1288  *   control remainder auth tag
1289  *
1290  * Preamble should already be encoded at the start of base.
1291  */
1292 static int prepare_head_secure_big(struct ceph_connection *con,
1293                                    void *base, int ctrl_len)
1294 {
1295         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1296         void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN;
1297         void *rem_tag = rem + padded_len(rem_len);
1298         void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN;
1299         struct scatterlist sgs[2];
1300         int ret;
1301
1302         sg_init_table(sgs, 2);
1303         sg_set_buf(&sgs[0], base, rem - base);
1304         sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
1305         ret = gcm_crypt(con, true, sgs, sgs, rem - base);
1306         if (ret)
1307                 return ret;
1308
1309         /* control remainder padding? */
1310         if (need_padding(rem_len))
1311                 memset(rem + rem_len, 0, padding_len(rem_len));
1312
1313         sg_init_one(&sgs[0], rem, pmbl_tag - rem);
1314         ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem);
1315         if (ret)
1316                 return ret;
1317
1318         add_out_kvec(con, base, rem - base);
1319         add_out_kvec(con, pmbl_tag, CEPH_GCM_TAG_LEN);
1320         add_out_kvec(con, rem, pmbl_tag - rem);
1321         return 0;
1322 }
1323
1324 static int __prepare_control(struct ceph_connection *con, int tag,
1325                              void *base, int ctrl_len, void *extdata,
1326                              int extdata_len, bool to_be_signed)
1327 {
1328         int total_len = ctrl_len + extdata_len;
1329         struct ceph_frame_desc desc;
1330         int ret;
1331
1332         dout("%s con %p tag %d len %d (%d+%d)\n", __func__, con, tag,
1333              total_len, ctrl_len, extdata_len);
1334
1335         /* extdata may be vmalloc'ed but not base */
1336         if (WARN_ON(is_vmalloc_addr(base) || !ctrl_len))
1337                 return -EINVAL;
1338
1339         init_frame_desc(&desc, tag, &total_len, 1);
1340         encode_preamble(&desc, base);
1341
1342         if (con_secure(con)) {
1343                 if (WARN_ON(extdata_len || to_be_signed))
1344                         return -EINVAL;
1345
1346                 if (ctrl_len <= CEPH_PREAMBLE_INLINE_LEN)
1347                         /* fully inlined, inline buffer may need padding */
1348                         ret = prepare_head_secure_small(con, base, ctrl_len);
1349                 else
1350                         /* partially inlined, inline buffer is full */
1351                         ret = prepare_head_secure_big(con, base, ctrl_len);
1352                 if (ret)
1353                         return ret;
1354         } else {
1355                 prepare_head_plain(con, base, ctrl_len, extdata, extdata_len,
1356                                    to_be_signed);
1357         }
1358
1359         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1360         return 0;
1361 }
1362
1363 static int prepare_control(struct ceph_connection *con, int tag,
1364                            void *base, int ctrl_len)
1365 {
1366         return __prepare_control(con, tag, base, ctrl_len, NULL, 0, false);
1367 }
1368
1369 static int prepare_hello(struct ceph_connection *con)
1370 {
1371         void *buf, *p;
1372         int ctrl_len;
1373
1374         ctrl_len = 1 + ceph_entity_addr_encoding_len(&con->peer_addr);
1375         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1376         if (!buf)
1377                 return -ENOMEM;
1378
1379         p = CTRL_BODY(buf);
1380         ceph_encode_8(&p, CEPH_ENTITY_TYPE_CLIENT);
1381         ceph_encode_entity_addr(&p, &con->peer_addr);
1382         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1383
1384         return __prepare_control(con, FRAME_TAG_HELLO, buf, ctrl_len,
1385                                  NULL, 0, true);
1386 }
1387
/*
 * Sized so that head_onwire_len(AUTH_BUF_LEN, false) comes out to 512.
 */
#define AUTH_BUF_LEN    (512 - CEPH_PREAMBLE_PLAIN_LEN - CEPH_CRC_LEN)
1390
1391 static int prepare_auth_request(struct ceph_connection *con)
1392 {
1393         void *authorizer, *authorizer_copy;
1394         int ctrl_len, authorizer_len;
1395         void *buf;
1396         int ret;
1397
1398         ctrl_len = AUTH_BUF_LEN;
1399         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1400         if (!buf)
1401                 return -ENOMEM;
1402
1403         mutex_unlock(&con->mutex);
1404         ret = con->ops->get_auth_request(con, CTRL_BODY(buf), &ctrl_len,
1405                                          &authorizer, &authorizer_len);
1406         mutex_lock(&con->mutex);
1407         if (con->state != CEPH_CON_S_V2_HELLO) {
1408                 dout("%s con %p state changed to %d\n", __func__, con,
1409                      con->state);
1410                 return -EAGAIN;
1411         }
1412
1413         dout("%s con %p get_auth_request ret %d\n", __func__, con, ret);
1414         if (ret)
1415                 return ret;
1416
1417         authorizer_copy = alloc_conn_buf(con, authorizer_len);
1418         if (!authorizer_copy)
1419                 return -ENOMEM;
1420
1421         memcpy(authorizer_copy, authorizer, authorizer_len);
1422
1423         return __prepare_control(con, FRAME_TAG_AUTH_REQUEST, buf, ctrl_len,
1424                                  authorizer_copy, authorizer_len, true);
1425 }
1426
1427 static int prepare_auth_request_more(struct ceph_connection *con,
1428                                      void *reply, int reply_len)
1429 {
1430         int ctrl_len, authorizer_len;
1431         void *authorizer;
1432         void *buf;
1433         int ret;
1434
1435         ctrl_len = AUTH_BUF_LEN;
1436         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1437         if (!buf)
1438                 return -ENOMEM;
1439
1440         mutex_unlock(&con->mutex);
1441         ret = con->ops->handle_auth_reply_more(con, reply, reply_len,
1442                                                CTRL_BODY(buf), &ctrl_len,
1443                                                &authorizer, &authorizer_len);
1444         mutex_lock(&con->mutex);
1445         if (con->state != CEPH_CON_S_V2_AUTH) {
1446                 dout("%s con %p state changed to %d\n", __func__, con,
1447                      con->state);
1448                 return -EAGAIN;
1449         }
1450
1451         dout("%s con %p handle_auth_reply_more ret %d\n", __func__, con, ret);
1452         if (ret)
1453                 return ret;
1454
1455         return __prepare_control(con, FRAME_TAG_AUTH_REQUEST_MORE, buf,
1456                                  ctrl_len, authorizer, authorizer_len, true);
1457 }
1458
1459 static int prepare_auth_signature(struct ceph_connection *con)
1460 {
1461         void *buf;
1462         int ret;
1463
1464         buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
1465                                                   con_secure(con)));
1466         if (!buf)
1467                 return -ENOMEM;
1468
1469         ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
1470                           CTRL_BODY(buf));
1471         if (ret)
1472                 return ret;
1473
1474         return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
1475                                SHA256_DIGEST_SIZE);
1476 }
1477
1478 static int prepare_client_ident(struct ceph_connection *con)
1479 {
1480         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1481         struct ceph_client *client = from_msgr(con->msgr);
1482         u64 global_id = ceph_client_gid(client);
1483         void *buf, *p;
1484         int ctrl_len;
1485
1486         WARN_ON(con->v2.server_cookie);
1487         WARN_ON(con->v2.connect_seq);
1488         WARN_ON(con->v2.peer_global_seq);
1489
1490         if (!con->v2.client_cookie) {
1491                 do {
1492                         get_random_bytes(&con->v2.client_cookie,
1493                                          sizeof(con->v2.client_cookie));
1494                 } while (!con->v2.client_cookie);
1495                 dout("%s con %p generated cookie 0x%llx\n", __func__, con,
1496                      con->v2.client_cookie);
1497         } else {
1498                 dout("%s con %p cookie already set 0x%llx\n", __func__, con,
1499                      con->v2.client_cookie);
1500         }
1501
1502         dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
1503              __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1504              ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce),
1505              global_id, con->v2.global_seq, client->supported_features,
1506              client->required_features, con->v2.client_cookie);
1507
1508         ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) +
1509                    ceph_entity_addr_encoding_len(&con->peer_addr) + 6 * 8;
1510         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1511         if (!buf)
1512                 return -ENOMEM;
1513
1514         p = CTRL_BODY(buf);
1515         ceph_encode_8(&p, 2);  /* addrvec marker */
1516         ceph_encode_32(&p, 1);  /* addr_cnt */
1517         ceph_encode_entity_addr(&p, my_addr);
1518         ceph_encode_entity_addr(&p, &con->peer_addr);
1519         ceph_encode_64(&p, global_id);
1520         ceph_encode_64(&p, con->v2.global_seq);
1521         ceph_encode_64(&p, client->supported_features);
1522         ceph_encode_64(&p, client->required_features);
1523         ceph_encode_64(&p, 0);  /* flags */
1524         ceph_encode_64(&p, con->v2.client_cookie);
1525         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1526
1527         return prepare_control(con, FRAME_TAG_CLIENT_IDENT, buf, ctrl_len);
1528 }
1529
1530 static int prepare_session_reconnect(struct ceph_connection *con)
1531 {
1532         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1533         void *buf, *p;
1534         int ctrl_len;
1535
1536         WARN_ON(!con->v2.client_cookie);
1537         WARN_ON(!con->v2.server_cookie);
1538         WARN_ON(!con->v2.connect_seq);
1539         WARN_ON(!con->v2.peer_global_seq);
1540
1541         dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
1542              __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1543              con->v2.client_cookie, con->v2.server_cookie, con->v2.global_seq,
1544              con->v2.connect_seq, con->in_seq);
1545
1546         ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) + 5 * 8;
1547         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1548         if (!buf)
1549                 return -ENOMEM;
1550
1551         p = CTRL_BODY(buf);
1552         ceph_encode_8(&p, 2);  /* entity_addrvec_t marker */
1553         ceph_encode_32(&p, 1);  /* my_addrs len */
1554         ceph_encode_entity_addr(&p, my_addr);
1555         ceph_encode_64(&p, con->v2.client_cookie);
1556         ceph_encode_64(&p, con->v2.server_cookie);
1557         ceph_encode_64(&p, con->v2.global_seq);
1558         ceph_encode_64(&p, con->v2.connect_seq);
1559         ceph_encode_64(&p, con->in_seq);
1560         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1561
1562         return prepare_control(con, FRAME_TAG_SESSION_RECONNECT, buf, ctrl_len);
1563 }
1564
1565 static int prepare_keepalive2(struct ceph_connection *con)
1566 {
1567         struct ceph_timespec *ts = CTRL_BODY(con->v2.out_buf);
1568         struct timespec64 now;
1569
1570         ktime_get_real_ts64(&now);
1571         dout("%s con %p timestamp %lld.%09ld\n", __func__, con, now.tv_sec,
1572              now.tv_nsec);
1573
1574         ceph_encode_timespec64(ts, &now);
1575
1576         reset_out_kvecs(con);
1577         return prepare_control(con, FRAME_TAG_KEEPALIVE2, con->v2.out_buf,
1578                                sizeof(struct ceph_timespec));
1579 }
1580
1581 static int prepare_ack(struct ceph_connection *con)
1582 {
1583         void *p;
1584
1585         dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1586              con->in_seq_acked, con->in_seq);
1587         con->in_seq_acked = con->in_seq;
1588
1589         p = CTRL_BODY(con->v2.out_buf);
1590         ceph_encode_64(&p, con->in_seq_acked);
1591
1592         reset_out_kvecs(con);
1593         return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
1594 }
1595
1596 static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
1597 {
1598         dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
1599              con->out_msg, aborted, con->v2.out_epil.front_crc,
1600              con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);
1601
1602         encode_epilogue_plain(con, aborted);
1603         add_out_kvec(con, &con->v2.out_epil, CEPH_EPILOGUE_PLAIN_LEN);
1604 }
1605
1606 /*
1607  * For "used" empty segments, crc is -1.  For unused (trailing)
1608  * segments, crc is 0.
1609  */
1610 static void prepare_message_plain(struct ceph_connection *con)
1611 {
1612         struct ceph_msg *msg = con->out_msg;
1613
1614         prepare_head_plain(con, con->v2.out_buf,
1615                            sizeof(struct ceph_msg_header2), NULL, 0, false);
1616
1617         if (!front_len(msg) && !middle_len(msg)) {
1618                 if (!data_len(msg)) {
1619                         /*
1620                          * Empty message: once the head is written,
1621                          * we are done -- there is no epilogue.
1622                          */
1623                         con->v2.out_state = OUT_S_FINISH_MESSAGE;
1624                         return;
1625                 }
1626
1627                 con->v2.out_epil.front_crc = -1;
1628                 con->v2.out_epil.middle_crc = -1;
1629                 con->v2.out_state = OUT_S_QUEUE_DATA;
1630                 return;
1631         }
1632
1633         if (front_len(msg)) {
1634                 con->v2.out_epil.front_crc = crc32c(-1, msg->front.iov_base,
1635                                                     front_len(msg));
1636                 add_out_kvec(con, msg->front.iov_base, front_len(msg));
1637         } else {
1638                 /* middle (at least) is there, checked above */
1639                 con->v2.out_epil.front_crc = -1;
1640         }
1641
1642         if (middle_len(msg)) {
1643                 con->v2.out_epil.middle_crc =
1644                         crc32c(-1, msg->middle->vec.iov_base, middle_len(msg));
1645                 add_out_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1646         } else {
1647                 con->v2.out_epil.middle_crc = data_len(msg) ? -1 : 0;
1648         }
1649
1650         if (data_len(msg)) {
1651                 con->v2.out_state = OUT_S_QUEUE_DATA;
1652         } else {
1653                 con->v2.out_epil.data_crc = 0;
1654                 prepare_epilogue_plain(con, false);
1655                 con->v2.out_state = OUT_S_FINISH_MESSAGE;
1656         }
1657 }
1658
1659 /*
1660  * Unfortunately the kernel crypto API doesn't support streaming
1661  * (piecewise) operation for AEAD algorithms, so we can't get away
1662  * with a fixed size buffer and a couple sgs.  Instead, we have to
1663  * allocate pages for the entire tail of the message (currently up
1664  * to ~32M) and two sgs arrays (up to ~256K each)...
1665  */
1666 static int prepare_message_secure(struct ceph_connection *con)
1667 {
1668         void *zerop = page_address(ceph_zero_page);
1669         struct sg_table enc_sgt = {};
1670         struct sg_table sgt = {};
1671         struct page **enc_pages;
1672         int enc_page_cnt;
1673         int tail_len;
1674         int ret;
1675
1676         ret = prepare_head_secure_small(con, con->v2.out_buf,
1677                                         sizeof(struct ceph_msg_header2));
1678         if (ret)
1679                 return ret;
1680
1681         tail_len = tail_onwire_len(con->out_msg, true);
1682         if (!tail_len) {
1683                 /*
1684                  * Empty message: once the head is written,
1685                  * we are done -- there is no epilogue.
1686                  */
1687                 con->v2.out_state = OUT_S_FINISH_MESSAGE;
1688                 return 0;
1689         }
1690
1691         encode_epilogue_secure(con, false);
1692         ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
1693                                 &con->v2.out_epil, NULL, 0, false);
1694         if (ret)
1695                 goto out;
1696
1697         enc_page_cnt = calc_pages_for(0, tail_len);
1698         enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1699         if (IS_ERR(enc_pages)) {
1700                 ret = PTR_ERR(enc_pages);
1701                 goto out;
1702         }
1703
1704         WARN_ON(con->v2.out_enc_pages || con->v2.out_enc_page_cnt);
1705         con->v2.out_enc_pages = enc_pages;
1706         con->v2.out_enc_page_cnt = enc_page_cnt;
1707         con->v2.out_enc_resid = tail_len;
1708         con->v2.out_enc_i = 0;
1709
1710         ret = sg_alloc_table_from_pages(&enc_sgt, enc_pages, enc_page_cnt,
1711                                         0, tail_len, GFP_NOIO);
1712         if (ret)
1713                 goto out;
1714
1715         ret = gcm_crypt(con, true, sgt.sgl, enc_sgt.sgl,
1716                         tail_len - CEPH_GCM_TAG_LEN);
1717         if (ret)
1718                 goto out;
1719
1720         dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
1721              con->out_msg, sgt.orig_nents, enc_page_cnt);
1722         con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;
1723
1724 out:
1725         sg_free_table(&sgt);
1726         sg_free_table(&enc_sgt);
1727         return ret;
1728 }
1729
1730 static int prepare_message(struct ceph_connection *con)
1731 {
1732         int lens[] = {
1733                 sizeof(struct ceph_msg_header2),
1734                 front_len(con->out_msg),
1735                 middle_len(con->out_msg),
1736                 data_len(con->out_msg)
1737         };
1738         struct ceph_frame_desc desc;
1739         int ret;
1740
1741         dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
1742              con->out_msg, lens[0], lens[1], lens[2], lens[3]);
1743
1744         if (con->in_seq > con->in_seq_acked) {
1745                 dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1746                      con->in_seq_acked, con->in_seq);
1747                 con->in_seq_acked = con->in_seq;
1748         }
1749
1750         reset_out_kvecs(con);
1751         init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
1752         encode_preamble(&desc, con->v2.out_buf);
1753         fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr,
1754                      con->in_seq_acked);
1755
1756         if (con_secure(con)) {
1757                 ret = prepare_message_secure(con);
1758                 if (ret)
1759                         return ret;
1760         } else {
1761                 prepare_message_plain(con);
1762         }
1763
1764         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1765         return 0;
1766 }
1767
1768 static int prepare_read_banner_prefix(struct ceph_connection *con)
1769 {
1770         void *buf;
1771
1772         buf = alloc_conn_buf(con, CEPH_BANNER_V2_PREFIX_LEN);
1773         if (!buf)
1774                 return -ENOMEM;
1775
1776         reset_in_kvecs(con);
1777         add_in_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1778         add_in_sign_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1779         con->state = CEPH_CON_S_V2_BANNER_PREFIX;
1780         return 0;
1781 }
1782
1783 static int prepare_read_banner_payload(struct ceph_connection *con,
1784                                        int payload_len)
1785 {
1786         void *buf;
1787
1788         buf = alloc_conn_buf(con, payload_len);
1789         if (!buf)
1790                 return -ENOMEM;
1791
1792         reset_in_kvecs(con);
1793         add_in_kvec(con, buf, payload_len);
1794         add_in_sign_kvec(con, buf, payload_len);
1795         con->state = CEPH_CON_S_V2_BANNER_PAYLOAD;
1796         return 0;
1797 }
1798
1799 static void prepare_read_preamble(struct ceph_connection *con)
1800 {
1801         reset_in_kvecs(con);
1802         add_in_kvec(con, con->v2.in_buf,
1803                     con_secure(con) ? CEPH_PREAMBLE_SECURE_LEN :
1804                                       CEPH_PREAMBLE_PLAIN_LEN);
1805         con->v2.in_state = IN_S_HANDLE_PREAMBLE;
1806 }
1807
1808 static int prepare_read_control(struct ceph_connection *con)
1809 {
1810         int ctrl_len = con->v2.in_desc.fd_lens[0];
1811         int head_len;
1812         void *buf;
1813
1814         reset_in_kvecs(con);
1815         if (con->state == CEPH_CON_S_V2_HELLO ||
1816             con->state == CEPH_CON_S_V2_AUTH) {
1817                 head_len = head_onwire_len(ctrl_len, false);
1818                 buf = alloc_conn_buf(con, head_len);
1819                 if (!buf)
1820                         return -ENOMEM;
1821
1822                 /* preserve preamble */
1823                 memcpy(buf, con->v2.in_buf, CEPH_PREAMBLE_LEN);
1824
1825                 add_in_kvec(con, CTRL_BODY(buf), ctrl_len);
1826                 add_in_kvec(con, CTRL_BODY(buf) + ctrl_len, CEPH_CRC_LEN);
1827                 add_in_sign_kvec(con, buf, head_len);
1828         } else {
1829                 if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
1830                         buf = alloc_conn_buf(con, ctrl_len);
1831                         if (!buf)
1832                                 return -ENOMEM;
1833
1834                         add_in_kvec(con, buf, ctrl_len);
1835                 } else {
1836                         add_in_kvec(con, CTRL_BODY(con->v2.in_buf), ctrl_len);
1837                 }
1838                 add_in_kvec(con, con->v2.in_buf, CEPH_CRC_LEN);
1839         }
1840         con->v2.in_state = IN_S_HANDLE_CONTROL;
1841         return 0;
1842 }
1843
1844 static int prepare_read_control_remainder(struct ceph_connection *con)
1845 {
1846         int ctrl_len = con->v2.in_desc.fd_lens[0];
1847         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1848         void *buf;
1849
1850         buf = alloc_conn_buf(con, ctrl_len);
1851         if (!buf)
1852                 return -ENOMEM;
1853
1854         memcpy(buf, CTRL_BODY(con->v2.in_buf), CEPH_PREAMBLE_INLINE_LEN);
1855
1856         reset_in_kvecs(con);
1857         add_in_kvec(con, buf + CEPH_PREAMBLE_INLINE_LEN, rem_len);
1858         add_in_kvec(con, con->v2.in_buf,
1859                     padding_len(rem_len) + CEPH_GCM_TAG_LEN);
1860         con->v2.in_state = IN_S_HANDLE_CONTROL_REMAINDER;
1861         return 0;
1862 }
1863
1864 static int prepare_read_data(struct ceph_connection *con)
1865 {
1866         struct bio_vec bv;
1867
1868         con->in_data_crc = -1;
1869         ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
1870                                   data_len(con->in_msg));
1871
1872         get_bvec_at(&con->v2.in_cursor, &bv);
1873         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1874                 if (unlikely(!con->bounce_page)) {
1875                         con->bounce_page = alloc_page(GFP_NOIO);
1876                         if (!con->bounce_page) {
1877                                 pr_err("failed to allocate bounce page\n");
1878                                 return -ENOMEM;
1879                         }
1880                 }
1881
1882                 bv.bv_page = con->bounce_page;
1883                 bv.bv_offset = 0;
1884         }
1885         set_in_bvec(con, &bv);
1886         con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
1887         return 0;
1888 }
1889
1890 static void prepare_read_data_cont(struct ceph_connection *con)
1891 {
1892         struct bio_vec bv;
1893
1894         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1895                 con->in_data_crc = crc32c(con->in_data_crc,
1896                                           page_address(con->bounce_page),
1897                                           con->v2.in_bvec.bv_len);
1898
1899                 get_bvec_at(&con->v2.in_cursor, &bv);
1900                 memcpy_to_page(bv.bv_page, bv.bv_offset,
1901                                page_address(con->bounce_page),
1902                                con->v2.in_bvec.bv_len);
1903         } else {
1904                 con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1905                                                     con->v2.in_bvec.bv_page,
1906                                                     con->v2.in_bvec.bv_offset,
1907                                                     con->v2.in_bvec.bv_len);
1908         }
1909
1910         ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
1911         if (con->v2.in_cursor.total_resid) {
1912                 get_bvec_at(&con->v2.in_cursor, &bv);
1913                 if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1914                         bv.bv_page = con->bounce_page;
1915                         bv.bv_offset = 0;
1916                 }
1917                 set_in_bvec(con, &bv);
1918                 WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
1919                 return;
1920         }
1921
1922         /*
1923          * We've read all data.  Prepare to read epilogue.
1924          */
1925         reset_in_kvecs(con);
1926         add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1927         con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1928 }
1929
1930 static int prepare_sparse_read_cont(struct ceph_connection *con)
1931 {
1932         int ret;
1933         struct bio_vec bv;
1934         char *buf = NULL;
1935         struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
1936
1937         WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
1938
1939         if (iov_iter_is_bvec(&con->v2.in_iter)) {
1940                 if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1941                         con->in_data_crc = crc32c(con->in_data_crc,
1942                                                   page_address(con->bounce_page),
1943                                                   con->v2.in_bvec.bv_len);
1944                         get_bvec_at(cursor, &bv);
1945                         memcpy_to_page(bv.bv_page, bv.bv_offset,
1946                                        page_address(con->bounce_page),
1947                                        con->v2.in_bvec.bv_len);
1948                 } else {
1949                         con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1950                                                             con->v2.in_bvec.bv_page,
1951                                                             con->v2.in_bvec.bv_offset,
1952                                                             con->v2.in_bvec.bv_len);
1953                 }
1954
1955                 ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
1956                 cursor->sr_resid -= con->v2.in_bvec.bv_len;
1957                 dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
1958                      con->v2.in_bvec.bv_len, cursor->sr_resid);
1959                 WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
1960                 if (cursor->sr_resid) {
1961                         get_bvec_at(cursor, &bv);
1962                         if (bv.bv_len > cursor->sr_resid)
1963                                 bv.bv_len = cursor->sr_resid;
1964                         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1965                                 bv.bv_page = con->bounce_page;
1966                                 bv.bv_offset = 0;
1967                         }
1968                         set_in_bvec(con, &bv);
1969                         con->v2.data_len_remain -= bv.bv_len;
1970                         return 0;
1971                 }
1972         } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
1973                 /* On first call, we have no kvec so don't compute crc */
1974                 if (con->v2.in_kvec_cnt) {
1975                         WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
1976                         con->in_data_crc = crc32c(con->in_data_crc,
1977                                                   con->v2.in_kvecs[0].iov_base,
1978                                                   con->v2.in_kvecs[0].iov_len);
1979                 }
1980         } else {
1981                 return -EIO;
1982         }
1983
1984         /* get next extent */
1985         ret = con->ops->sparse_read(con, cursor, &buf);
1986         if (ret <= 0) {
1987                 if (ret < 0)
1988                         return ret;
1989
1990                 reset_in_kvecs(con);
1991                 add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1992                 con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1993                 return 0;
1994         }
1995
1996         if (buf) {
1997                 /* receive into buffer */
1998                 reset_in_kvecs(con);
1999                 add_in_kvec(con, buf, ret);
2000                 con->v2.data_len_remain -= ret;
2001                 return 0;
2002         }
2003
2004         if (ret > cursor->total_resid) {
2005                 pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
2006                         __func__, ret, cursor->total_resid, cursor->resid);
2007                 return -EIO;
2008         }
2009         get_bvec_at(cursor, &bv);
2010         if (bv.bv_len > cursor->sr_resid)
2011                 bv.bv_len = cursor->sr_resid;
2012         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
2013                 if (unlikely(!con->bounce_page)) {
2014                         con->bounce_page = alloc_page(GFP_NOIO);
2015                         if (!con->bounce_page) {
2016                                 pr_err("failed to allocate bounce page\n");
2017                                 return -ENOMEM;
2018                         }
2019                 }
2020
2021                 bv.bv_page = con->bounce_page;
2022                 bv.bv_offset = 0;
2023         }
2024         set_in_bvec(con, &bv);
2025         con->v2.data_len_remain -= ret;
2026         return ret;
2027 }
2028
2029 static int prepare_sparse_read_data(struct ceph_connection *con)
2030 {
2031         struct ceph_msg *msg = con->in_msg;
2032
2033         dout("%s: starting sparse read\n", __func__);
2034
2035         if (WARN_ON_ONCE(!con->ops->sparse_read))
2036                 return -EOPNOTSUPP;
2037
2038         if (!con_secure(con))
2039                 con->in_data_crc = -1;
2040
2041         reset_in_kvecs(con);
2042         con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
2043         con->v2.data_len_remain = data_len(msg);
2044         return prepare_sparse_read_cont(con);
2045 }
2046
2047 static int prepare_read_tail_plain(struct ceph_connection *con)
2048 {
2049         struct ceph_msg *msg = con->in_msg;
2050
2051         if (!front_len(msg) && !middle_len(msg)) {
2052                 WARN_ON(!data_len(msg));
2053                 return prepare_read_data(con);
2054         }
2055
2056         reset_in_kvecs(con);
2057         if (front_len(msg)) {
2058                 add_in_kvec(con, msg->front.iov_base, front_len(msg));
2059                 WARN_ON(msg->front.iov_len != front_len(msg));
2060         }
2061         if (middle_len(msg)) {
2062                 add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
2063                 WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
2064         }
2065
2066         if (data_len(msg)) {
2067                 if (msg->sparse_read)
2068                         con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
2069                 else
2070                         con->v2.in_state = IN_S_PREPARE_READ_DATA;
2071         } else {
2072                 add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
2073                 con->v2.in_state = IN_S_HANDLE_EPILOGUE;
2074         }
2075         return 0;
2076 }
2077
2078 static void prepare_read_enc_page(struct ceph_connection *con)
2079 {
2080         struct bio_vec bv;
2081
2082         dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
2083              con->v2.in_enc_resid);
2084         WARN_ON(!con->v2.in_enc_resid);
2085
2086         bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
2087                       min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);
2088
2089         set_in_bvec(con, &bv);
2090         con->v2.in_enc_i++;
2091         con->v2.in_enc_resid -= bv.bv_len;
2092
2093         if (con->v2.in_enc_resid) {
2094                 con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
2095                 return;
2096         }
2097
2098         /*
2099          * We are set to read the last piece of ciphertext (ending
2100          * with epilogue) + auth tag.
2101          */
2102         WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
2103         con->v2.in_state = IN_S_HANDLE_EPILOGUE;
2104 }
2105
2106 static int prepare_read_tail_secure(struct ceph_connection *con)
2107 {
2108         struct page **enc_pages;
2109         int enc_page_cnt;
2110         int tail_len;
2111
2112         tail_len = tail_onwire_len(con->in_msg, true);
2113         WARN_ON(!tail_len);
2114
2115         enc_page_cnt = calc_pages_for(0, tail_len);
2116         enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
2117         if (IS_ERR(enc_pages))
2118                 return PTR_ERR(enc_pages);
2119
2120         WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
2121         con->v2.in_enc_pages = enc_pages;
2122         con->v2.in_enc_page_cnt = enc_page_cnt;
2123         con->v2.in_enc_resid = tail_len;
2124         con->v2.in_enc_i = 0;
2125
2126         prepare_read_enc_page(con);
2127         return 0;
2128 }
2129
2130 static void __finish_skip(struct ceph_connection *con)
2131 {
2132         con->in_seq++;
2133         prepare_read_preamble(con);
2134 }
2135
2136 static void prepare_skip_message(struct ceph_connection *con)
2137 {
2138         struct ceph_frame_desc *desc = &con->v2.in_desc;
2139         int tail_len;
2140
2141         dout("%s con %p %d+%d+%d\n", __func__, con, desc->fd_lens[1],
2142              desc->fd_lens[2], desc->fd_lens[3]);
2143
2144         tail_len = __tail_onwire_len(desc->fd_lens[1], desc->fd_lens[2],
2145                                      desc->fd_lens[3], con_secure(con));
2146         if (!tail_len) {
2147                 __finish_skip(con);
2148         } else {
2149                 set_in_skip(con, tail_len);
2150                 con->v2.in_state = IN_S_FINISH_SKIP;
2151         }
2152 }
2153
2154 static int process_banner_prefix(struct ceph_connection *con)
2155 {
2156         int payload_len;
2157         void *p;
2158
2159         WARN_ON(con->v2.in_kvecs[0].iov_len != CEPH_BANNER_V2_PREFIX_LEN);
2160
2161         p = con->v2.in_kvecs[0].iov_base;
2162         if (memcmp(p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN)) {
2163                 if (!memcmp(p, CEPH_BANNER, CEPH_BANNER_LEN))
2164                         con->error_msg = "server is speaking msgr1 protocol";
2165                 else
2166                         con->error_msg = "protocol error, bad banner";
2167                 return -EINVAL;
2168         }
2169
2170         p += CEPH_BANNER_V2_LEN;
2171         payload_len = ceph_decode_16(&p);
2172         dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2173
2174         return prepare_read_banner_payload(con, payload_len);
2175 }
2176
2177 static int process_banner_payload(struct ceph_connection *con)
2178 {
2179         void *end = con->v2.in_kvecs[0].iov_base + con->v2.in_kvecs[0].iov_len;
2180         u64 feat = CEPH_MSGR2_SUPPORTED_FEATURES;
2181         u64 req_feat = CEPH_MSGR2_REQUIRED_FEATURES;
2182         u64 server_feat, server_req_feat;
2183         void *p;
2184         int ret;
2185
2186         p = con->v2.in_kvecs[0].iov_base;
2187         ceph_decode_64_safe(&p, end, server_feat, bad);
2188         ceph_decode_64_safe(&p, end, server_req_feat, bad);
2189
2190         dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n",
2191              __func__, con, server_feat, server_req_feat);
2192
2193         if (req_feat & ~server_feat) {
2194                 pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2195                        server_feat, req_feat & ~server_feat);
2196                 con->error_msg = "missing required protocol features";
2197                 return -EINVAL;
2198         }
2199         if (server_req_feat & ~feat) {
2200                 pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2201                        feat, server_req_feat & ~feat);
2202                 con->error_msg = "missing required protocol features";
2203                 return -EINVAL;
2204         }
2205
2206         /* no reset_out_kvecs() as our banner may still be pending */
2207         ret = prepare_hello(con);
2208         if (ret) {
2209                 pr_err("prepare_hello failed: %d\n", ret);
2210                 return ret;
2211         }
2212
2213         con->state = CEPH_CON_S_V2_HELLO;
2214         prepare_read_preamble(con);
2215         return 0;
2216
2217 bad:
2218         pr_err("failed to decode banner payload\n");
2219         return -EINVAL;
2220 }
2221
2222 static int process_hello(struct ceph_connection *con, void *p, void *end)
2223 {
2224         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
2225         struct ceph_entity_addr addr_for_me;
2226         u8 entity_type;
2227         int ret;
2228
2229         if (con->state != CEPH_CON_S_V2_HELLO) {
2230                 con->error_msg = "protocol error, unexpected hello";
2231                 return -EINVAL;
2232         }
2233
2234         ceph_decode_8_safe(&p, end, entity_type, bad);
2235         ret = ceph_decode_entity_addr(&p, end, &addr_for_me);
2236         if (ret) {
2237                 pr_err("failed to decode addr_for_me: %d\n", ret);
2238                 return ret;
2239         }
2240
2241         dout("%s con %p entity_type %d addr_for_me %s\n", __func__, con,
2242              entity_type, ceph_pr_addr(&addr_for_me));
2243
2244         if (entity_type != con->peer_name.type) {
2245                 pr_err("bad peer type, want %d, got %d\n",
2246                        con->peer_name.type, entity_type);
2247                 con->error_msg = "wrong peer at address";
2248                 return -EINVAL;
2249         }
2250
2251         /*
2252          * Set our address to the address our first peer (i.e. monitor)
2253          * sees that we are connecting from.  If we are behind some sort
2254          * of NAT and want to be identified by some private (not NATed)
2255          * address, ip option should be used.
2256          */
2257         if (ceph_addr_is_blank(my_addr)) {
2258                 memcpy(&my_addr->in_addr, &addr_for_me.in_addr,
2259                        sizeof(my_addr->in_addr));
2260                 ceph_addr_set_port(my_addr, 0);
2261                 dout("%s con %p set my addr %s, as seen by peer %s\n",
2262                      __func__, con, ceph_pr_addr(my_addr),
2263                      ceph_pr_addr(&con->peer_addr));
2264         } else {
2265                 dout("%s con %p my addr already set %s\n",
2266                      __func__, con, ceph_pr_addr(my_addr));
2267         }
2268
2269         WARN_ON(ceph_addr_is_blank(my_addr) || ceph_addr_port(my_addr));
2270         WARN_ON(my_addr->type != CEPH_ENTITY_ADDR_TYPE_ANY);
2271         WARN_ON(!my_addr->nonce);
2272
2273         /* no reset_out_kvecs() as our hello may still be pending */
2274         ret = prepare_auth_request(con);
2275         if (ret) {
2276                 if (ret != -EAGAIN)
2277                         pr_err("prepare_auth_request failed: %d\n", ret);
2278                 return ret;
2279         }
2280
2281         con->state = CEPH_CON_S_V2_AUTH;
2282         return 0;
2283
2284 bad:
2285         pr_err("failed to decode hello\n");
2286         return -EINVAL;
2287 }
2288
2289 static int process_auth_bad_method(struct ceph_connection *con,
2290                                    void *p, void *end)
2291 {
2292         int allowed_protos[8], allowed_modes[8];
2293         int allowed_proto_cnt, allowed_mode_cnt;
2294         int used_proto, result;
2295         int ret;
2296         int i;
2297
2298         if (con->state != CEPH_CON_S_V2_AUTH) {
2299                 con->error_msg = "protocol error, unexpected auth_bad_method";
2300                 return -EINVAL;
2301         }
2302
2303         ceph_decode_32_safe(&p, end, used_proto, bad);
2304         ceph_decode_32_safe(&p, end, result, bad);
2305         dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto,
2306              result);
2307
2308         ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad);
2309         if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) {
2310                 pr_err("allowed_protos too big %d\n", allowed_proto_cnt);
2311                 return -EINVAL;
2312         }
2313         for (i = 0; i < allowed_proto_cnt; i++) {
2314                 ceph_decode_32_safe(&p, end, allowed_protos[i], bad);
2315                 dout("%s con %p allowed_protos[%d] %d\n", __func__, con,
2316                      i, allowed_protos[i]);
2317         }
2318
2319         ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad);
2320         if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) {
2321                 pr_err("allowed_modes too big %d\n", allowed_mode_cnt);
2322                 return -EINVAL;
2323         }
2324         for (i = 0; i < allowed_mode_cnt; i++) {
2325                 ceph_decode_32_safe(&p, end, allowed_modes[i], bad);
2326                 dout("%s con %p allowed_modes[%d] %d\n", __func__, con,
2327                      i, allowed_modes[i]);
2328         }
2329
2330         mutex_unlock(&con->mutex);
2331         ret = con->ops->handle_auth_bad_method(con, used_proto, result,
2332                                                allowed_protos,
2333                                                allowed_proto_cnt,
2334                                                allowed_modes,
2335                                                allowed_mode_cnt);
2336         mutex_lock(&con->mutex);
2337         if (con->state != CEPH_CON_S_V2_AUTH) {
2338                 dout("%s con %p state changed to %d\n", __func__, con,
2339                      con->state);
2340                 return -EAGAIN;
2341         }
2342
2343         dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret);
2344         return ret;
2345
2346 bad:
2347         pr_err("failed to decode auth_bad_method\n");
2348         return -EINVAL;
2349 }
2350
2351 static int process_auth_reply_more(struct ceph_connection *con,
2352                                    void *p, void *end)
2353 {
2354         int payload_len;
2355         int ret;
2356
2357         if (con->state != CEPH_CON_S_V2_AUTH) {
2358                 con->error_msg = "protocol error, unexpected auth_reply_more";
2359                 return -EINVAL;
2360         }
2361
2362         ceph_decode_32_safe(&p, end, payload_len, bad);
2363         ceph_decode_need(&p, end, payload_len, bad);
2364
2365         dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2366
2367         reset_out_kvecs(con);
2368         ret = prepare_auth_request_more(con, p, payload_len);
2369         if (ret) {
2370                 if (ret != -EAGAIN)
2371                         pr_err("prepare_auth_request_more failed: %d\n", ret);
2372                 return ret;
2373         }
2374
2375         return 0;
2376
2377 bad:
2378         pr_err("failed to decode auth_reply_more\n");
2379         return -EINVAL;
2380 }
2381
2382 /*
2383  * Align session_key and con_secret to avoid GFP_ATOMIC allocation
2384  * inside crypto_shash_setkey() and crypto_aead_setkey() called from
2385  * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
2386  * objects, so do it by hand.
2387  */
2388 static int process_auth_done(struct ceph_connection *con, void *p, void *end)
2389 {
2390         u8 session_key_buf[CEPH_KEY_LEN + 16];
2391         u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
2392         u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
2393         u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
2394         int session_key_len, con_secret_len;
2395         int payload_len;
2396         u64 global_id;
2397         int ret;
2398
2399         if (con->state != CEPH_CON_S_V2_AUTH) {
2400                 con->error_msg = "protocol error, unexpected auth_done";
2401                 return -EINVAL;
2402         }
2403
2404         ceph_decode_64_safe(&p, end, global_id, bad);
2405         ceph_decode_32_safe(&p, end, con->v2.con_mode, bad);
2406         ceph_decode_32_safe(&p, end, payload_len, bad);
2407
2408         dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
2409              __func__, con, global_id, con->v2.con_mode, payload_len);
2410
2411         mutex_unlock(&con->mutex);
2412         session_key_len = 0;
2413         con_secret_len = 0;
2414         ret = con->ops->handle_auth_done(con, global_id, p, payload_len,
2415                                          session_key, &session_key_len,
2416                                          con_secret, &con_secret_len);
2417         mutex_lock(&con->mutex);
2418         if (con->state != CEPH_CON_S_V2_AUTH) {
2419                 dout("%s con %p state changed to %d\n", __func__, con,
2420                      con->state);
2421                 ret = -EAGAIN;
2422                 goto out;
2423         }
2424
2425         dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret);
2426         if (ret)
2427                 goto out;
2428
2429         ret = setup_crypto(con, session_key, session_key_len, con_secret,
2430                            con_secret_len);
2431         if (ret)
2432                 goto out;
2433
2434         reset_out_kvecs(con);
2435         ret = prepare_auth_signature(con);
2436         if (ret) {
2437                 pr_err("prepare_auth_signature failed: %d\n", ret);
2438                 goto out;
2439         }
2440
2441         con->state = CEPH_CON_S_V2_AUTH_SIGNATURE;
2442
2443 out:
2444         memzero_explicit(session_key_buf, sizeof(session_key_buf));
2445         memzero_explicit(con_secret_buf, sizeof(con_secret_buf));
2446         return ret;
2447
2448 bad:
2449         pr_err("failed to decode auth_done\n");
2450         return -EINVAL;
2451 }
2452
2453 static int process_auth_signature(struct ceph_connection *con,
2454                                   void *p, void *end)
2455 {
2456         u8 hmac[SHA256_DIGEST_SIZE];
2457         int ret;
2458
2459         if (con->state != CEPH_CON_S_V2_AUTH_SIGNATURE) {
2460                 con->error_msg = "protocol error, unexpected auth_signature";
2461                 return -EINVAL;
2462         }
2463
2464         ret = hmac_sha256(con, con->v2.out_sign_kvecs,
2465                           con->v2.out_sign_kvec_cnt, hmac);
2466         if (ret)
2467                 return ret;
2468
2469         ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad);
2470         if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
2471                 con->error_msg = "integrity error, bad auth signature";
2472                 return -EBADMSG;
2473         }
2474
2475         dout("%s con %p auth signature ok\n", __func__, con);
2476
2477         /* no reset_out_kvecs() as our auth_signature may still be pending */
2478         if (!con->v2.server_cookie) {
2479                 ret = prepare_client_ident(con);
2480                 if (ret) {
2481                         pr_err("prepare_client_ident failed: %d\n", ret);
2482                         return ret;
2483                 }
2484
2485                 con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2486         } else {
2487                 ret = prepare_session_reconnect(con);
2488                 if (ret) {
2489                         pr_err("prepare_session_reconnect failed: %d\n", ret);
2490                         return ret;
2491                 }
2492
2493                 con->state = CEPH_CON_S_V2_SESSION_RECONNECT;
2494         }
2495
2496         return 0;
2497
2498 bad:
2499         pr_err("failed to decode auth_signature\n");
2500         return -EINVAL;
2501 }
2502
2503 static int process_server_ident(struct ceph_connection *con,
2504                                 void *p, void *end)
2505 {
2506         struct ceph_client *client = from_msgr(con->msgr);
2507         u64 features, required_features;
2508         struct ceph_entity_addr addr;
2509         u64 global_seq;
2510         u64 global_id;
2511         u64 cookie;
2512         u64 flags;
2513         int ret;
2514
2515         if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2516                 con->error_msg = "protocol error, unexpected server_ident";
2517                 return -EINVAL;
2518         }
2519
2520         ret = ceph_decode_entity_addrvec(&p, end, true, &addr);
2521         if (ret) {
2522                 pr_err("failed to decode server addrs: %d\n", ret);
2523                 return ret;
2524         }
2525
2526         ceph_decode_64_safe(&p, end, global_id, bad);
2527         ceph_decode_64_safe(&p, end, global_seq, bad);
2528         ceph_decode_64_safe(&p, end, features, bad);
2529         ceph_decode_64_safe(&p, end, required_features, bad);
2530         ceph_decode_64_safe(&p, end, flags, bad);
2531         ceph_decode_64_safe(&p, end, cookie, bad);
2532
2533         dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n",
2534              __func__, con, ceph_pr_addr(&addr), le32_to_cpu(addr.nonce),
2535              global_id, global_seq, features, required_features, flags, cookie);
2536
2537         /* is this who we intended to talk to? */
2538         if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) {
2539                 pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
2540                        ceph_pr_addr(&con->peer_addr),
2541                        le32_to_cpu(con->peer_addr.nonce),
2542                        ceph_pr_addr(&addr), le32_to_cpu(addr.nonce));
2543                 con->error_msg = "wrong peer at address";
2544                 return -EINVAL;
2545         }
2546
2547         if (client->required_features & ~features) {
2548                 pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2549                        features, client->required_features & ~features);
2550                 con->error_msg = "missing required protocol features";
2551                 return -EINVAL;
2552         }
2553
2554         /*
2555          * Both name->type and name->num are set in ceph_con_open() but
2556          * name->num may be bogus in the initial monmap.  name->type is
2557          * verified in handle_hello().
2558          */
2559         WARN_ON(!con->peer_name.type);
2560         con->peer_name.num = cpu_to_le64(global_id);
2561         con->v2.peer_global_seq = global_seq;
2562         con->peer_features = features;
2563         WARN_ON(required_features & ~client->supported_features);
2564         con->v2.server_cookie = cookie;
2565
2566         if (flags & CEPH_MSG_CONNECT_LOSSY) {
2567                 ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
2568                 WARN_ON(con->v2.server_cookie);
2569         } else {
2570                 WARN_ON(!con->v2.server_cookie);
2571         }
2572
2573         clear_in_sign_kvecs(con);
2574         clear_out_sign_kvecs(con);
2575         free_conn_bufs(con);
2576         con->delay = 0;  /* reset backoff memory */
2577
2578         con->state = CEPH_CON_S_OPEN;
2579         con->v2.out_state = OUT_S_GET_NEXT;
2580         return 0;
2581
2582 bad:
2583         pr_err("failed to decode server_ident\n");
2584         return -EINVAL;
2585 }
2586
2587 static int process_ident_missing_features(struct ceph_connection *con,
2588                                           void *p, void *end)
2589 {
2590         struct ceph_client *client = from_msgr(con->msgr);
2591         u64 missing_features;
2592
2593         if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2594                 con->error_msg = "protocol error, unexpected ident_missing_features";
2595                 return -EINVAL;
2596         }
2597
2598         ceph_decode_64_safe(&p, end, missing_features, bad);
2599         pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2600                client->supported_features, missing_features);
2601         con->error_msg = "missing required protocol features";
2602         return -EINVAL;
2603
2604 bad:
2605         pr_err("failed to decode ident_missing_features\n");
2606         return -EINVAL;
2607 }
2608
2609 static int process_session_reconnect_ok(struct ceph_connection *con,
2610                                         void *p, void *end)
2611 {
2612         u64 seq;
2613
2614         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2615                 con->error_msg = "protocol error, unexpected session_reconnect_ok";
2616                 return -EINVAL;
2617         }
2618
2619         ceph_decode_64_safe(&p, end, seq, bad);
2620
2621         dout("%s con %p seq %llu\n", __func__, con, seq);
2622         ceph_con_discard_requeued(con, seq);
2623
2624         clear_in_sign_kvecs(con);
2625         clear_out_sign_kvecs(con);
2626         free_conn_bufs(con);
2627         con->delay = 0;  /* reset backoff memory */
2628
2629         con->state = CEPH_CON_S_OPEN;
2630         con->v2.out_state = OUT_S_GET_NEXT;
2631         return 0;
2632
2633 bad:
2634         pr_err("failed to decode session_reconnect_ok\n");
2635         return -EINVAL;
2636 }
2637
2638 static int process_session_retry(struct ceph_connection *con,
2639                                  void *p, void *end)
2640 {
2641         u64 connect_seq;
2642         int ret;
2643
2644         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2645                 con->error_msg = "protocol error, unexpected session_retry";
2646                 return -EINVAL;
2647         }
2648
2649         ceph_decode_64_safe(&p, end, connect_seq, bad);
2650
2651         dout("%s con %p connect_seq %llu\n", __func__, con, connect_seq);
2652         WARN_ON(connect_seq <= con->v2.connect_seq);
2653         con->v2.connect_seq = connect_seq + 1;
2654
2655         free_conn_bufs(con);
2656
2657         reset_out_kvecs(con);
2658         ret = prepare_session_reconnect(con);
2659         if (ret) {
2660                 pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret);
2661                 return ret;
2662         }
2663
2664         return 0;
2665
2666 bad:
2667         pr_err("failed to decode session_retry\n");
2668         return -EINVAL;
2669 }
2670
2671 static int process_session_retry_global(struct ceph_connection *con,
2672                                         void *p, void *end)
2673 {
2674         u64 global_seq;
2675         int ret;
2676
2677         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2678                 con->error_msg = "protocol error, unexpected session_retry_global";
2679                 return -EINVAL;
2680         }
2681
2682         ceph_decode_64_safe(&p, end, global_seq, bad);
2683
2684         dout("%s con %p global_seq %llu\n", __func__, con, global_seq);
2685         WARN_ON(global_seq <= con->v2.global_seq);
2686         con->v2.global_seq = ceph_get_global_seq(con->msgr, global_seq);
2687
2688         free_conn_bufs(con);
2689
2690         reset_out_kvecs(con);
2691         ret = prepare_session_reconnect(con);
2692         if (ret) {
2693                 pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret);
2694                 return ret;
2695         }
2696
2697         return 0;
2698
2699 bad:
2700         pr_err("failed to decode session_retry_global\n");
2701         return -EINVAL;
2702 }
2703
2704 static int process_session_reset(struct ceph_connection *con,
2705                                  void *p, void *end)
2706 {
2707         bool full;
2708         int ret;
2709
2710         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2711                 con->error_msg = "protocol error, unexpected session_reset";
2712                 return -EINVAL;
2713         }
2714
2715         ceph_decode_8_safe(&p, end, full, bad);
2716         if (!full) {
2717                 con->error_msg = "protocol error, bad session_reset";
2718                 return -EINVAL;
2719         }
2720
2721         pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name),
2722                 ceph_pr_addr(&con->peer_addr));
2723         ceph_con_reset_session(con);
2724
2725         mutex_unlock(&con->mutex);
2726         if (con->ops->peer_reset)
2727                 con->ops->peer_reset(con);
2728         mutex_lock(&con->mutex);
2729         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2730                 dout("%s con %p state changed to %d\n", __func__, con,
2731                      con->state);
2732                 return -EAGAIN;
2733         }
2734
2735         free_conn_bufs(con);
2736
2737         reset_out_kvecs(con);
2738         ret = prepare_client_ident(con);
2739         if (ret) {
2740                 pr_err("prepare_client_ident (rst) failed: %d\n", ret);
2741                 return ret;
2742         }
2743
2744         con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2745         return 0;
2746
2747 bad:
2748         pr_err("failed to decode session_reset\n");
2749         return -EINVAL;
2750 }
2751
2752 static int process_keepalive2_ack(struct ceph_connection *con,
2753                                   void *p, void *end)
2754 {
2755         if (con->state != CEPH_CON_S_OPEN) {
2756                 con->error_msg = "protocol error, unexpected keepalive2_ack";
2757                 return -EINVAL;
2758         }
2759
2760         ceph_decode_need(&p, end, sizeof(struct ceph_timespec), bad);
2761         ceph_decode_timespec64(&con->last_keepalive_ack, p);
2762
2763         dout("%s con %p timestamp %lld.%09ld\n", __func__, con,
2764              con->last_keepalive_ack.tv_sec, con->last_keepalive_ack.tv_nsec);
2765
2766         return 0;
2767
2768 bad:
2769         pr_err("failed to decode keepalive2_ack\n");
2770         return -EINVAL;
2771 }
2772
2773 static int process_ack(struct ceph_connection *con, void *p, void *end)
2774 {
2775         u64 seq;
2776
2777         if (con->state != CEPH_CON_S_OPEN) {
2778                 con->error_msg = "protocol error, unexpected ack";
2779                 return -EINVAL;
2780         }
2781
2782         ceph_decode_64_safe(&p, end, seq, bad);
2783
2784         dout("%s con %p seq %llu\n", __func__, con, seq);
2785         ceph_con_discard_sent(con, seq);
2786         return 0;
2787
2788 bad:
2789         pr_err("failed to decode ack\n");
2790         return -EINVAL;
2791 }
2792
2793 static int process_control(struct ceph_connection *con, void *p, void *end)
2794 {
2795         int tag = con->v2.in_desc.fd_tag;
2796         int ret;
2797
2798         dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p));
2799
2800         switch (tag) {
2801         case FRAME_TAG_HELLO:
2802                 ret = process_hello(con, p, end);
2803                 break;
2804         case FRAME_TAG_AUTH_BAD_METHOD:
2805                 ret = process_auth_bad_method(con, p, end);
2806                 break;
2807         case FRAME_TAG_AUTH_REPLY_MORE:
2808                 ret = process_auth_reply_more(con, p, end);
2809                 break;
2810         case FRAME_TAG_AUTH_DONE:
2811                 ret = process_auth_done(con, p, end);
2812                 break;
2813         case FRAME_TAG_AUTH_SIGNATURE:
2814                 ret = process_auth_signature(con, p, end);
2815                 break;
2816         case FRAME_TAG_SERVER_IDENT:
2817                 ret = process_server_ident(con, p, end);
2818                 break;
2819         case FRAME_TAG_IDENT_MISSING_FEATURES:
2820                 ret = process_ident_missing_features(con, p, end);
2821                 break;
2822         case FRAME_TAG_SESSION_RECONNECT_OK:
2823                 ret = process_session_reconnect_ok(con, p, end);
2824                 break;
2825         case FRAME_TAG_SESSION_RETRY:
2826                 ret = process_session_retry(con, p, end);
2827                 break;
2828         case FRAME_TAG_SESSION_RETRY_GLOBAL:
2829                 ret = process_session_retry_global(con, p, end);
2830                 break;
2831         case FRAME_TAG_SESSION_RESET:
2832                 ret = process_session_reset(con, p, end);
2833                 break;
2834         case FRAME_TAG_KEEPALIVE2_ACK:
2835                 ret = process_keepalive2_ack(con, p, end);
2836                 break;
2837         case FRAME_TAG_ACK:
2838                 ret = process_ack(con, p, end);
2839                 break;
2840         default:
2841                 pr_err("bad tag %d\n", tag);
2842                 con->error_msg = "protocol error, bad tag";
2843                 return -EINVAL;
2844         }
2845         if (ret) {
2846                 dout("%s con %p error %d\n", __func__, con, ret);
2847                 return ret;
2848         }
2849
2850         prepare_read_preamble(con);
2851         return 0;
2852 }
2853
2854 /*
2855  * Return:
2856  *   1 - con->in_msg set, read message
2857  *   0 - skip message
2858  *  <0 - error
2859  */
2860 static int process_message_header(struct ceph_connection *con,
2861                                   void *p, void *end)
2862 {
2863         struct ceph_frame_desc *desc = &con->v2.in_desc;
2864         struct ceph_msg_header2 *hdr2 = p;
2865         struct ceph_msg_header hdr;
2866         int skip;
2867         int ret;
2868         u64 seq;
2869
2870         /* verify seq# */
2871         seq = le64_to_cpu(hdr2->seq);
2872         if ((s64)seq - (s64)con->in_seq < 1) {
2873                 pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n",
2874                         ENTITY_NAME(con->peer_name),
2875                         ceph_pr_addr(&con->peer_addr),
2876                         seq, con->in_seq + 1);
2877                 return 0;
2878         }
2879         if ((s64)seq - (s64)con->in_seq > 1) {
2880                 pr_err("bad seq %llu, expected %llu\n", seq, con->in_seq + 1);
2881                 con->error_msg = "bad message sequence # for incoming message";
2882                 return -EBADE;
2883         }
2884
2885         ceph_con_discard_sent(con, le64_to_cpu(hdr2->ack_seq));
2886
2887         fill_header(&hdr, hdr2, desc->fd_lens[1], desc->fd_lens[2],
2888                     desc->fd_lens[3], &con->peer_name);
2889         ret = ceph_con_in_msg_alloc(con, &hdr, &skip);
2890         if (ret)
2891                 return ret;
2892
2893         WARN_ON(!con->in_msg ^ skip);
2894         if (skip)
2895                 return 0;
2896
2897         WARN_ON(!con->in_msg);
2898         WARN_ON(con->in_msg->con != con);
2899         return 1;
2900 }
2901
2902 static int process_message(struct ceph_connection *con)
2903 {
2904         ceph_con_process_message(con);
2905
2906         /*
2907          * We could have been closed by ceph_con_close() because
2908          * ceph_con_process_message() temporarily drops con->mutex.
2909          */
2910         if (con->state != CEPH_CON_S_OPEN) {
2911                 dout("%s con %p state changed to %d\n", __func__, con,
2912                      con->state);
2913                 return -EAGAIN;
2914         }
2915
2916         prepare_read_preamble(con);
2917         return 0;
2918 }
2919
2920 static int __handle_control(struct ceph_connection *con, void *p)
2921 {
2922         void *end = p + con->v2.in_desc.fd_lens[0];
2923         struct ceph_msg *msg;
2924         int ret;
2925
2926         if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE)
2927                 return process_control(con, p, end);
2928
2929         ret = process_message_header(con, p, end);
2930         if (ret < 0)
2931                 return ret;
2932         if (ret == 0) {
2933                 prepare_skip_message(con);
2934                 return 0;
2935         }
2936
2937         msg = con->in_msg;  /* set in process_message_header() */
2938         if (front_len(msg)) {
2939                 WARN_ON(front_len(msg) > msg->front_alloc_len);
2940                 msg->front.iov_len = front_len(msg);
2941         } else {
2942                 msg->front.iov_len = 0;
2943         }
2944         if (middle_len(msg)) {
2945                 WARN_ON(middle_len(msg) > msg->middle->alloc_len);
2946                 msg->middle->vec.iov_len = middle_len(msg);
2947         } else if (msg->middle) {
2948                 msg->middle->vec.iov_len = 0;
2949         }
2950
2951         if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
2952                 return process_message(con);
2953
2954         if (con_secure(con))
2955                 return prepare_read_tail_secure(con);
2956
2957         return prepare_read_tail_plain(con);
2958 }
2959
2960 static int handle_preamble(struct ceph_connection *con)
2961 {
2962         struct ceph_frame_desc *desc = &con->v2.in_desc;
2963         int ret;
2964
2965         if (con_secure(con)) {
2966                 ret = decrypt_preamble(con);
2967                 if (ret) {
2968                         if (ret == -EBADMSG)
2969                                 con->error_msg = "integrity error, bad preamble auth tag";
2970                         return ret;
2971                 }
2972         }
2973
2974         ret = decode_preamble(con->v2.in_buf, desc);
2975         if (ret) {
2976                 if (ret == -EBADMSG)
2977                         con->error_msg = "integrity error, bad crc";
2978                 else
2979                         con->error_msg = "protocol error, bad preamble";
2980                 return ret;
2981         }
2982
2983         dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__,
2984              con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0],
2985              desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]);
2986
2987         if (!con_secure(con))
2988                 return prepare_read_control(con);
2989
2990         if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN)
2991                 return prepare_read_control_remainder(con);
2992
2993         return __handle_control(con, CTRL_BODY(con->v2.in_buf));
2994 }
2995
2996 static int handle_control(struct ceph_connection *con)
2997 {
2998         int ctrl_len = con->v2.in_desc.fd_lens[0];
2999         void *buf;
3000         int ret;
3001
3002         WARN_ON(con_secure(con));
3003
3004         ret = verify_control_crc(con);
3005         if (ret) {
3006                 con->error_msg = "integrity error, bad crc";
3007                 return ret;
3008         }
3009
3010         if (con->state == CEPH_CON_S_V2_AUTH) {
3011                 buf = alloc_conn_buf(con, ctrl_len);
3012                 if (!buf)
3013                         return -ENOMEM;
3014
3015                 memcpy(buf, con->v2.in_kvecs[0].iov_base, ctrl_len);
3016                 return __handle_control(con, buf);
3017         }
3018
3019         return __handle_control(con, con->v2.in_kvecs[0].iov_base);
3020 }
3021
3022 static int handle_control_remainder(struct ceph_connection *con)
3023 {
3024         int ret;
3025
3026         WARN_ON(!con_secure(con));
3027
3028         ret = decrypt_control_remainder(con);
3029         if (ret) {
3030                 if (ret == -EBADMSG)
3031                         con->error_msg = "integrity error, bad control remainder auth tag";
3032                 return ret;
3033         }
3034
3035         return __handle_control(con, con->v2.in_kvecs[0].iov_base -
3036                                      CEPH_PREAMBLE_INLINE_LEN);
3037 }
3038
3039 static int handle_epilogue(struct ceph_connection *con)
3040 {
3041         u32 front_crc, middle_crc, data_crc;
3042         int ret;
3043
3044         if (con_secure(con)) {
3045                 ret = decrypt_tail(con);
3046                 if (ret) {
3047                         if (ret == -EBADMSG)
3048                                 con->error_msg = "integrity error, bad epilogue auth tag";
3049                         return ret;
3050                 }
3051
3052                 /* just late_status */
3053                 ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL);
3054                 if (ret) {
3055                         con->error_msg = "protocol error, bad epilogue";
3056                         return ret;
3057                 }
3058         } else {
3059                 ret = decode_epilogue(con->v2.in_buf, &front_crc,
3060                                       &middle_crc, &data_crc);
3061                 if (ret) {
3062                         con->error_msg = "protocol error, bad epilogue";
3063                         return ret;
3064                 }
3065
3066                 ret = verify_epilogue_crcs(con, front_crc, middle_crc,
3067                                            data_crc);
3068                 if (ret) {
3069                         con->error_msg = "integrity error, bad crc";
3070                         return ret;
3071                 }
3072         }
3073
3074         return process_message(con);
3075 }
3076
3077 static void finish_skip(struct ceph_connection *con)
3078 {
3079         dout("%s con %p\n", __func__, con);
3080
3081         if (con_secure(con))
3082                 gcm_inc_nonce(&con->v2.in_gcm_nonce);
3083
3084         __finish_skip(con);
3085 }
3086
3087 static int populate_in_iter(struct ceph_connection *con)
3088 {
3089         int ret;
3090
3091         dout("%s con %p state %d in_state %d\n", __func__, con, con->state,
3092              con->v2.in_state);
3093         WARN_ON(iov_iter_count(&con->v2.in_iter));
3094
3095         if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) {
3096                 ret = process_banner_prefix(con);
3097         } else if (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) {
3098                 ret = process_banner_payload(con);
3099         } else if ((con->state >= CEPH_CON_S_V2_HELLO &&
3100                     con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) ||
3101                    con->state == CEPH_CON_S_OPEN) {
3102                 switch (con->v2.in_state) {
3103                 case IN_S_HANDLE_PREAMBLE:
3104                         ret = handle_preamble(con);
3105                         break;
3106                 case IN_S_HANDLE_CONTROL:
3107                         ret = handle_control(con);
3108                         break;
3109                 case IN_S_HANDLE_CONTROL_REMAINDER:
3110                         ret = handle_control_remainder(con);
3111                         break;
3112                 case IN_S_PREPARE_READ_DATA:
3113                         ret = prepare_read_data(con);
3114                         break;
3115                 case IN_S_PREPARE_READ_DATA_CONT:
3116                         prepare_read_data_cont(con);
3117                         ret = 0;
3118                         break;
3119                 case IN_S_PREPARE_READ_ENC_PAGE:
3120                         prepare_read_enc_page(con);
3121                         ret = 0;
3122                         break;
3123                 case IN_S_PREPARE_SPARSE_DATA:
3124                         ret = prepare_sparse_read_data(con);
3125                         break;
3126                 case IN_S_PREPARE_SPARSE_DATA_CONT:
3127                         ret = prepare_sparse_read_cont(con);
3128                         break;
3129                 case IN_S_HANDLE_EPILOGUE:
3130                         ret = handle_epilogue(con);
3131                         break;
3132                 case IN_S_FINISH_SKIP:
3133                         finish_skip(con);
3134                         ret = 0;
3135                         break;
3136                 default:
3137                         WARN(1, "bad in_state %d", con->v2.in_state);
3138                         return -EINVAL;
3139                 }
3140         } else {
3141                 WARN(1, "bad state %d", con->state);
3142                 return -EINVAL;
3143         }
3144         if (ret) {
3145                 dout("%s con %p error %d\n", __func__, con, ret);
3146                 return ret;
3147         }
3148
3149         if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
3150                 return -ENODATA;
3151         dout("%s con %p populated %zu\n", __func__, con,
3152              iov_iter_count(&con->v2.in_iter));
3153         return 1;
3154 }
3155
3156 int ceph_con_v2_try_read(struct ceph_connection *con)
3157 {
3158         int ret;
3159
3160         dout("%s con %p state %d need %zu\n", __func__, con, con->state,
3161              iov_iter_count(&con->v2.in_iter));
3162
3163         if (con->state == CEPH_CON_S_PREOPEN)
3164                 return 0;
3165
3166         /*
3167          * We should always have something pending here.  If not,
3168          * avoid calling populate_in_iter() as if we read something
3169          * (ceph_tcp_recv() would immediately return 1).
3170          */
3171         if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
3172                 return -ENODATA;
3173
3174         for (;;) {
3175                 ret = ceph_tcp_recv(con);
3176                 if (ret <= 0)
3177                         return ret;
3178
3179                 ret = populate_in_iter(con);
3180                 if (ret <= 0) {
3181                         if (ret && ret != -EAGAIN && !con->error_msg)
3182                                 con->error_msg = "read processing error";
3183                         return ret;
3184                 }
3185         }
3186 }
3187
3188 static void queue_data(struct ceph_connection *con)
3189 {
3190         struct bio_vec bv;
3191
3192         con->v2.out_epil.data_crc = -1;
3193         ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg,
3194                                   data_len(con->out_msg));
3195
3196         get_bvec_at(&con->v2.out_cursor, &bv);
3197         set_out_bvec(con, &bv, true);
3198         con->v2.out_state = OUT_S_QUEUE_DATA_CONT;
3199 }
3200
3201 static void queue_data_cont(struct ceph_connection *con)
3202 {
3203         struct bio_vec bv;
3204
3205         con->v2.out_epil.data_crc = ceph_crc32c_page(
3206                 con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3207                 con->v2.out_bvec.bv_offset, con->v2.out_bvec.bv_len);
3208
3209         ceph_msg_data_advance(&con->v2.out_cursor, con->v2.out_bvec.bv_len);
3210         if (con->v2.out_cursor.total_resid) {
3211                 get_bvec_at(&con->v2.out_cursor, &bv);
3212                 set_out_bvec(con, &bv, true);
3213                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_DATA_CONT);
3214                 return;
3215         }
3216
3217         /*
3218          * We've written all data.  Queue epilogue.  Once it's written,
3219          * we are done.
3220          */
3221         reset_out_kvecs(con);
3222         prepare_epilogue_plain(con, false);
3223         con->v2.out_state = OUT_S_FINISH_MESSAGE;
3224 }
3225
3226 static void queue_enc_page(struct ceph_connection *con)
3227 {
3228         struct bio_vec bv;
3229
3230         dout("%s con %p i %d resid %d\n", __func__, con, con->v2.out_enc_i,
3231              con->v2.out_enc_resid);
3232         WARN_ON(!con->v2.out_enc_resid);
3233
3234         bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i],
3235                       min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0);
3236
3237         set_out_bvec(con, &bv, false);
3238         con->v2.out_enc_i++;
3239         con->v2.out_enc_resid -= bv.bv_len;
3240
3241         if (con->v2.out_enc_resid) {
3242                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE);
3243                 return;
3244         }
3245
3246         /*
3247          * We've queued the last piece of ciphertext (ending with
3248          * epilogue) + auth tag.  Once it's written, we are done.
3249          */
3250         WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt);
3251         con->v2.out_state = OUT_S_FINISH_MESSAGE;
3252 }
3253
3254 static void queue_zeros(struct ceph_connection *con)
3255 {
3256         dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero);
3257
3258         if (con->v2.out_zero) {
3259                 set_out_bvec_zero(con);
3260                 con->v2.out_zero -= con->v2.out_bvec.bv_len;
3261                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3262                 return;
3263         }
3264
3265         /*
3266          * We've zero-filled everything up to epilogue.  Queue epilogue
3267          * with late_status set to ABORTED and crcs adjusted for zeros.
3268          * Once it's written, we are done patching up for the revoke.
3269          */
3270         reset_out_kvecs(con);
3271         prepare_epilogue_plain(con, true);
3272         con->v2.out_state = OUT_S_FINISH_MESSAGE;
3273 }
3274
3275 static void finish_message(struct ceph_connection *con)
3276 {
3277         dout("%s con %p msg %p\n", __func__, con, con->out_msg);
3278
3279         /* we end up here both plain and secure modes */
3280         if (con->v2.out_enc_pages) {
3281                 WARN_ON(!con->v2.out_enc_page_cnt);
3282                 ceph_release_page_vector(con->v2.out_enc_pages,
3283                                          con->v2.out_enc_page_cnt);
3284                 con->v2.out_enc_pages = NULL;
3285                 con->v2.out_enc_page_cnt = 0;
3286         }
3287         /* message may have been revoked */
3288         if (con->out_msg) {
3289                 ceph_msg_put(con->out_msg);
3290                 con->out_msg = NULL;
3291         }
3292
3293         con->v2.out_state = OUT_S_GET_NEXT;
3294 }
3295
3296 static int populate_out_iter(struct ceph_connection *con)
3297 {
3298         int ret;
3299
3300         dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
3301              con->v2.out_state);
3302         WARN_ON(iov_iter_count(&con->v2.out_iter));
3303
3304         if (con->state != CEPH_CON_S_OPEN) {
3305                 WARN_ON(con->state < CEPH_CON_S_V2_BANNER_PREFIX ||
3306                         con->state > CEPH_CON_S_V2_SESSION_RECONNECT);
3307                 goto nothing_pending;
3308         }
3309
3310         switch (con->v2.out_state) {
3311         case OUT_S_QUEUE_DATA:
3312                 WARN_ON(!con->out_msg);
3313                 queue_data(con);
3314                 goto populated;
3315         case OUT_S_QUEUE_DATA_CONT:
3316                 WARN_ON(!con->out_msg);
3317                 queue_data_cont(con);
3318                 goto populated;
3319         case OUT_S_QUEUE_ENC_PAGE:
3320                 queue_enc_page(con);
3321                 goto populated;
3322         case OUT_S_QUEUE_ZEROS:
3323                 WARN_ON(con->out_msg);  /* revoked */
3324                 queue_zeros(con);
3325                 goto populated;
3326         case OUT_S_FINISH_MESSAGE:
3327                 finish_message(con);
3328                 break;
3329         case OUT_S_GET_NEXT:
3330                 break;
3331         default:
3332                 WARN(1, "bad out_state %d", con->v2.out_state);
3333                 return -EINVAL;
3334         }
3335
3336         WARN_ON(con->v2.out_state != OUT_S_GET_NEXT);
3337         if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) {
3338                 ret = prepare_keepalive2(con);
3339                 if (ret) {
3340                         pr_err("prepare_keepalive2 failed: %d\n", ret);
3341                         return ret;
3342                 }
3343         } else if (!list_empty(&con->out_queue)) {
3344                 ceph_con_get_out_msg(con);
3345                 ret = prepare_message(con);
3346                 if (ret) {
3347                         pr_err("prepare_message failed: %d\n", ret);
3348                         return ret;
3349                 }
3350         } else if (con->in_seq > con->in_seq_acked) {
3351                 ret = prepare_ack(con);
3352                 if (ret) {
3353                         pr_err("prepare_ack failed: %d\n", ret);
3354                         return ret;
3355                 }
3356         } else {
3357                 goto nothing_pending;
3358         }
3359
3360 populated:
3361         if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
3362                 return -ENODATA;
3363         dout("%s con %p populated %zu\n", __func__, con,
3364              iov_iter_count(&con->v2.out_iter));
3365         return 1;
3366
3367 nothing_pending:
3368         WARN_ON(iov_iter_count(&con->v2.out_iter));
3369         dout("%s con %p nothing pending\n", __func__, con);
3370         ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
3371         return 0;
3372 }
3373
3374 int ceph_con_v2_try_write(struct ceph_connection *con)
3375 {
3376         int ret;
3377
3378         dout("%s con %p state %d have %zu\n", __func__, con, con->state,
3379              iov_iter_count(&con->v2.out_iter));
3380
3381         /* open the socket first? */
3382         if (con->state == CEPH_CON_S_PREOPEN) {
3383                 WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2);
3384
3385                 /*
3386                  * Always bump global_seq.  Bump connect_seq only if
3387                  * there is a session (i.e. we are reconnecting and will
3388                  * send session_reconnect instead of client_ident).
3389                  */
3390                 con->v2.global_seq = ceph_get_global_seq(con->msgr, 0);
3391                 if (con->v2.server_cookie)
3392                         con->v2.connect_seq++;
3393
3394                 ret = prepare_read_banner_prefix(con);
3395                 if (ret) {
3396                         pr_err("prepare_read_banner_prefix failed: %d\n", ret);
3397                         con->error_msg = "connect error";
3398                         return ret;
3399                 }
3400
3401                 reset_out_kvecs(con);
3402                 ret = prepare_banner(con);
3403                 if (ret) {
3404                         pr_err("prepare_banner failed: %d\n", ret);
3405                         con->error_msg = "connect error";
3406                         return ret;
3407                 }
3408
3409                 ret = ceph_tcp_connect(con);
3410                 if (ret) {
3411                         pr_err("ceph_tcp_connect failed: %d\n", ret);
3412                         con->error_msg = "connect error";
3413                         return ret;
3414                 }
3415         }
3416
3417         if (!iov_iter_count(&con->v2.out_iter)) {
3418                 ret = populate_out_iter(con);
3419                 if (ret <= 0) {
3420                         if (ret && ret != -EAGAIN && !con->error_msg)
3421                                 con->error_msg = "write processing error";
3422                         return ret;
3423                 }
3424         }
3425
3426         tcp_sock_set_cork(con->sock->sk, true);
3427         for (;;) {
3428                 ret = ceph_tcp_send(con);
3429                 if (ret <= 0)
3430                         break;
3431
3432                 ret = populate_out_iter(con);
3433                 if (ret <= 0) {
3434                         if (ret && ret != -EAGAIN && !con->error_msg)
3435                                 con->error_msg = "write processing error";
3436                         break;
3437                 }
3438         }
3439
3440         tcp_sock_set_cork(con->sock->sk, false);
3441         return ret;
3442 }
3443
3444 static u32 crc32c_zeros(u32 crc, int zero_len)
3445 {
3446         int len;
3447
3448         while (zero_len) {
3449                 len = min(zero_len, (int)PAGE_SIZE);
3450                 crc = crc32c(crc, page_address(ceph_zero_page), len);
3451                 zero_len -= len;
3452         }
3453
3454         return crc;
3455 }
3456
3457 static void prepare_zero_front(struct ceph_connection *con, int resid)
3458 {
3459         int sent;
3460
3461         WARN_ON(!resid || resid > front_len(con->out_msg));
3462         sent = front_len(con->out_msg) - resid;
3463         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3464
3465         if (sent) {
3466                 con->v2.out_epil.front_crc =
3467                         crc32c(-1, con->out_msg->front.iov_base, sent);
3468                 con->v2.out_epil.front_crc =
3469                         crc32c_zeros(con->v2.out_epil.front_crc, resid);
3470         } else {
3471                 con->v2.out_epil.front_crc = crc32c_zeros(-1, resid);
3472         }
3473
3474         con->v2.out_iter.count -= resid;
3475         out_zero_add(con, resid);
3476 }
3477
3478 static void prepare_zero_middle(struct ceph_connection *con, int resid)
3479 {
3480         int sent;
3481
3482         WARN_ON(!resid || resid > middle_len(con->out_msg));
3483         sent = middle_len(con->out_msg) - resid;
3484         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3485
3486         if (sent) {
3487                 con->v2.out_epil.middle_crc =
3488                         crc32c(-1, con->out_msg->middle->vec.iov_base, sent);
3489                 con->v2.out_epil.middle_crc =
3490                         crc32c_zeros(con->v2.out_epil.middle_crc, resid);
3491         } else {
3492                 con->v2.out_epil.middle_crc = crc32c_zeros(-1, resid);
3493         }
3494
3495         con->v2.out_iter.count -= resid;
3496         out_zero_add(con, resid);
3497 }
3498
3499 static void prepare_zero_data(struct ceph_connection *con)
3500 {
3501         dout("%s con %p\n", __func__, con);
3502         con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg));
3503         out_zero_add(con, data_len(con->out_msg));
3504 }
3505
3506 static void revoke_at_queue_data(struct ceph_connection *con)
3507 {
3508         int boundary;
3509         int resid;
3510
3511         WARN_ON(!data_len(con->out_msg));
3512         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3513         resid = iov_iter_count(&con->v2.out_iter);
3514
3515         boundary = front_len(con->out_msg) + middle_len(con->out_msg);
3516         if (resid > boundary) {
3517                 resid -= boundary;
3518                 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3519                 dout("%s con %p was sending head\n", __func__, con);
3520                 if (front_len(con->out_msg))
3521                         prepare_zero_front(con, front_len(con->out_msg));
3522                 if (middle_len(con->out_msg))
3523                         prepare_zero_middle(con, middle_len(con->out_msg));
3524                 prepare_zero_data(con);
3525                 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3526                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3527                 return;
3528         }
3529
3530         boundary = middle_len(con->out_msg);
3531         if (resid > boundary) {
3532                 resid -= boundary;
3533                 dout("%s con %p was sending front\n", __func__, con);
3534                 prepare_zero_front(con, resid);
3535                 if (middle_len(con->out_msg))
3536                         prepare_zero_middle(con, middle_len(con->out_msg));
3537                 prepare_zero_data(con);
3538                 queue_zeros(con);
3539                 return;
3540         }
3541
3542         WARN_ON(!resid);
3543         dout("%s con %p was sending middle\n", __func__, con);
3544         prepare_zero_middle(con, resid);
3545         prepare_zero_data(con);
3546         queue_zeros(con);
3547 }
3548
3549 static void revoke_at_queue_data_cont(struct ceph_connection *con)
3550 {
3551         int sent, resid;  /* current piece of data */
3552
3553         WARN_ON(!data_len(con->out_msg));
3554         WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter));
3555         resid = iov_iter_count(&con->v2.out_iter);
3556         WARN_ON(!resid || resid > con->v2.out_bvec.bv_len);
3557         sent = con->v2.out_bvec.bv_len - resid;
3558         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3559
3560         if (sent) {
3561                 con->v2.out_epil.data_crc = ceph_crc32c_page(
3562                         con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3563                         con->v2.out_bvec.bv_offset, sent);
3564                 ceph_msg_data_advance(&con->v2.out_cursor, sent);
3565         }
3566         WARN_ON(resid > con->v2.out_cursor.total_resid);
3567         con->v2.out_epil.data_crc = crc32c_zeros(con->v2.out_epil.data_crc,
3568                                                 con->v2.out_cursor.total_resid);
3569
3570         con->v2.out_iter.count -= resid;
3571         out_zero_add(con, con->v2.out_cursor.total_resid);
3572         queue_zeros(con);
3573 }
3574
3575 static void revoke_at_finish_message(struct ceph_connection *con)
3576 {
3577         int boundary;
3578         int resid;
3579
3580         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3581         resid = iov_iter_count(&con->v2.out_iter);
3582
3583         if (!front_len(con->out_msg) && !middle_len(con->out_msg) &&
3584             !data_len(con->out_msg)) {
3585                 WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN);
3586                 dout("%s con %p was sending head (empty message) - noop\n",
3587                      __func__, con);
3588                 return;
3589         }
3590
3591         boundary = front_len(con->out_msg) + middle_len(con->out_msg) +
3592                    CEPH_EPILOGUE_PLAIN_LEN;
3593         if (resid > boundary) {
3594                 resid -= boundary;
3595                 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3596                 dout("%s con %p was sending head\n", __func__, con);
3597                 if (front_len(con->out_msg))
3598                         prepare_zero_front(con, front_len(con->out_msg));
3599                 if (middle_len(con->out_msg))
3600                         prepare_zero_middle(con, middle_len(con->out_msg));
3601                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3602                 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3603                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3604                 return;
3605         }
3606
3607         boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3608         if (resid > boundary) {
3609                 resid -= boundary;
3610                 dout("%s con %p was sending front\n", __func__, con);
3611                 prepare_zero_front(con, resid);
3612                 if (middle_len(con->out_msg))
3613                         prepare_zero_middle(con, middle_len(con->out_msg));
3614                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3615                 queue_zeros(con);
3616                 return;
3617         }
3618
3619         boundary = CEPH_EPILOGUE_PLAIN_LEN;
3620         if (resid > boundary) {
3621                 resid -= boundary;
3622                 dout("%s con %p was sending middle\n", __func__, con);
3623                 prepare_zero_middle(con, resid);
3624                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3625                 queue_zeros(con);
3626                 return;
3627         }
3628
3629         WARN_ON(!resid);
3630         dout("%s con %p was sending epilogue - noop\n", __func__, con);
3631 }
3632
3633 void ceph_con_v2_revoke(struct ceph_connection *con)
3634 {
3635         WARN_ON(con->v2.out_zero);
3636
3637         if (con_secure(con)) {
3638                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE &&
3639                         con->v2.out_state != OUT_S_FINISH_MESSAGE);
3640                 dout("%s con %p secure - noop\n", __func__, con);
3641                 return;
3642         }
3643
3644         switch (con->v2.out_state) {
3645         case OUT_S_QUEUE_DATA:
3646                 revoke_at_queue_data(con);
3647                 break;
3648         case OUT_S_QUEUE_DATA_CONT:
3649                 revoke_at_queue_data_cont(con);
3650                 break;
3651         case OUT_S_FINISH_MESSAGE:
3652                 revoke_at_finish_message(con);
3653                 break;
3654         default:
3655                 WARN(1, "bad out_state %d", con->v2.out_state);
3656                 break;
3657         }
3658 }
3659
3660 static void revoke_at_prepare_read_data(struct ceph_connection *con)
3661 {
3662         int remaining;
3663         int resid;
3664
3665         WARN_ON(con_secure(con));
3666         WARN_ON(!data_len(con->in_msg));
3667         WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
3668         resid = iov_iter_count(&con->v2.in_iter);
3669         WARN_ON(!resid);
3670
3671         remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3672         dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
3673              remaining);
3674         con->v2.in_iter.count -= resid;
3675         set_in_skip(con, resid + remaining);
3676         con->v2.in_state = IN_S_FINISH_SKIP;
3677 }
3678
3679 static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
3680 {
3681         int recved, resid;  /* current piece of data */
3682         int remaining;
3683
3684         WARN_ON(con_secure(con));
3685         WARN_ON(!data_len(con->in_msg));
3686         WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3687         resid = iov_iter_count(&con->v2.in_iter);
3688         WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3689         recved = con->v2.in_bvec.bv_len - resid;
3690         dout("%s con %p recved %d resid %d\n", __func__, con, recved, resid);
3691
3692         if (recved)
3693                 ceph_msg_data_advance(&con->v2.in_cursor, recved);
3694         WARN_ON(resid > con->v2.in_cursor.total_resid);
3695
3696         remaining = CEPH_EPILOGUE_PLAIN_LEN;
3697         dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
3698              con->v2.in_cursor.total_resid, remaining);
3699         con->v2.in_iter.count -= resid;
3700         set_in_skip(con, con->v2.in_cursor.total_resid + remaining);
3701         con->v2.in_state = IN_S_FINISH_SKIP;
3702 }
3703
3704 static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
3705 {
3706         int resid;  /* current enc page (not necessarily data) */
3707
3708         WARN_ON(!con_secure(con));
3709         WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3710         resid = iov_iter_count(&con->v2.in_iter);
3711         WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3712
3713         dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
3714              con->v2.in_enc_resid);
3715         con->v2.in_iter.count -= resid;
3716         set_in_skip(con, resid + con->v2.in_enc_resid);
3717         con->v2.in_state = IN_S_FINISH_SKIP;
3718 }
3719
3720 static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
3721 {
3722         int resid;  /* current piece of data */
3723         int remaining;
3724
3725         WARN_ON(con_secure(con));
3726         WARN_ON(!data_len(con->in_msg));
3727         WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3728         resid = iov_iter_count(&con->v2.in_iter);
3729         dout("%s con %p resid %d\n", __func__, con, resid);
3730
3731         remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
3732         con->v2.in_iter.count -= resid;
3733         set_in_skip(con, resid + remaining);
3734         con->v2.in_state = IN_S_FINISH_SKIP;
3735 }
3736
3737 static void revoke_at_handle_epilogue(struct ceph_connection *con)
3738 {
3739         int resid;
3740
3741         resid = iov_iter_count(&con->v2.in_iter);
3742         WARN_ON(!resid);
3743
3744         dout("%s con %p resid %d\n", __func__, con, resid);
3745         con->v2.in_iter.count -= resid;
3746         set_in_skip(con, resid);
3747         con->v2.in_state = IN_S_FINISH_SKIP;
3748 }
3749
3750 void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
3751 {
3752         switch (con->v2.in_state) {
3753         case IN_S_PREPARE_SPARSE_DATA:
3754         case IN_S_PREPARE_READ_DATA:
3755                 revoke_at_prepare_read_data(con);
3756                 break;
3757         case IN_S_PREPARE_READ_DATA_CONT:
3758                 revoke_at_prepare_read_data_cont(con);
3759                 break;
3760         case IN_S_PREPARE_READ_ENC_PAGE:
3761                 revoke_at_prepare_read_enc_page(con);
3762                 break;
3763         case IN_S_PREPARE_SPARSE_DATA_CONT:
3764                 revoke_at_prepare_sparse_data(con);
3765                 break;
3766         case IN_S_HANDLE_EPILOGUE:
3767                 revoke_at_handle_epilogue(con);
3768                 break;
3769         default:
3770                 WARN(1, "bad in_state %d", con->v2.in_state);
3771                 break;
3772         }
3773 }
3774
3775 bool ceph_con_v2_opened(struct ceph_connection *con)
3776 {
3777         return con->v2.peer_global_seq;
3778 }
3779
3780 void ceph_con_v2_reset_session(struct ceph_connection *con)
3781 {
3782         con->v2.client_cookie = 0;
3783         con->v2.server_cookie = 0;
3784         con->v2.global_seq = 0;
3785         con->v2.connect_seq = 0;
3786         con->v2.peer_global_seq = 0;
3787 }
3788
3789 void ceph_con_v2_reset_protocol(struct ceph_connection *con)
3790 {
3791         iov_iter_truncate(&con->v2.in_iter, 0);
3792         iov_iter_truncate(&con->v2.out_iter, 0);
3793         con->v2.out_zero = 0;
3794
3795         clear_in_sign_kvecs(con);
3796         clear_out_sign_kvecs(con);
3797         free_conn_bufs(con);
3798
3799         if (con->v2.in_enc_pages) {
3800                 WARN_ON(!con->v2.in_enc_page_cnt);
3801                 ceph_release_page_vector(con->v2.in_enc_pages,
3802                                          con->v2.in_enc_page_cnt);
3803                 con->v2.in_enc_pages = NULL;
3804                 con->v2.in_enc_page_cnt = 0;
3805         }
3806         if (con->v2.out_enc_pages) {
3807                 WARN_ON(!con->v2.out_enc_page_cnt);
3808                 ceph_release_page_vector(con->v2.out_enc_pages,
3809                                          con->v2.out_enc_page_cnt);
3810                 con->v2.out_enc_pages = NULL;
3811                 con->v2.out_enc_page_cnt = 0;
3812         }
3813
3814         con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
3815         memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
3816         memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
3817
3818         if (con->v2.hmac_tfm) {
3819                 crypto_free_shash(con->v2.hmac_tfm);
3820                 con->v2.hmac_tfm = NULL;
3821         }
3822         if (con->v2.gcm_req) {
3823                 aead_request_free(con->v2.gcm_req);
3824                 con->v2.gcm_req = NULL;
3825         }
3826         if (con->v2.gcm_tfm) {
3827                 crypto_free_aead(con->v2.gcm_tfm);
3828                 con->v2.gcm_tfm = NULL;
3829         }
3830 }