OSDN Git Service

Merge tag '4.20-rc7-smb3-fixes' of git://git.samba.org/sfrench/cifs-2.6
[uclinux-h8/linux.git] / net / smc / af_smc.c
1 /*
2  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
3  *
4  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
5  *  applies to SOCK_STREAM sockets only
6  *  offers an alternative communication option for TCP-protocol sockets
7  *  applicable with RoCE-cards only
8  *
9  *  Initial restrictions:
10  *    - support for alternate links postponed
11  *
12  *  Copyright IBM Corp. 2016, 2018
13  *
14  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
15  *              based on prototype from Frank Blaschka
16  */
17
18 #define KMSG_COMPONENT "smc"
19 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
20
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/workqueue.h>
24 #include <linux/in.h>
25 #include <linux/sched/signal.h>
26 #include <linux/if_vlan.h>
27
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/smc.h>
31 #include <asm/ioctls.h>
32
33 #include "smc.h"
34 #include "smc_clc.h"
35 #include "smc_llc.h"
36 #include "smc_cdc.h"
37 #include "smc_core.h"
38 #include "smc_ib.h"
39 #include "smc_ism.h"
40 #include "smc_pnet.h"
41 #include "smc_tx.h"
42 #include "smc_rx.h"
43 #include "smc_close.h"
44
/* serializes link group creation */
static DEFINE_MUTEX(smc_create_lgr_pending);

/* work handlers, forward-declared because smc_sock_alloc() registers them */
static void smc_tcp_listen_work(struct work_struct *work);
static void smc_connect_work(struct work_struct *work);
51
52 static void smc_set_keepalive(struct sock *sk, int val)
53 {
54         struct smc_sock *smc = smc_sk(sk);
55
56         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
57 }
58
59 static struct smc_hashinfo smc_v4_hashinfo = {
60         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
61 };
62
63 static struct smc_hashinfo smc_v6_hashinfo = {
64         .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
65 };
66
67 int smc_hash_sk(struct sock *sk)
68 {
69         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
70         struct hlist_head *head;
71
72         head = &h->ht;
73
74         write_lock_bh(&h->lock);
75         sk_add_node(sk, head);
76         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
77         write_unlock_bh(&h->lock);
78
79         return 0;
80 }
81 EXPORT_SYMBOL_GPL(smc_hash_sk);
82
83 void smc_unhash_sk(struct sock *sk)
84 {
85         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
86
87         write_lock_bh(&h->lock);
88         if (sk_del_node_init(sk))
89                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
90         write_unlock_bh(&h->lock);
91 }
92 EXPORT_SYMBOL_GPL(smc_unhash_sk);
93
94 struct proto smc_proto = {
95         .name           = "SMC",
96         .owner          = THIS_MODULE,
97         .keepalive      = smc_set_keepalive,
98         .hash           = smc_hash_sk,
99         .unhash         = smc_unhash_sk,
100         .obj_size       = sizeof(struct smc_sock),
101         .h.smc_hash     = &smc_v4_hashinfo,
102         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
103 };
104 EXPORT_SYMBOL_GPL(smc_proto);
105
106 struct proto smc_proto6 = {
107         .name           = "SMC6",
108         .owner          = THIS_MODULE,
109         .keepalive      = smc_set_keepalive,
110         .hash           = smc_hash_sk,
111         .unhash         = smc_unhash_sk,
112         .obj_size       = sizeof(struct smc_sock),
113         .h.smc_hash     = &smc_v6_hashinfo,
114         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
115 };
116 EXPORT_SYMBOL_GPL(smc_proto6);
117
118 static int smc_release(struct socket *sock)
119 {
120         struct sock *sk = sock->sk;
121         struct smc_sock *smc;
122         int rc = 0;
123
124         if (!sk)
125                 goto out;
126
127         smc = smc_sk(sk);
128
129         /* cleanup for a dangling non-blocking connect */
130         if (smc->connect_info && sk->sk_state == SMC_INIT)
131                 tcp_abort(smc->clcsock->sk, ECONNABORTED);
132         flush_work(&smc->connect_work);
133         kfree(smc->connect_info);
134         smc->connect_info = NULL;
135
136         if (sk->sk_state == SMC_LISTEN)
137                 /* smc_close_non_accepted() is called and acquires
138                  * sock lock for child sockets again
139                  */
140                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
141         else
142                 lock_sock(sk);
143
144         if (!smc->use_fallback) {
145                 rc = smc_close_active(smc);
146                 sock_set_flag(sk, SOCK_DEAD);
147                 sk->sk_shutdown |= SHUTDOWN_MASK;
148         }
149         if (smc->clcsock) {
150                 if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
151                         /* wake up clcsock accept */
152                         rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
153                 }
154                 mutex_lock(&smc->clcsock_release_lock);
155                 sock_release(smc->clcsock);
156                 smc->clcsock = NULL;
157                 mutex_unlock(&smc->clcsock_release_lock);
158         }
159         if (smc->use_fallback) {
160                 if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
161                         sock_put(sk); /* passive closing */
162                 sk->sk_state = SMC_CLOSED;
163                 sk->sk_state_change(sk);
164         }
165
166         /* detach socket */
167         sock_orphan(sk);
168         sock->sk = NULL;
169         if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
170                 smc_conn_free(&smc->conn);
171         release_sock(sk);
172
173         sk->sk_prot->unhash(sk);
174         sock_put(sk); /* final sock_put */
175 out:
176         return rc;
177 }
178
179 static void smc_destruct(struct sock *sk)
180 {
181         if (sk->sk_state != SMC_CLOSED)
182                 return;
183         if (!sock_flag(sk, SOCK_DEAD))
184                 return;
185
186         sk_refcnt_debug_dec(sk);
187 }
188
189 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
190                                    int protocol)
191 {
192         struct smc_sock *smc;
193         struct proto *prot;
194         struct sock *sk;
195
196         prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
197         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
198         if (!sk)
199                 return NULL;
200
201         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
202         sk->sk_state = SMC_INIT;
203         sk->sk_destruct = smc_destruct;
204         sk->sk_protocol = protocol;
205         smc = smc_sk(sk);
206         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
207         INIT_WORK(&smc->connect_work, smc_connect_work);
208         INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
209         INIT_LIST_HEAD(&smc->accept_q);
210         spin_lock_init(&smc->accept_q_lock);
211         spin_lock_init(&smc->conn.send_lock);
212         sk->sk_prot->hash(sk);
213         sk_refcnt_debug_inc(sk);
214         mutex_init(&smc->clcsock_release_lock);
215
216         return sk;
217 }
218
219 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
220                     int addr_len)
221 {
222         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
223         struct sock *sk = sock->sk;
224         struct smc_sock *smc;
225         int rc;
226
227         smc = smc_sk(sk);
228
229         /* replicate tests from inet_bind(), to be safe wrt. future changes */
230         rc = -EINVAL;
231         if (addr_len < sizeof(struct sockaddr_in))
232                 goto out;
233
234         rc = -EAFNOSUPPORT;
235         if (addr->sin_family != AF_INET &&
236             addr->sin_family != AF_INET6 &&
237             addr->sin_family != AF_UNSPEC)
238                 goto out;
239         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
240         if (addr->sin_family == AF_UNSPEC &&
241             addr->sin_addr.s_addr != htonl(INADDR_ANY))
242                 goto out;
243
244         lock_sock(sk);
245
246         /* Check if socket is already active */
247         rc = -EINVAL;
248         if (sk->sk_state != SMC_INIT)
249                 goto out_rel;
250
251         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
252         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
253
254 out_rel:
255         release_sock(sk);
256 out:
257         return rc;
258 }
259
260 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
261                                    unsigned long mask)
262 {
263         /* options we don't get control via setsockopt for */
264         nsk->sk_type = osk->sk_type;
265         nsk->sk_sndbuf = osk->sk_sndbuf;
266         nsk->sk_rcvbuf = osk->sk_rcvbuf;
267         nsk->sk_sndtimeo = osk->sk_sndtimeo;
268         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
269         nsk->sk_mark = osk->sk_mark;
270         nsk->sk_priority = osk->sk_priority;
271         nsk->sk_rcvlowat = osk->sk_rcvlowat;
272         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
273         nsk->sk_err = osk->sk_err;
274
275         nsk->sk_flags &= ~mask;
276         nsk->sk_flags |= osk->sk_flags & mask;
277 }
278
279 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
280                              (1UL << SOCK_KEEPOPEN) | \
281                              (1UL << SOCK_LINGER) | \
282                              (1UL << SOCK_BROADCAST) | \
283                              (1UL << SOCK_TIMESTAMP) | \
284                              (1UL << SOCK_DBG) | \
285                              (1UL << SOCK_RCVTSTAMP) | \
286                              (1UL << SOCK_RCVTSTAMPNS) | \
287                              (1UL << SOCK_LOCALROUTE) | \
288                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
289                              (1UL << SOCK_RXQ_OVFL) | \
290                              (1UL << SOCK_WIFI_STATUS) | \
291                              (1UL << SOCK_NOFCS) | \
292                              (1UL << SOCK_FILTER_LOCKED))
293 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
294  * clc socket (since smc is not called for these options from net/core)
295  */
296 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
297 {
298         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
299 }
300
301 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
302                              (1UL << SOCK_KEEPOPEN) | \
303                              (1UL << SOCK_LINGER) | \
304                              (1UL << SOCK_DBG))
305 /* copy only settings and flags relevant for smc from clc to smc socket */
306 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
307 {
308         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
309 }
310
311 /* register a new rmb, optionally send confirm_rkey msg to register with peer */
312 static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
313                        bool conf_rkey)
314 {
315         /* register memory region for new rmb */
316         if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
317                 rmb_desc->regerr = 1;
318                 return -EFAULT;
319         }
320         if (!conf_rkey)
321                 return 0;
322         /* exchange confirm_rkey msg with peer */
323         if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
324                 rmb_desc->regerr = 1;
325                 return -EFAULT;
326         }
327         return 0;
328 }
329
330 static int smc_clnt_conf_first_link(struct smc_sock *smc)
331 {
332         struct net *net = sock_net(smc->clcsock->sk);
333         struct smc_link_group *lgr = smc->conn.lgr;
334         struct smc_link *link;
335         int rest;
336         int rc;
337
338         link = &lgr->lnk[SMC_SINGLE_LINK];
339         /* receive CONFIRM LINK request from server over RoCE fabric */
340         rest = wait_for_completion_interruptible_timeout(
341                 &link->llc_confirm,
342                 SMC_LLC_WAIT_FIRST_TIME);
343         if (rest <= 0) {
344                 struct smc_clc_msg_decline dclc;
345
346                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
347                                       SMC_CLC_DECLINE);
348                 return rc;
349         }
350
351         if (link->llc_confirm_rc)
352                 return SMC_CLC_DECL_RMBE_EC;
353
354         rc = smc_ib_modify_qp_rts(link);
355         if (rc)
356                 return SMC_CLC_DECL_ERR_RDYLNK;
357
358         smc_wr_remember_qp_attr(link);
359
360         if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
361                 return SMC_CLC_DECL_ERR_REGRMB;
362
363         /* send CONFIRM LINK response over RoCE fabric */
364         rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
365         if (rc < 0)
366                 return SMC_CLC_DECL_TIMEOUT_CL;
367
368         /* receive ADD LINK request from server over RoCE fabric */
369         rest = wait_for_completion_interruptible_timeout(&link->llc_add,
370                                                          SMC_LLC_WAIT_TIME);
371         if (rest <= 0) {
372                 struct smc_clc_msg_decline dclc;
373
374                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
375                                       SMC_CLC_DECLINE);
376                 return rc;
377         }
378
379         /* send add link reject message, only one link supported for now */
380         rc = smc_llc_send_add_link(link,
381                                    link->smcibdev->mac[link->ibport - 1],
382                                    link->gid, SMC_LLC_RESP);
383         if (rc < 0)
384                 return SMC_CLC_DECL_TIMEOUT_AL;
385
386         smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
387
388         return 0;
389 }
390
391 static void smcr_conn_save_peer_info(struct smc_sock *smc,
392                                      struct smc_clc_msg_accept_confirm *clc)
393 {
394         int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
395
396         smc->conn.peer_rmbe_idx = clc->rmbe_idx;
397         smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
398         smc->conn.peer_rmbe_size = bufsize;
399         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
400         smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
401 }
402
403 static void smcd_conn_save_peer_info(struct smc_sock *smc,
404                                      struct smc_clc_msg_accept_confirm *clc)
405 {
406         int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
407
408         smc->conn.peer_rmbe_idx = clc->dmbe_idx;
409         smc->conn.peer_token = clc->token;
410         /* msg header takes up space in the buffer */
411         smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
412         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
413         smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
414 }
415
416 static void smc_conn_save_peer_info(struct smc_sock *smc,
417                                     struct smc_clc_msg_accept_confirm *clc)
418 {
419         if (smc->conn.lgr->is_smcd)
420                 smcd_conn_save_peer_info(smc, clc);
421         else
422                 smcr_conn_save_peer_info(smc, clc);
423 }
424
425 static void smc_link_save_peer_info(struct smc_link *link,
426                                     struct smc_clc_msg_accept_confirm *clc)
427 {
428         link->peer_qpn = ntoh24(clc->qpn);
429         memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
430         memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
431         link->peer_psn = ntoh24(clc->psn);
432         link->peer_mtu = clc->qp_mtu;
433 }
434
435 /* fall back during connect */
436 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
437 {
438         smc->use_fallback = true;
439         smc->fallback_rsn = reason_code;
440         smc_copy_sock_settings_to_clc(smc);
441         if (smc->sk.sk_state == SMC_INIT)
442                 smc->sk.sk_state = SMC_ACTIVE;
443         return 0;
444 }
445
446 /* decline and fall back during connect */
447 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
448 {
449         int rc;
450
451         if (reason_code < 0) { /* error, fallback is not possible */
452                 if (smc->sk.sk_state == SMC_INIT)
453                         sock_put(&smc->sk); /* passive closing */
454                 return reason_code;
455         }
456         if (reason_code != SMC_CLC_DECL_PEERDECL) {
457                 rc = smc_clc_send_decline(smc, reason_code);
458                 if (rc < 0) {
459                         if (smc->sk.sk_state == SMC_INIT)
460                                 sock_put(&smc->sk); /* passive closing */
461                         return rc;
462                 }
463         }
464         return smc_connect_fallback(smc, reason_code);
465 }
466
467 /* abort connecting */
468 static int smc_connect_abort(struct smc_sock *smc, int reason_code,
469                              int local_contact)
470 {
471         if (local_contact == SMC_FIRST_CONTACT)
472                 smc_lgr_forget(smc->conn.lgr);
473         mutex_unlock(&smc_create_lgr_pending);
474         smc_conn_free(&smc->conn);
475         return reason_code;
476 }
477
478 /* check if there is a rdma device available for this connection. */
479 /* called for connect and listen */
480 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
481                           u8 *ibport, unsigned short vlan_id, u8 gid[])
482 {
483         int reason_code = 0;
484
485         /* PNET table look up: search active ib_device and port
486          * within same PNETID that also contains the ethernet device
487          * used for the internal TCP socket
488          */
489         smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
490                                     gid);
491         if (!(*ibdev))
492                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
493
494         return reason_code;
495 }
496
497 /* check if there is an ISM device available for this connection. */
498 /* called for connect and listen */
499 static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
500 {
501         /* Find ISM device with same PNETID as connecting interface  */
502         smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
503         if (!(*ismdev))
504                 return SMC_CLC_DECL_CNFERR; /* configuration error */
505         return 0;
506 }
507
508 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
509 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
510                                       struct smcd_dev *ismdev,
511                                       unsigned short vlan_id)
512 {
513         if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
514                 return SMC_CLC_DECL_CNFERR;
515         return 0;
516 }
517
518 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
519  * used, the VLAN ID will be registered again during the connection setup.
520  */
521 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
522                                         struct smcd_dev *ismdev,
523                                         unsigned short vlan_id)
524 {
525         if (!is_smcd)
526                 return 0;
527         if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
528                 return SMC_CLC_DECL_CNFERR;
529         return 0;
530 }
531
532 /* CLC handshake during connect */
533 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
534                            struct smc_clc_msg_accept_confirm *aclc,
535                            struct smc_ib_device *ibdev, u8 ibport,
536                            u8 gid[], struct smcd_dev *ismdev)
537 {
538         int rc = 0;
539
540         /* do inband token exchange */
541         rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
542         if (rc)
543                 return rc;
544         /* receive SMC Accept CLC message */
545         return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
546 }
547
548 /* setup for RDMA connection of client */
549 static int smc_connect_rdma(struct smc_sock *smc,
550                             struct smc_clc_msg_accept_confirm *aclc,
551                             struct smc_ib_device *ibdev, u8 ibport)
552 {
553         int local_contact = SMC_FIRST_CONTACT;
554         struct smc_link *link;
555         int reason_code = 0;
556
557         mutex_lock(&smc_create_lgr_pending);
558         local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
559                                         ibport, ntoh24(aclc->qpn), &aclc->lcl,
560                                         NULL, 0);
561         if (local_contact < 0) {
562                 if (local_contact == -ENOMEM)
563                         reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
564                 else if (local_contact == -ENOLINK)
565                         reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
566                 else
567                         reason_code = SMC_CLC_DECL_INTERR; /* other error */
568                 return smc_connect_abort(smc, reason_code, 0);
569         }
570         link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
571
572         smc_conn_save_peer_info(smc, aclc);
573
574         /* create send buffer and rmb */
575         if (smc_buf_create(smc, false))
576                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
577
578         if (local_contact == SMC_FIRST_CONTACT)
579                 smc_link_save_peer_info(link, aclc);
580
581         if (smc_rmb_rtoken_handling(&smc->conn, aclc))
582                 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
583                                          local_contact);
584
585         smc_close_init(smc);
586         smc_rx_init(smc);
587
588         if (local_contact == SMC_FIRST_CONTACT) {
589                 if (smc_ib_ready_link(link))
590                         return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
591                                                  local_contact);
592         } else {
593                 if (!smc->conn.rmb_desc->reused &&
594                     smc_reg_rmb(link, smc->conn.rmb_desc, true))
595                         return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
596                                                  local_contact);
597         }
598         smc_rmb_sync_sg_for_device(&smc->conn);
599
600         reason_code = smc_clc_send_confirm(smc);
601         if (reason_code)
602                 return smc_connect_abort(smc, reason_code, local_contact);
603
604         smc_tx_init(smc);
605
606         if (local_contact == SMC_FIRST_CONTACT) {
607                 /* QP confirmation over RoCE fabric */
608                 reason_code = smc_clnt_conf_first_link(smc);
609                 if (reason_code)
610                         return smc_connect_abort(smc, reason_code,
611                                                  local_contact);
612         }
613         mutex_unlock(&smc_create_lgr_pending);
614
615         smc_copy_sock_settings_to_clc(smc);
616         if (smc->sk.sk_state == SMC_INIT)
617                 smc->sk.sk_state = SMC_ACTIVE;
618
619         return 0;
620 }
621
622 /* setup for ISM connection of client */
623 static int smc_connect_ism(struct smc_sock *smc,
624                            struct smc_clc_msg_accept_confirm *aclc,
625                            struct smcd_dev *ismdev)
626 {
627         int local_contact = SMC_FIRST_CONTACT;
628         int rc = 0;
629
630         mutex_lock(&smc_create_lgr_pending);
631         local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
632                                         NULL, ismdev, aclc->gid);
633         if (local_contact < 0)
634                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
635
636         /* Create send and receive buffers */
637         if (smc_buf_create(smc, true))
638                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
639
640         smc_conn_save_peer_info(smc, aclc);
641         smc_close_init(smc);
642         smc_rx_init(smc);
643         smc_tx_init(smc);
644
645         rc = smc_clc_send_confirm(smc);
646         if (rc)
647                 return smc_connect_abort(smc, rc, local_contact);
648         mutex_unlock(&smc_create_lgr_pending);
649
650         smc_copy_sock_settings_to_clc(smc);
651         if (smc->sk.sk_state == SMC_INIT)
652                 smc->sk.sk_state = SMC_ACTIVE;
653
654         return 0;
655 }
656
657 /* perform steps before actually connecting */
658 static int __smc_connect(struct smc_sock *smc)
659 {
660         bool ism_supported = false, rdma_supported = false;
661         struct smc_clc_msg_accept_confirm aclc;
662         struct smc_ib_device *ibdev;
663         struct smcd_dev *ismdev;
664         u8 gid[SMC_GID_SIZE];
665         unsigned short vlan;
666         int smc_type;
667         int rc = 0;
668         u8 ibport;
669
670         sock_hold(&smc->sk); /* sock put in passive closing */
671
672         if (smc->use_fallback)
673                 return smc_connect_fallback(smc, smc->fallback_rsn);
674
675         /* if peer has not signalled SMC-capability, fall back */
676         if (!tcp_sk(smc->clcsock->sk)->syn_smc)
677                 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
678
679         /* IPSec connections opt out of SMC-R optimizations */
680         if (using_ipsec(smc))
681                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
682
683         /* check for VLAN ID */
684         if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
685                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
686
687         /* check if there is an ism device available */
688         if (!smc_check_ism(smc, &ismdev) &&
689             !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
690                 /* ISM is supported for this connection */
691                 ism_supported = true;
692                 smc_type = SMC_TYPE_D;
693         }
694
695         /* check if there is a rdma device available */
696         if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
697                 /* RDMA is supported for this connection */
698                 rdma_supported = true;
699                 if (ism_supported)
700                         smc_type = SMC_TYPE_B; /* both */
701                 else
702                         smc_type = SMC_TYPE_R; /* only RDMA */
703         }
704
705         /* if neither ISM nor RDMA are supported, fallback */
706         if (!rdma_supported && !ism_supported)
707                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
708
709         /* perform CLC handshake */
710         rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
711         if (rc) {
712                 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
713                 return smc_connect_decline_fallback(smc, rc);
714         }
715
716         /* depending on previous steps, connect using rdma or ism */
717         if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
718                 rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
719         else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
720                 rc = smc_connect_ism(smc, &aclc, ismdev);
721         else
722                 rc = SMC_CLC_DECL_MODEUNSUPP;
723         if (rc) {
724                 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
725                 return smc_connect_decline_fallback(smc, rc);
726         }
727
728         smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
729         return 0;
730 }
731
732 static void smc_connect_work(struct work_struct *work)
733 {
734         struct smc_sock *smc = container_of(work, struct smc_sock,
735                                             connect_work);
736         int rc;
737
738         lock_sock(&smc->sk);
739         rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
740                             smc->connect_info->alen, smc->connect_info->flags);
741         if (smc->clcsock->sk->sk_err) {
742                 smc->sk.sk_err = smc->clcsock->sk->sk_err;
743                 goto out;
744         }
745         if (rc < 0) {
746                 smc->sk.sk_err = -rc;
747                 goto out;
748         }
749
750         rc = __smc_connect(smc);
751         if (rc < 0)
752                 smc->sk.sk_err = -rc;
753
754 out:
755         if (smc->sk.sk_err)
756                 smc->sk.sk_state_change(&smc->sk);
757         else
758                 smc->sk.sk_write_space(&smc->sk);
759         kfree(smc->connect_info);
760         smc->connect_info = NULL;
761         release_sock(&smc->sk);
762 }
763
764 static int smc_connect(struct socket *sock, struct sockaddr *addr,
765                        int alen, int flags)
766 {
767         struct sock *sk = sock->sk;
768         struct smc_sock *smc;
769         int rc = -EINVAL;
770
771         smc = smc_sk(sk);
772
773         /* separate smc parameter checking to be safe */
774         if (alen < sizeof(addr->sa_family))
775                 goto out_err;
776         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
777                 goto out_err;
778
779         lock_sock(sk);
780         switch (sk->sk_state) {
781         default:
782                 goto out;
783         case SMC_ACTIVE:
784                 rc = -EISCONN;
785                 goto out;
786         case SMC_INIT:
787                 rc = 0;
788                 break;
789         }
790
791         smc_copy_sock_settings_to_clc(smc);
792         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
793         if (flags & O_NONBLOCK) {
794                 if (smc->connect_info) {
795                         rc = -EALREADY;
796                         goto out;
797                 }
798                 smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
799                 if (!smc->connect_info) {
800                         rc = -ENOMEM;
801                         goto out;
802                 }
803                 smc->connect_info->alen = alen;
804                 smc->connect_info->flags = flags ^ O_NONBLOCK;
805                 memcpy(&smc->connect_info->addr, addr, alen);
806                 schedule_work(&smc->connect_work);
807                 rc = -EINPROGRESS;
808         } else {
809                 rc = kernel_connect(smc->clcsock, addr, alen, flags);
810                 if (rc)
811                         goto out;
812
813                 rc = __smc_connect(smc);
814                 if (rc < 0)
815                         goto out;
816                 else
817                         rc = 0; /* success cases including fallback */
818         }
819
820 out:
821         release_sock(sk);
822 out_err:
823         return rc;
824 }
825
826 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
827 {
828         struct socket *new_clcsock = NULL;
829         struct sock *lsk = &lsmc->sk;
830         struct sock *new_sk;
831         int rc = -EINVAL;
832
833         release_sock(lsk);
834         new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
835         if (!new_sk) {
836                 rc = -ENOMEM;
837                 lsk->sk_err = ENOMEM;
838                 *new_smc = NULL;
839                 lock_sock(lsk);
840                 goto out;
841         }
842         *new_smc = smc_sk(new_sk);
843
844         mutex_lock(&lsmc->clcsock_release_lock);
845         if (lsmc->clcsock)
846                 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
847         mutex_unlock(&lsmc->clcsock_release_lock);
848         lock_sock(lsk);
849         if  (rc < 0)
850                 lsk->sk_err = -rc;
851         if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
852                 if (new_clcsock)
853                         sock_release(new_clcsock);
854                 new_sk->sk_state = SMC_CLOSED;
855                 sock_set_flag(new_sk, SOCK_DEAD);
856                 new_sk->sk_prot->unhash(new_sk);
857                 sock_put(new_sk); /* final */
858                 *new_smc = NULL;
859                 goto out;
860         }
861
862         (*new_smc)->clcsock = new_clcsock;
863 out:
864         return rc;
865 }
866
867 /* add a just created sock to the accept queue of the listen sock as
868  * candidate for a following socket accept call from user space
869  */
870 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
871 {
872         struct smc_sock *par = smc_sk(parent);
873
874         sock_hold(sk); /* sock_put in smc_accept_unlink () */
875         spin_lock(&par->accept_q_lock);
876         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
877         spin_unlock(&par->accept_q_lock);
878         sk_acceptq_added(parent);
879 }
880
881 /* remove a socket from the accept queue of its parental listening socket */
882 static void smc_accept_unlink(struct sock *sk)
883 {
884         struct smc_sock *par = smc_sk(sk)->listen_smc;
885
886         spin_lock(&par->accept_q_lock);
887         list_del_init(&smc_sk(sk)->accept_q);
888         spin_unlock(&par->accept_q_lock);
889         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
890         sock_put(sk); /* sock_hold in smc_accept_enqueue */
891 }
892
893 /* remove a sock from the accept queue to bind it to a new socket created
894  * for a socket accept call from user space
895  */
896 struct sock *smc_accept_dequeue(struct sock *parent,
897                                 struct socket *new_sock)
898 {
899         struct smc_sock *isk, *n;
900         struct sock *new_sk;
901
902         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
903                 new_sk = (struct sock *)isk;
904
905                 smc_accept_unlink(new_sk);
906                 if (new_sk->sk_state == SMC_CLOSED) {
907                         if (isk->clcsock) {
908                                 sock_release(isk->clcsock);
909                                 isk->clcsock = NULL;
910                         }
911                         new_sk->sk_prot->unhash(new_sk);
912                         sock_put(new_sk); /* final */
913                         continue;
914                 }
915                 if (new_sock)
916                         sock_graft(new_sk, new_sock);
917                 return new_sk;
918         }
919         return NULL;
920 }
921
922 /* clean up for a created but never accepted sock */
923 void smc_close_non_accepted(struct sock *sk)
924 {
925         struct smc_sock *smc = smc_sk(sk);
926
927         lock_sock(sk);
928         if (!sk->sk_lingertime)
929                 /* wait for peer closing */
930                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
931         if (!smc->use_fallback) {
932                 smc_close_active(smc);
933                 sock_set_flag(sk, SOCK_DEAD);
934                 sk->sk_shutdown |= SHUTDOWN_MASK;
935         }
936         if (smc->clcsock) {
937                 struct socket *tcp;
938
939                 tcp = smc->clcsock;
940                 smc->clcsock = NULL;
941                 sock_release(tcp);
942         }
943         if (smc->use_fallback) {
944                 sock_put(sk); /* passive closing */
945                 sk->sk_state = SMC_CLOSED;
946         } else {
947                 if (sk->sk_state == SMC_CLOSED)
948                         smc_conn_free(&smc->conn);
949         }
950         release_sock(sk);
951         sk->sk_prot->unhash(sk);
952         sock_put(sk); /* final sock_put */
953 }
954
955 static int smc_serv_conf_first_link(struct smc_sock *smc)
956 {
957         struct net *net = sock_net(smc->clcsock->sk);
958         struct smc_link_group *lgr = smc->conn.lgr;
959         struct smc_link *link;
960         int rest;
961         int rc;
962
963         link = &lgr->lnk[SMC_SINGLE_LINK];
964
965         if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
966                 return SMC_CLC_DECL_ERR_REGRMB;
967
968         /* send CONFIRM LINK request to client over the RoCE fabric */
969         rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
970         if (rc < 0)
971                 return SMC_CLC_DECL_TIMEOUT_CL;
972
973         /* receive CONFIRM LINK response from client over the RoCE fabric */
974         rest = wait_for_completion_interruptible_timeout(
975                 &link->llc_confirm_resp,
976                 SMC_LLC_WAIT_FIRST_TIME);
977         if (rest <= 0) {
978                 struct smc_clc_msg_decline dclc;
979
980                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
981                                       SMC_CLC_DECLINE);
982                 return rc;
983         }
984
985         if (link->llc_confirm_resp_rc)
986                 return SMC_CLC_DECL_RMBE_EC;
987
988         /* send ADD LINK request to client over the RoCE fabric */
989         rc = smc_llc_send_add_link(link,
990                                    link->smcibdev->mac[link->ibport - 1],
991                                    link->gid, SMC_LLC_REQ);
992         if (rc < 0)
993                 return SMC_CLC_DECL_TIMEOUT_AL;
994
995         /* receive ADD LINK response from client over the RoCE fabric */
996         rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
997                                                          SMC_LLC_WAIT_TIME);
998         if (rest <= 0) {
999                 struct smc_clc_msg_decline dclc;
1000
1001                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1002                                       SMC_CLC_DECLINE);
1003                 return rc;
1004         }
1005
1006         smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
1007
1008         return 0;
1009 }
1010
1011 /* listen worker: finish */
1012 static void smc_listen_out(struct smc_sock *new_smc)
1013 {
1014         struct smc_sock *lsmc = new_smc->listen_smc;
1015         struct sock *newsmcsk = &new_smc->sk;
1016
1017         lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1018         if (lsmc->sk.sk_state == SMC_LISTEN) {
1019                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1020         } else { /* no longer listening */
1021                 smc_close_non_accepted(newsmcsk);
1022         }
1023         release_sock(&lsmc->sk);
1024
1025         /* Wake up accept */
1026         lsmc->sk.sk_data_ready(&lsmc->sk);
1027         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1028 }
1029
1030 /* listen worker: finish in state connected */
1031 static void smc_listen_out_connected(struct smc_sock *new_smc)
1032 {
1033         struct sock *newsmcsk = &new_smc->sk;
1034
1035         sk_refcnt_debug_inc(newsmcsk);
1036         if (newsmcsk->sk_state == SMC_INIT)
1037                 newsmcsk->sk_state = SMC_ACTIVE;
1038
1039         smc_listen_out(new_smc);
1040 }
1041
1042 /* listen worker: finish in error state */
1043 static void smc_listen_out_err(struct smc_sock *new_smc)
1044 {
1045         struct sock *newsmcsk = &new_smc->sk;
1046
1047         if (newsmcsk->sk_state == SMC_INIT)
1048                 sock_put(&new_smc->sk); /* passive closing */
1049         newsmcsk->sk_state = SMC_CLOSED;
1050         smc_conn_free(&new_smc->conn);
1051
1052         smc_listen_out(new_smc);
1053 }
1054
1055 /* listen worker: decline and fall back if possible */
1056 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1057                                int local_contact)
1058 {
1059         /* RDMA setup failed, switch back to TCP */
1060         if (local_contact == SMC_FIRST_CONTACT)
1061                 smc_lgr_forget(new_smc->conn.lgr);
1062         if (reason_code < 0) { /* error, no fallback possible */
1063                 smc_listen_out_err(new_smc);
1064                 return;
1065         }
1066         smc_conn_free(&new_smc->conn);
1067         new_smc->use_fallback = true;
1068         new_smc->fallback_rsn = reason_code;
1069         if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1070                 if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1071                         smc_listen_out_err(new_smc);
1072                         return;
1073                 }
1074         }
1075         smc_listen_out_connected(new_smc);
1076 }
1077
1078 /* listen worker: check prefixes */
1079 static int smc_listen_rdma_check(struct smc_sock *new_smc,
1080                                  struct smc_clc_msg_proposal *pclc)
1081 {
1082         struct smc_clc_msg_proposal_prefix *pclc_prfx;
1083         struct socket *newclcsock = new_smc->clcsock;
1084
1085         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1086         if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1087                 return SMC_CLC_DECL_CNFERR;
1088
1089         return 0;
1090 }
1091
1092 /* listen worker: initialize connection and buffers */
1093 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1094                                 struct smc_clc_msg_proposal *pclc,
1095                                 struct smc_ib_device *ibdev, u8 ibport,
1096                                 int *local_contact)
1097 {
1098         /* allocate connection / link group */
1099         *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
1100                                          &pclc->lcl, NULL, 0);
1101         if (*local_contact < 0) {
1102                 if (*local_contact == -ENOMEM)
1103                         return SMC_CLC_DECL_MEM;/* insufficient memory*/
1104                 return SMC_CLC_DECL_INTERR; /* other error */
1105         }
1106
1107         /* create send buffer and rmb */
1108         if (smc_buf_create(new_smc, false))
1109                 return SMC_CLC_DECL_MEM;
1110
1111         return 0;
1112 }
1113
1114 /* listen worker: initialize connection and buffers for SMC-D */
1115 static int smc_listen_ism_init(struct smc_sock *new_smc,
1116                                struct smc_clc_msg_proposal *pclc,
1117                                struct smcd_dev *ismdev,
1118                                int *local_contact)
1119 {
1120         struct smc_clc_msg_smcd *pclc_smcd;
1121
1122         pclc_smcd = smc_get_clc_msg_smcd(pclc);
1123         *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
1124                                          ismdev, pclc_smcd->gid);
1125         if (*local_contact < 0) {
1126                 if (*local_contact == -ENOMEM)
1127                         return SMC_CLC_DECL_MEM;/* insufficient memory*/
1128                 return SMC_CLC_DECL_INTERR; /* other error */
1129         }
1130
1131         /* Check if peer can be reached via ISM device */
1132         if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1133                             new_smc->conn.lgr->vlan_id,
1134                             new_smc->conn.lgr->smcd)) {
1135                 if (*local_contact == SMC_FIRST_CONTACT)
1136                         smc_lgr_forget(new_smc->conn.lgr);
1137                 smc_conn_free(&new_smc->conn);
1138                 return SMC_CLC_DECL_CNFERR;
1139         }
1140
1141         /* Create send and receive buffers */
1142         if (smc_buf_create(new_smc, true)) {
1143                 if (*local_contact == SMC_FIRST_CONTACT)
1144                         smc_lgr_forget(new_smc->conn.lgr);
1145                 smc_conn_free(&new_smc->conn);
1146                 return SMC_CLC_DECL_MEM;
1147         }
1148
1149         return 0;
1150 }
1151
1152 /* listen worker: register buffers */
1153 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1154 {
1155         struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1156
1157         if (local_contact != SMC_FIRST_CONTACT) {
1158                 if (!new_smc->conn.rmb_desc->reused) {
1159                         if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
1160                                 return SMC_CLC_DECL_ERR_REGRMB;
1161                 }
1162         }
1163         smc_rmb_sync_sg_for_device(&new_smc->conn);
1164
1165         return 0;
1166 }
1167
1168 /* listen worker: finish RDMA setup */
1169 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
1170                                   struct smc_clc_msg_accept_confirm *cclc,
1171                                   int local_contact)
1172 {
1173         struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1174         int reason_code = 0;
1175
1176         if (local_contact == SMC_FIRST_CONTACT)
1177                 smc_link_save_peer_info(link, cclc);
1178
1179         if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
1180                 reason_code = SMC_CLC_DECL_ERR_RTOK;
1181                 goto decline;
1182         }
1183
1184         if (local_contact == SMC_FIRST_CONTACT) {
1185                 if (smc_ib_ready_link(link)) {
1186                         reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1187                         goto decline;
1188                 }
1189                 /* QP confirmation over RoCE fabric */
1190                 reason_code = smc_serv_conf_first_link(new_smc);
1191                 if (reason_code)
1192                         goto decline;
1193         }
1194         return 0;
1195
1196 decline:
1197         mutex_unlock(&smc_create_lgr_pending);
1198         smc_listen_decline(new_smc, reason_code, local_contact);
1199         return reason_code;
1200 }
1201
1202 /* setup for RDMA connection of server */
1203 static void smc_listen_work(struct work_struct *work)
1204 {
1205         struct smc_sock *new_smc = container_of(work, struct smc_sock,
1206                                                 smc_listen_work);
1207         struct socket *newclcsock = new_smc->clcsock;
1208         struct smc_clc_msg_accept_confirm cclc;
1209         struct smc_clc_msg_proposal *pclc;
1210         struct smc_ib_device *ibdev;
1211         bool ism_supported = false;
1212         struct smcd_dev *ismdev;
1213         u8 buf[SMC_CLC_MAX_LEN];
1214         int local_contact = 0;
1215         unsigned short vlan;
1216         int reason_code = 0;
1217         int rc = 0;
1218         u8 ibport;
1219
1220         if (new_smc->use_fallback) {
1221                 smc_listen_out_connected(new_smc);
1222                 return;
1223         }
1224
1225         /* check if peer is smc capable */
1226         if (!tcp_sk(newclcsock->sk)->syn_smc) {
1227                 new_smc->use_fallback = true;
1228                 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1229                 smc_listen_out_connected(new_smc);
1230                 return;
1231         }
1232
1233         /* do inband token exchange -
1234          * wait for and receive SMC Proposal CLC message
1235          */
1236         pclc = (struct smc_clc_msg_proposal *)&buf;
1237         reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1238                                        SMC_CLC_PROPOSAL);
1239         if (reason_code) {
1240                 smc_listen_decline(new_smc, reason_code, 0);
1241                 return;
1242         }
1243
1244         /* IPSec connections opt out of SMC-R optimizations */
1245         if (using_ipsec(new_smc)) {
1246                 smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
1247                 return;
1248         }
1249
1250         mutex_lock(&smc_create_lgr_pending);
1251         smc_close_init(new_smc);
1252         smc_rx_init(new_smc);
1253         smc_tx_init(new_smc);
1254
1255         /* check if ISM is available */
1256         if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
1257             !smc_check_ism(new_smc, &ismdev) &&
1258             !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
1259                 ism_supported = true;
1260         }
1261
1262         /* check if RDMA is available */
1263         if (!ism_supported &&
1264             ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
1265              smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
1266              smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
1267              smc_listen_rdma_check(new_smc, pclc) ||
1268              smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
1269                                   &local_contact) ||
1270              smc_listen_rdma_reg(new_smc, local_contact))) {
1271                 /* SMC not supported, decline */
1272                 mutex_unlock(&smc_create_lgr_pending);
1273                 smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
1274                                    local_contact);
1275                 return;
1276         }
1277
1278         /* send SMC Accept CLC message */
1279         rc = smc_clc_send_accept(new_smc, local_contact);
1280         if (rc) {
1281                 mutex_unlock(&smc_create_lgr_pending);
1282                 smc_listen_decline(new_smc, rc, local_contact);
1283                 return;
1284         }
1285
1286         /* receive SMC Confirm CLC message */
1287         reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1288                                        SMC_CLC_CONFIRM);
1289         if (reason_code) {
1290                 mutex_unlock(&smc_create_lgr_pending);
1291                 smc_listen_decline(new_smc, reason_code, local_contact);
1292                 return;
1293         }
1294
1295         /* finish worker */
1296         if (!ism_supported) {
1297                 if (smc_listen_rdma_finish(new_smc, &cclc, local_contact))
1298                         return;
1299         }
1300         smc_conn_save_peer_info(new_smc, &cclc);
1301         mutex_unlock(&smc_create_lgr_pending);
1302         smc_listen_out_connected(new_smc);
1303 }
1304
1305 static void smc_tcp_listen_work(struct work_struct *work)
1306 {
1307         struct smc_sock *lsmc = container_of(work, struct smc_sock,
1308                                              tcp_listen_work);
1309         struct sock *lsk = &lsmc->sk;
1310         struct smc_sock *new_smc;
1311         int rc = 0;
1312
1313         lock_sock(lsk);
1314         while (lsk->sk_state == SMC_LISTEN) {
1315                 rc = smc_clcsock_accept(lsmc, &new_smc);
1316                 if (rc)
1317                         goto out;
1318                 if (!new_smc)
1319                         continue;
1320
1321                 new_smc->listen_smc = lsmc;
1322                 new_smc->use_fallback = lsmc->use_fallback;
1323                 new_smc->fallback_rsn = lsmc->fallback_rsn;
1324                 sock_hold(lsk); /* sock_put in smc_listen_work */
1325                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1326                 smc_copy_sock_settings_to_smc(new_smc);
1327                 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
1328                 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
1329                 sock_hold(&new_smc->sk); /* sock_put in passive closing */
1330                 if (!schedule_work(&new_smc->smc_listen_work))
1331                         sock_put(&new_smc->sk);
1332         }
1333
1334 out:
1335         release_sock(lsk);
1336         sock_put(&lsmc->sk); /* sock_hold in smc_listen */
1337 }
1338
1339 static int smc_listen(struct socket *sock, int backlog)
1340 {
1341         struct sock *sk = sock->sk;
1342         struct smc_sock *smc;
1343         int rc;
1344
1345         smc = smc_sk(sk);
1346         lock_sock(sk);
1347
1348         rc = -EINVAL;
1349         if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
1350                 goto out;
1351
1352         rc = 0;
1353         if (sk->sk_state == SMC_LISTEN) {
1354                 sk->sk_max_ack_backlog = backlog;
1355                 goto out;
1356         }
1357         /* some socket options are handled in core, so we could not apply
1358          * them to the clc socket -- copy smc socket options to clc socket
1359          */
1360         smc_copy_sock_settings_to_clc(smc);
1361         if (!smc->use_fallback)
1362                 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1363
1364         rc = kernel_listen(smc->clcsock, backlog);
1365         if (rc)
1366                 goto out;
1367         sk->sk_max_ack_backlog = backlog;
1368         sk->sk_ack_backlog = 0;
1369         sk->sk_state = SMC_LISTEN;
1370         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
1371         sock_hold(sk); /* sock_hold in tcp_listen_worker */
1372         if (!schedule_work(&smc->tcp_listen_work))
1373                 sock_put(sk);
1374
1375 out:
1376         release_sock(sk);
1377         return rc;
1378 }
1379
1380 static int smc_accept(struct socket *sock, struct socket *new_sock,
1381                       int flags, bool kern)
1382 {
1383         struct sock *sk = sock->sk, *nsk;
1384         DECLARE_WAITQUEUE(wait, current);
1385         struct smc_sock *lsmc;
1386         long timeo;
1387         int rc = 0;
1388
1389         lsmc = smc_sk(sk);
1390         sock_hold(sk); /* sock_put below */
1391         lock_sock(sk);
1392
1393         if (lsmc->sk.sk_state != SMC_LISTEN) {
1394                 rc = -EINVAL;
1395                 release_sock(sk);
1396                 goto out;
1397         }
1398
1399         /* Wait for an incoming connection */
1400         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1401         add_wait_queue_exclusive(sk_sleep(sk), &wait);
1402         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
1403                 set_current_state(TASK_INTERRUPTIBLE);
1404                 if (!timeo) {
1405                         rc = -EAGAIN;
1406                         break;
1407                 }
1408                 release_sock(sk);
1409                 timeo = schedule_timeout(timeo);
1410                 /* wakeup by sk_data_ready in smc_listen_work() */
1411                 sched_annotate_sleep();
1412                 lock_sock(sk);
1413                 if (signal_pending(current)) {
1414                         rc = sock_intr_errno(timeo);
1415                         break;
1416                 }
1417         }
1418         set_current_state(TASK_RUNNING);
1419         remove_wait_queue(sk_sleep(sk), &wait);
1420
1421         if (!rc)
1422                 rc = sock_error(nsk);
1423         release_sock(sk);
1424         if (rc)
1425                 goto out;
1426
1427         if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
1428                 /* wait till data arrives on the socket */
1429                 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
1430                                                                 MSEC_PER_SEC);
1431                 if (smc_sk(nsk)->use_fallback) {
1432                         struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
1433
1434                         lock_sock(clcsk);
1435                         if (skb_queue_empty(&clcsk->sk_receive_queue))
1436                                 sk_wait_data(clcsk, &timeo, NULL);
1437                         release_sock(clcsk);
1438                 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
1439                         lock_sock(nsk);
1440                         smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
1441                         release_sock(nsk);
1442                 }
1443         }
1444
1445 out:
1446         sock_put(sk); /* sock_hold above */
1447         return rc;
1448 }
1449
1450 static int smc_getname(struct socket *sock, struct sockaddr *addr,
1451                        int peer)
1452 {
1453         struct smc_sock *smc;
1454
1455         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1456             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1457                 return -ENOTCONN;
1458
1459         smc = smc_sk(sock->sk);
1460
1461         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1462 }
1463
1464 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1465 {
1466         struct sock *sk = sock->sk;
1467         struct smc_sock *smc;
1468         int rc = -EPIPE;
1469
1470         smc = smc_sk(sk);
1471         lock_sock(sk);
1472         if ((sk->sk_state != SMC_ACTIVE) &&
1473             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1474             (sk->sk_state != SMC_INIT))
1475                 goto out;
1476
1477         if (msg->msg_flags & MSG_FASTOPEN) {
1478                 if (sk->sk_state == SMC_INIT) {
1479                         smc->use_fallback = true;
1480                         smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1481                 } else {
1482                         rc = -EINVAL;
1483                         goto out;
1484                 }
1485         }
1486
1487         if (smc->use_fallback)
1488                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1489         else
1490                 rc = smc_tx_sendmsg(smc, msg, len);
1491 out:
1492         release_sock(sk);
1493         return rc;
1494 }
1495
1496 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1497                        int flags)
1498 {
1499         struct sock *sk = sock->sk;
1500         struct smc_sock *smc;
1501         int rc = -ENOTCONN;
1502
1503         smc = smc_sk(sk);
1504         lock_sock(sk);
1505         if ((sk->sk_state == SMC_INIT) ||
1506             (sk->sk_state == SMC_LISTEN) ||
1507             (sk->sk_state == SMC_CLOSED))
1508                 goto out;
1509
1510         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1511                 rc = 0;
1512                 goto out;
1513         }
1514
1515         if (smc->use_fallback) {
1516                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1517         } else {
1518                 msg->msg_namelen = 0;
1519                 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1520         }
1521
1522 out:
1523         release_sock(sk);
1524         return rc;
1525 }
1526
1527 static __poll_t smc_accept_poll(struct sock *parent)
1528 {
1529         struct smc_sock *isk = smc_sk(parent);
1530         __poll_t mask = 0;
1531
1532         spin_lock(&isk->accept_q_lock);
1533         if (!list_empty(&isk->accept_q))
1534                 mask = EPOLLIN | EPOLLRDNORM;
1535         spin_unlock(&isk->accept_q_lock);
1536
1537         return mask;
1538 }
1539
1540 static __poll_t smc_poll(struct file *file, struct socket *sock,
1541                              poll_table *wait)
1542 {
1543         struct sock *sk = sock->sk;
1544         __poll_t mask = 0;
1545         struct smc_sock *smc;
1546
1547         if (!sk)
1548                 return EPOLLNVAL;
1549
1550         smc = smc_sk(sock->sk);
1551         if (smc->use_fallback) {
1552                 /* delegate to CLC child sock */
1553                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1554                 sk->sk_err = smc->clcsock->sk->sk_err;
1555                 if (sk->sk_err)
1556                         mask |= EPOLLERR;
1557         } else {
1558                 if (sk->sk_state != SMC_CLOSED)
1559                         sock_poll_wait(file, sock, wait);
1560                 if (sk->sk_err)
1561                         mask |= EPOLLERR;
1562                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1563                     (sk->sk_state == SMC_CLOSED))
1564                         mask |= EPOLLHUP;
1565                 if (sk->sk_state == SMC_LISTEN) {
1566                         /* woken up by sk_data_ready in smc_listen_work() */
1567                         mask = smc_accept_poll(sk);
1568                 } else {
1569                         if (atomic_read(&smc->conn.sndbuf_space) ||
1570                             sk->sk_shutdown & SEND_SHUTDOWN) {
1571                                 mask |= EPOLLOUT | EPOLLWRNORM;
1572                         } else {
1573                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1574                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1575                         }
1576                         if (atomic_read(&smc->conn.bytes_to_rcv))
1577                                 mask |= EPOLLIN | EPOLLRDNORM;
1578                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1579                                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1580                         if (sk->sk_state == SMC_APPCLOSEWAIT1)
1581                                 mask |= EPOLLIN;
1582                         if (smc->conn.urg_state == SMC_URG_VALID)
1583                                 mask |= EPOLLPRI;
1584                 }
1585         }
1586
1587         return mask;
1588 }
1589
1590 static int smc_shutdown(struct socket *sock, int how)
1591 {
1592         struct sock *sk = sock->sk;
1593         struct smc_sock *smc;
1594         int rc = -EINVAL;
1595         int rc1 = 0;
1596
1597         smc = smc_sk(sk);
1598
1599         if ((how < SHUT_RD) || (how > SHUT_RDWR))
1600                 return rc;
1601
1602         lock_sock(sk);
1603
1604         rc = -ENOTCONN;
1605         if ((sk->sk_state != SMC_ACTIVE) &&
1606             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1607             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1608             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1609             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1610             (sk->sk_state != SMC_APPFINCLOSEWAIT))
1611                 goto out;
1612         if (smc->use_fallback) {
1613                 rc = kernel_sock_shutdown(smc->clcsock, how);
1614                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1615                 if (sk->sk_shutdown == SHUTDOWN_MASK)
1616                         sk->sk_state = SMC_CLOSED;
1617                 goto out;
1618         }
1619         switch (how) {
1620         case SHUT_RDWR:         /* shutdown in both directions */
1621                 rc = smc_close_active(smc);
1622                 break;
1623         case SHUT_WR:
1624                 rc = smc_close_shutdown_write(smc);
1625                 break;
1626         case SHUT_RD:
1627                 rc = 0;
1628                 /* nothing more to do because peer is not involved */
1629                 break;
1630         }
1631         if (smc->clcsock)
1632                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
1633         /* map sock_shutdown_cmd constants to sk_shutdown value range */
1634         sk->sk_shutdown |= how + 1;
1635
1636 out:
1637         release_sock(sk);
1638         return rc ? rc : rc1;
1639 }
1640
1641 static int smc_setsockopt(struct socket *sock, int level, int optname,
1642                           char __user *optval, unsigned int optlen)
1643 {
1644         struct sock *sk = sock->sk;
1645         struct smc_sock *smc;
1646         int val, rc;
1647
1648         smc = smc_sk(sk);
1649
1650         /* generic setsockopts reaching us here always apply to the
1651          * CLC socket
1652          */
1653         rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1654                                            optval, optlen);
1655         if (smc->clcsock->sk->sk_err) {
1656                 sk->sk_err = smc->clcsock->sk->sk_err;
1657                 sk->sk_error_report(sk);
1658         }
1659         if (rc)
1660                 return rc;
1661
1662         if (optlen < sizeof(int))
1663                 return -EINVAL;
1664         if (get_user(val, (int __user *)optval))
1665                 return -EFAULT;
1666
1667         lock_sock(sk);
1668         switch (optname) {
1669         case TCP_ULP:
1670         case TCP_FASTOPEN:
1671         case TCP_FASTOPEN_CONNECT:
1672         case TCP_FASTOPEN_KEY:
1673         case TCP_FASTOPEN_NO_COOKIE:
1674                 /* option not supported by SMC */
1675                 if (sk->sk_state == SMC_INIT) {
1676                         smc->use_fallback = true;
1677                         smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1678                 } else {
1679                         if (!smc->use_fallback)
1680                                 rc = -EINVAL;
1681                 }
1682                 break;
1683         case TCP_NODELAY:
1684                 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1685                         if (val && !smc->use_fallback)
1686                                 mod_delayed_work(system_wq, &smc->conn.tx_work,
1687                                                  0);
1688                 }
1689                 break;
1690         case TCP_CORK:
1691                 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1692                         if (!val && !smc->use_fallback)
1693                                 mod_delayed_work(system_wq, &smc->conn.tx_work,
1694                                                  0);
1695                 }
1696                 break;
1697         case TCP_DEFER_ACCEPT:
1698                 smc->sockopt_defer_accept = val;
1699                 break;
1700         default:
1701                 break;
1702         }
1703         release_sock(sk);
1704
1705         return rc;
1706 }
1707
1708 static int smc_getsockopt(struct socket *sock, int level, int optname,
1709                           char __user *optval, int __user *optlen)
1710 {
1711         struct smc_sock *smc;
1712
1713         smc = smc_sk(sock->sk);
1714         /* socket options apply to the CLC socket */
1715         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1716                                              optval, optlen);
1717 }
1718
1719 static int smc_ioctl(struct socket *sock, unsigned int cmd,
1720                      unsigned long arg)
1721 {
1722         union smc_host_cursor cons, urg;
1723         struct smc_connection *conn;
1724         struct smc_sock *smc;
1725         int answ;
1726
1727         smc = smc_sk(sock->sk);
1728         conn = &smc->conn;
1729         lock_sock(&smc->sk);
1730         if (smc->use_fallback) {
1731                 if (!smc->clcsock) {
1732                         release_sock(&smc->sk);
1733                         return -EBADF;
1734                 }
1735                 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1736                 release_sock(&smc->sk);
1737                 return answ;
1738         }
1739         switch (cmd) {
1740         case SIOCINQ: /* same as FIONREAD */
1741                 if (smc->sk.sk_state == SMC_LISTEN) {
1742                         release_sock(&smc->sk);
1743                         return -EINVAL;
1744                 }
1745                 if (smc->sk.sk_state == SMC_INIT ||
1746                     smc->sk.sk_state == SMC_CLOSED)
1747                         answ = 0;
1748                 else
1749                         answ = atomic_read(&smc->conn.bytes_to_rcv);
1750                 break;
1751         case SIOCOUTQ:
1752                 /* output queue size (not send + not acked) */
1753                 if (smc->sk.sk_state == SMC_LISTEN) {
1754                         release_sock(&smc->sk);
1755                         return -EINVAL;
1756                 }
1757                 if (smc->sk.sk_state == SMC_INIT ||
1758                     smc->sk.sk_state == SMC_CLOSED)
1759                         answ = 0;
1760                 else
1761                         answ = smc->conn.sndbuf_desc->len -
1762                                         atomic_read(&smc->conn.sndbuf_space);
1763                 break;
1764         case SIOCOUTQNSD:
1765                 /* output queue size (not send only) */
1766                 if (smc->sk.sk_state == SMC_LISTEN) {
1767                         release_sock(&smc->sk);
1768                         return -EINVAL;
1769                 }
1770                 if (smc->sk.sk_state == SMC_INIT ||
1771                     smc->sk.sk_state == SMC_CLOSED)
1772                         answ = 0;
1773                 else
1774                         answ = smc_tx_prepared_sends(&smc->conn);
1775                 break;
1776         case SIOCATMARK:
1777                 if (smc->sk.sk_state == SMC_LISTEN) {
1778                         release_sock(&smc->sk);
1779                         return -EINVAL;
1780                 }
1781                 if (smc->sk.sk_state == SMC_INIT ||
1782                     smc->sk.sk_state == SMC_CLOSED) {
1783                         answ = 0;
1784                 } else {
1785                         smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1786                         smc_curs_copy(&urg, &conn->urg_curs, conn);
1787                         answ = smc_curs_diff(conn->rmb_desc->len,
1788                                              &cons, &urg) == 1;
1789                 }
1790                 break;
1791         default:
1792                 release_sock(&smc->sk);
1793                 return -ENOIOCTLCMD;
1794         }
1795         release_sock(&smc->sk);
1796
1797         return put_user(answ, (int __user *)arg);
1798 }
1799
1800 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1801                             int offset, size_t size, int flags)
1802 {
1803         struct sock *sk = sock->sk;
1804         struct smc_sock *smc;
1805         int rc = -EPIPE;
1806
1807         smc = smc_sk(sk);
1808         lock_sock(sk);
1809         if (sk->sk_state != SMC_ACTIVE) {
1810                 release_sock(sk);
1811                 goto out;
1812         }
1813         release_sock(sk);
1814         if (smc->use_fallback)
1815                 rc = kernel_sendpage(smc->clcsock, page, offset,
1816                                      size, flags);
1817         else
1818                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1819
1820 out:
1821         return rc;
1822 }
1823
1824 /* Map the affected portions of the rmbe into an spd, note the number of bytes
1825  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
1826  * updates till whenever a respective page has been fully processed.
1827  * Note that subsequent recv() calls have to wait till all splice() processing
1828  * completed.
1829  */
1830 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1831                                struct pipe_inode_info *pipe, size_t len,
1832                                unsigned int flags)
1833 {
1834         struct sock *sk = sock->sk;
1835         struct smc_sock *smc;
1836         int rc = -ENOTCONN;
1837
1838         smc = smc_sk(sk);
1839         lock_sock(sk);
1840
1841         if (sk->sk_state == SMC_INIT ||
1842             sk->sk_state == SMC_LISTEN ||
1843             sk->sk_state == SMC_CLOSED)
1844                 goto out;
1845
1846         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1847                 rc = 0;
1848                 goto out;
1849         }
1850
1851         if (smc->use_fallback) {
1852                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1853                                                     pipe, len, flags);
1854         } else {
1855                 if (*ppos) {
1856                         rc = -ESPIPE;
1857                         goto out;
1858                 }
1859                 if (flags & SPLICE_F_NONBLOCK)
1860                         flags = MSG_DONTWAIT;
1861                 else
1862                         flags = 0;
1863                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1864         }
1865 out:
1866         release_sock(sk);
1867
1868         return rc;
1869 }
1870
1871 /* must look like tcp */
1872 static const struct proto_ops smc_sock_ops = {
1873         .family         = PF_SMC,
1874         .owner          = THIS_MODULE,
1875         .release        = smc_release,
1876         .bind           = smc_bind,
1877         .connect        = smc_connect,
1878         .socketpair     = sock_no_socketpair,
1879         .accept         = smc_accept,
1880         .getname        = smc_getname,
1881         .poll           = smc_poll,
1882         .ioctl          = smc_ioctl,
1883         .listen         = smc_listen,
1884         .shutdown       = smc_shutdown,
1885         .setsockopt     = smc_setsockopt,
1886         .getsockopt     = smc_getsockopt,
1887         .sendmsg        = smc_sendmsg,
1888         .recvmsg        = smc_recvmsg,
1889         .mmap           = sock_no_mmap,
1890         .sendpage       = smc_sendpage,
1891         .splice_read    = smc_splice_read,
1892 };
1893
1894 static int smc_create(struct net *net, struct socket *sock, int protocol,
1895                       int kern)
1896 {
1897         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1898         struct smc_sock *smc;
1899         struct sock *sk;
1900         int rc;
1901
1902         rc = -ESOCKTNOSUPPORT;
1903         if (sock->type != SOCK_STREAM)
1904                 goto out;
1905
1906         rc = -EPROTONOSUPPORT;
1907         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1908                 goto out;
1909
1910         rc = -ENOBUFS;
1911         sock->ops = &smc_sock_ops;
1912         sk = smc_sock_alloc(net, sock, protocol);
1913         if (!sk)
1914                 goto out;
1915
1916         /* create internal TCP socket for CLC handshake and fallback */
1917         smc = smc_sk(sk);
1918         smc->use_fallback = false; /* assume rdma capability first */
1919         smc->fallback_rsn = 0;
1920         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1921                               &smc->clcsock);
1922         if (rc) {
1923                 sk_common_release(sk);
1924                 goto out;
1925         }
1926         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1927         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1928
1929 out:
1930         return rc;
1931 }
1932
1933 static const struct net_proto_family smc_sock_family_ops = {
1934         .family = PF_SMC,
1935         .owner  = THIS_MODULE,
1936         .create = smc_create,
1937 };
1938
1939 static int __init smc_init(void)
1940 {
1941         int rc;
1942
1943         rc = smc_pnet_init();
1944         if (rc)
1945                 return rc;
1946
1947         rc = smc_llc_init();
1948         if (rc) {
1949                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1950                 goto out_pnet;
1951         }
1952
1953         rc = smc_cdc_init();
1954         if (rc) {
1955                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
1956                 goto out_pnet;
1957         }
1958
1959         rc = proto_register(&smc_proto, 1);
1960         if (rc) {
1961                 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
1962                 goto out_pnet;
1963         }
1964
1965         rc = proto_register(&smc_proto6, 1);
1966         if (rc) {
1967                 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
1968                 goto out_proto;
1969         }
1970
1971         rc = sock_register(&smc_sock_family_ops);
1972         if (rc) {
1973                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
1974                 goto out_proto6;
1975         }
1976         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
1977         INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
1978
1979         rc = smc_ib_register_client();
1980         if (rc) {
1981                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
1982                 goto out_sock;
1983         }
1984
1985         static_branch_enable(&tcp_have_smc);
1986         return 0;
1987
1988 out_sock:
1989         sock_unregister(PF_SMC);
1990 out_proto6:
1991         proto_unregister(&smc_proto6);
1992 out_proto:
1993         proto_unregister(&smc_proto);
1994 out_pnet:
1995         smc_pnet_exit();
1996         return rc;
1997 }
1998
1999 static void __exit smc_exit(void)
2000 {
2001         smc_core_exit();
2002         static_branch_disable(&tcp_have_smc);
2003         smc_ib_unregister_client();
2004         sock_unregister(PF_SMC);
2005         proto_unregister(&smc_proto6);
2006         proto_unregister(&smc_proto);
2007         smc_pnet_exit();
2008 }
2009
2010 module_init(smc_init);
2011 module_exit(smc_exit);
2012
2013 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
2014 MODULE_DESCRIPTION("smc socket address family");
2015 MODULE_LICENSE("GPL");
2016 MODULE_ALIAS_NETPROTO(PF_SMC);