OSDN Git Service

ixgbe: use mii_bus to handle MII related ioctls
[uclinux-h8/linux.git] / net / smc / af_smc.c
1 /*
2  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
3  *
4  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
5  *  applies to SOCK_STREAM sockets only
6  *  offers an alternative communication option for TCP-protocol sockets
7  *  applicable with RoCE-cards only
8  *
9  *  Initial restrictions:
10  *    - support for alternate links postponed
11  *
12  *  Copyright IBM Corp. 2016, 2018
13  *
14  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
15  *              based on prototype from Frank Blaschka
16  */
17
18 #define KMSG_COMPONENT "smc"
19 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
20
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/workqueue.h>
24 #include <linux/in.h>
25 #include <linux/sched/signal.h>
26 #include <linux/if_vlan.h>
27
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/smc.h>
31 #include <asm/ioctls.h>
32
33 #include "smc.h"
34 #include "smc_clc.h"
35 #include "smc_llc.h"
36 #include "smc_cdc.h"
37 #include "smc_core.h"
38 #include "smc_ib.h"
39 #include "smc_ism.h"
40 #include "smc_pnet.h"
41 #include "smc_tx.h"
42 #include "smc_rx.h"
43 #include "smc_close.h"
44
/* serialize link group creation */
static DEFINE_MUTEX(smc_create_lgr_pending);

/* work handlers that smc_sock_alloc() attaches to every new smc socket */
static void smc_tcp_listen_work(struct work_struct *work);
static void smc_connect_work(struct work_struct *work);
51
52 static void smc_set_keepalive(struct sock *sk, int val)
53 {
54         struct smc_sock *smc = smc_sk(sk);
55
56         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
57 }
58
59 static struct smc_hashinfo smc_v4_hashinfo = {
60         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
61 };
62
63 static struct smc_hashinfo smc_v6_hashinfo = {
64         .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
65 };
66
67 int smc_hash_sk(struct sock *sk)
68 {
69         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
70         struct hlist_head *head;
71
72         head = &h->ht;
73
74         write_lock_bh(&h->lock);
75         sk_add_node(sk, head);
76         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
77         write_unlock_bh(&h->lock);
78
79         return 0;
80 }
81 EXPORT_SYMBOL_GPL(smc_hash_sk);
82
83 void smc_unhash_sk(struct sock *sk)
84 {
85         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
86
87         write_lock_bh(&h->lock);
88         if (sk_del_node_init(sk))
89                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
90         write_unlock_bh(&h->lock);
91 }
92 EXPORT_SYMBOL_GPL(smc_unhash_sk);
93
94 struct proto smc_proto = {
95         .name           = "SMC",
96         .owner          = THIS_MODULE,
97         .keepalive      = smc_set_keepalive,
98         .hash           = smc_hash_sk,
99         .unhash         = smc_unhash_sk,
100         .obj_size       = sizeof(struct smc_sock),
101         .h.smc_hash     = &smc_v4_hashinfo,
102         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
103 };
104 EXPORT_SYMBOL_GPL(smc_proto);
105
106 struct proto smc_proto6 = {
107         .name           = "SMC6",
108         .owner          = THIS_MODULE,
109         .keepalive      = smc_set_keepalive,
110         .hash           = smc_hash_sk,
111         .unhash         = smc_unhash_sk,
112         .obj_size       = sizeof(struct smc_sock),
113         .h.smc_hash     = &smc_v6_hashinfo,
114         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
115 };
116 EXPORT_SYMBOL_GPL(smc_proto6);
117
118 static int smc_release(struct socket *sock)
119 {
120         struct sock *sk = sock->sk;
121         struct smc_sock *smc;
122         int rc = 0;
123
124         if (!sk)
125                 goto out;
126
127         smc = smc_sk(sk);
128
129         /* cleanup for a dangling non-blocking connect */
130         if (smc->connect_info && sk->sk_state == SMC_INIT)
131                 tcp_abort(smc->clcsock->sk, ECONNABORTED);
132         flush_work(&smc->connect_work);
133         kfree(smc->connect_info);
134         smc->connect_info = NULL;
135
136         if (sk->sk_state == SMC_LISTEN)
137                 /* smc_close_non_accepted() is called and acquires
138                  * sock lock for child sockets again
139                  */
140                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
141         else
142                 lock_sock(sk);
143
144         if (!smc->use_fallback) {
145                 rc = smc_close_active(smc);
146                 sock_set_flag(sk, SOCK_DEAD);
147                 sk->sk_shutdown |= SHUTDOWN_MASK;
148         }
149         if (smc->clcsock) {
150                 sock_release(smc->clcsock);
151                 smc->clcsock = NULL;
152         }
153         if (smc->use_fallback) {
154                 if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
155                         sock_put(sk); /* passive closing */
156                 sk->sk_state = SMC_CLOSED;
157                 sk->sk_state_change(sk);
158         }
159
160         /* detach socket */
161         sock_orphan(sk);
162         sock->sk = NULL;
163         if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
164                 smc_conn_free(&smc->conn);
165         release_sock(sk);
166
167         sk->sk_prot->unhash(sk);
168         sock_put(sk); /* final sock_put */
169 out:
170         return rc;
171 }
172
173 static void smc_destruct(struct sock *sk)
174 {
175         if (sk->sk_state != SMC_CLOSED)
176                 return;
177         if (!sock_flag(sk, SOCK_DEAD))
178                 return;
179
180         sk_refcnt_debug_dec(sk);
181 }
182
183 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
184                                    int protocol)
185 {
186         struct smc_sock *smc;
187         struct proto *prot;
188         struct sock *sk;
189
190         prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
191         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
192         if (!sk)
193                 return NULL;
194
195         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
196         sk->sk_state = SMC_INIT;
197         sk->sk_destruct = smc_destruct;
198         sk->sk_protocol = protocol;
199         smc = smc_sk(sk);
200         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
201         INIT_WORK(&smc->connect_work, smc_connect_work);
202         INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
203         INIT_LIST_HEAD(&smc->accept_q);
204         spin_lock_init(&smc->accept_q_lock);
205         spin_lock_init(&smc->conn.send_lock);
206         sk->sk_prot->hash(sk);
207         sk_refcnt_debug_inc(sk);
208
209         return sk;
210 }
211
212 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
213                     int addr_len)
214 {
215         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
216         struct sock *sk = sock->sk;
217         struct smc_sock *smc;
218         int rc;
219
220         smc = smc_sk(sk);
221
222         /* replicate tests from inet_bind(), to be safe wrt. future changes */
223         rc = -EINVAL;
224         if (addr_len < sizeof(struct sockaddr_in))
225                 goto out;
226
227         rc = -EAFNOSUPPORT;
228         if (addr->sin_family != AF_INET &&
229             addr->sin_family != AF_INET6 &&
230             addr->sin_family != AF_UNSPEC)
231                 goto out;
232         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
233         if (addr->sin_family == AF_UNSPEC &&
234             addr->sin_addr.s_addr != htonl(INADDR_ANY))
235                 goto out;
236
237         lock_sock(sk);
238
239         /* Check if socket is already active */
240         rc = -EINVAL;
241         if (sk->sk_state != SMC_INIT)
242                 goto out_rel;
243
244         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
245         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
246
247 out_rel:
248         release_sock(sk);
249 out:
250         return rc;
251 }
252
253 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
254                                    unsigned long mask)
255 {
256         /* options we don't get control via setsockopt for */
257         nsk->sk_type = osk->sk_type;
258         nsk->sk_sndbuf = osk->sk_sndbuf;
259         nsk->sk_rcvbuf = osk->sk_rcvbuf;
260         nsk->sk_sndtimeo = osk->sk_sndtimeo;
261         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
262         nsk->sk_mark = osk->sk_mark;
263         nsk->sk_priority = osk->sk_priority;
264         nsk->sk_rcvlowat = osk->sk_rcvlowat;
265         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
266         nsk->sk_err = osk->sk_err;
267
268         nsk->sk_flags &= ~mask;
269         nsk->sk_flags |= osk->sk_flags & mask;
270 }
271
272 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
273                              (1UL << SOCK_KEEPOPEN) | \
274                              (1UL << SOCK_LINGER) | \
275                              (1UL << SOCK_BROADCAST) | \
276                              (1UL << SOCK_TIMESTAMP) | \
277                              (1UL << SOCK_DBG) | \
278                              (1UL << SOCK_RCVTSTAMP) | \
279                              (1UL << SOCK_RCVTSTAMPNS) | \
280                              (1UL << SOCK_LOCALROUTE) | \
281                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
282                              (1UL << SOCK_RXQ_OVFL) | \
283                              (1UL << SOCK_WIFI_STATUS) | \
284                              (1UL << SOCK_NOFCS) | \
285                              (1UL << SOCK_FILTER_LOCKED))
286 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
287  * clc socket (since smc is not called for these options from net/core)
288  */
289 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
290 {
291         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
292 }
293
294 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
295                              (1UL << SOCK_KEEPOPEN) | \
296                              (1UL << SOCK_LINGER) | \
297                              (1UL << SOCK_DBG))
298 /* copy only settings and flags relevant for smc from clc to smc socket */
299 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
300 {
301         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
302 }
303
304 /* register a new rmb, send confirm_rkey msg to register with peer */
305 static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
306                        bool conf_rkey)
307 {
308         if (!rmb_desc->wr_reg) {
309                 /* register memory region for new rmb */
310                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
311                         rmb_desc->regerr = 1;
312                         return -EFAULT;
313                 }
314                 rmb_desc->wr_reg = 1;
315         }
316         if (!conf_rkey)
317                 return 0;
318         /* exchange confirm_rkey msg with peer */
319         if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
320                 rmb_desc->regerr = 1;
321                 return -EFAULT;
322         }
323         return 0;
324 }
325
326 static int smc_clnt_conf_first_link(struct smc_sock *smc)
327 {
328         struct net *net = sock_net(smc->clcsock->sk);
329         struct smc_link_group *lgr = smc->conn.lgr;
330         struct smc_link *link;
331         int rest;
332         int rc;
333
334         link = &lgr->lnk[SMC_SINGLE_LINK];
335         /* receive CONFIRM LINK request from server over RoCE fabric */
336         rest = wait_for_completion_interruptible_timeout(
337                 &link->llc_confirm,
338                 SMC_LLC_WAIT_FIRST_TIME);
339         if (rest <= 0) {
340                 struct smc_clc_msg_decline dclc;
341
342                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
343                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
344                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
345         }
346
347         if (link->llc_confirm_rc)
348                 return SMC_CLC_DECL_RMBE_EC;
349
350         rc = smc_ib_modify_qp_rts(link);
351         if (rc)
352                 return SMC_CLC_DECL_ERR_RDYLNK;
353
354         smc_wr_remember_qp_attr(link);
355
356         if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
357                 return SMC_CLC_DECL_ERR_REGRMB;
358
359         /* send CONFIRM LINK response over RoCE fabric */
360         rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
361         if (rc < 0)
362                 return SMC_CLC_DECL_TIMEOUT_CL;
363
364         /* receive ADD LINK request from server over RoCE fabric */
365         rest = wait_for_completion_interruptible_timeout(&link->llc_add,
366                                                          SMC_LLC_WAIT_TIME);
367         if (rest <= 0) {
368                 struct smc_clc_msg_decline dclc;
369
370                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
371                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
372                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
373         }
374
375         /* send add link reject message, only one link supported for now */
376         rc = smc_llc_send_add_link(link,
377                                    link->smcibdev->mac[link->ibport - 1],
378                                    link->gid, SMC_LLC_RESP);
379         if (rc < 0)
380                 return SMC_CLC_DECL_TIMEOUT_AL;
381
382         smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
383
384         return 0;
385 }
386
387 static void smcr_conn_save_peer_info(struct smc_sock *smc,
388                                      struct smc_clc_msg_accept_confirm *clc)
389 {
390         int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
391
392         smc->conn.peer_rmbe_idx = clc->rmbe_idx;
393         smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
394         smc->conn.peer_rmbe_size = bufsize;
395         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
396         smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
397 }
398
399 static void smcd_conn_save_peer_info(struct smc_sock *smc,
400                                      struct smc_clc_msg_accept_confirm *clc)
401 {
402         int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
403
404         smc->conn.peer_rmbe_idx = clc->dmbe_idx;
405         smc->conn.peer_token = clc->token;
406         /* msg header takes up space in the buffer */
407         smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
408         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
409         smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
410 }
411
412 static void smc_conn_save_peer_info(struct smc_sock *smc,
413                                     struct smc_clc_msg_accept_confirm *clc)
414 {
415         if (smc->conn.lgr->is_smcd)
416                 smcd_conn_save_peer_info(smc, clc);
417         else
418                 smcr_conn_save_peer_info(smc, clc);
419 }
420
421 static void smc_link_save_peer_info(struct smc_link *link,
422                                     struct smc_clc_msg_accept_confirm *clc)
423 {
424         link->peer_qpn = ntoh24(clc->qpn);
425         memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
426         memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
427         link->peer_psn = ntoh24(clc->psn);
428         link->peer_mtu = clc->qp_mtu;
429 }
430
431 /* fall back during connect */
432 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
433 {
434         smc->use_fallback = true;
435         smc->fallback_rsn = reason_code;
436         smc_copy_sock_settings_to_clc(smc);
437         if (smc->sk.sk_state == SMC_INIT)
438                 smc->sk.sk_state = SMC_ACTIVE;
439         return 0;
440 }
441
442 /* decline and fall back during connect */
443 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
444 {
445         int rc;
446
447         if (reason_code < 0) { /* error, fallback is not possible */
448                 if (smc->sk.sk_state == SMC_INIT)
449                         sock_put(&smc->sk); /* passive closing */
450                 return reason_code;
451         }
452         if (reason_code != SMC_CLC_DECL_PEERDECL) {
453                 rc = smc_clc_send_decline(smc, reason_code);
454                 if (rc < 0) {
455                         if (smc->sk.sk_state == SMC_INIT)
456                                 sock_put(&smc->sk); /* passive closing */
457                         return rc;
458                 }
459         }
460         return smc_connect_fallback(smc, reason_code);
461 }
462
463 /* abort connecting */
464 static int smc_connect_abort(struct smc_sock *smc, int reason_code,
465                              int local_contact)
466 {
467         if (local_contact == SMC_FIRST_CONTACT)
468                 smc_lgr_forget(smc->conn.lgr);
469         mutex_unlock(&smc_create_lgr_pending);
470         smc_conn_free(&smc->conn);
471         return reason_code;
472 }
473
474 /* check if there is a rdma device available for this connection. */
475 /* called for connect and listen */
476 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
477                           u8 *ibport, unsigned short vlan_id, u8 gid[])
478 {
479         int reason_code = 0;
480
481         /* PNET table look up: search active ib_device and port
482          * within same PNETID that also contains the ethernet device
483          * used for the internal TCP socket
484          */
485         smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
486                                     gid);
487         if (!(*ibdev))
488                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
489
490         return reason_code;
491 }
492
493 /* check if there is an ISM device available for this connection. */
494 /* called for connect and listen */
495 static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
496 {
497         /* Find ISM device with same PNETID as connecting interface  */
498         smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
499         if (!(*ismdev))
500                 return SMC_CLC_DECL_CNFERR; /* configuration error */
501         return 0;
502 }
503
504 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
505 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
506                                       struct smcd_dev *ismdev,
507                                       unsigned short vlan_id)
508 {
509         if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
510                 return SMC_CLC_DECL_CNFERR;
511         return 0;
512 }
513
514 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
515  * used, the VLAN ID will be registered again during the connection setup.
516  */
517 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
518                                         struct smcd_dev *ismdev,
519                                         unsigned short vlan_id)
520 {
521         if (!is_smcd)
522                 return 0;
523         if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
524                 return SMC_CLC_DECL_CNFERR;
525         return 0;
526 }
527
528 /* CLC handshake during connect */
529 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
530                            struct smc_clc_msg_accept_confirm *aclc,
531                            struct smc_ib_device *ibdev, u8 ibport,
532                            u8 gid[], struct smcd_dev *ismdev)
533 {
534         int rc = 0;
535
536         /* do inband token exchange */
537         rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
538         if (rc)
539                 return rc;
540         /* receive SMC Accept CLC message */
541         return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
542                                 CLC_WAIT_TIME);
543 }
544
545 /* setup for RDMA connection of client */
546 static int smc_connect_rdma(struct smc_sock *smc,
547                             struct smc_clc_msg_accept_confirm *aclc,
548                             struct smc_ib_device *ibdev, u8 ibport)
549 {
550         int local_contact = SMC_FIRST_CONTACT;
551         struct smc_link *link;
552         int reason_code = 0;
553
554         mutex_lock(&smc_create_lgr_pending);
555         local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
556                                         ibport, ntoh24(aclc->qpn), &aclc->lcl,
557                                         NULL, 0);
558         if (local_contact < 0) {
559                 if (local_contact == -ENOMEM)
560                         reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
561                 else if (local_contact == -ENOLINK)
562                         reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
563                 else
564                         reason_code = SMC_CLC_DECL_INTERR; /* other error */
565                 return smc_connect_abort(smc, reason_code, 0);
566         }
567         link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
568
569         smc_conn_save_peer_info(smc, aclc);
570
571         /* create send buffer and rmb */
572         if (smc_buf_create(smc, false))
573                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
574
575         if (local_contact == SMC_FIRST_CONTACT)
576                 smc_link_save_peer_info(link, aclc);
577
578         if (smc_rmb_rtoken_handling(&smc->conn, aclc))
579                 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
580                                          local_contact);
581
582         smc_close_init(smc);
583         smc_rx_init(smc);
584
585         if (local_contact == SMC_FIRST_CONTACT) {
586                 if (smc_ib_ready_link(link))
587                         return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
588                                                  local_contact);
589         } else {
590                 if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
591                         return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
592                                                  local_contact);
593         }
594         smc_rmb_sync_sg_for_device(&smc->conn);
595
596         reason_code = smc_clc_send_confirm(smc);
597         if (reason_code)
598                 return smc_connect_abort(smc, reason_code, local_contact);
599
600         smc_tx_init(smc);
601
602         if (local_contact == SMC_FIRST_CONTACT) {
603                 /* QP confirmation over RoCE fabric */
604                 reason_code = smc_clnt_conf_first_link(smc);
605                 if (reason_code)
606                         return smc_connect_abort(smc, reason_code,
607                                                  local_contact);
608         }
609         mutex_unlock(&smc_create_lgr_pending);
610
611         smc_copy_sock_settings_to_clc(smc);
612         if (smc->sk.sk_state == SMC_INIT)
613                 smc->sk.sk_state = SMC_ACTIVE;
614
615         return 0;
616 }
617
618 /* setup for ISM connection of client */
619 static int smc_connect_ism(struct smc_sock *smc,
620                            struct smc_clc_msg_accept_confirm *aclc,
621                            struct smcd_dev *ismdev)
622 {
623         int local_contact = SMC_FIRST_CONTACT;
624         int rc = 0;
625
626         mutex_lock(&smc_create_lgr_pending);
627         local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
628                                         NULL, ismdev, aclc->gid);
629         if (local_contact < 0)
630                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
631
632         /* Create send and receive buffers */
633         if (smc_buf_create(smc, true))
634                 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
635
636         smc_conn_save_peer_info(smc, aclc);
637         smc_close_init(smc);
638         smc_rx_init(smc);
639         smc_tx_init(smc);
640
641         rc = smc_clc_send_confirm(smc);
642         if (rc)
643                 return smc_connect_abort(smc, rc, local_contact);
644         mutex_unlock(&smc_create_lgr_pending);
645
646         smc_copy_sock_settings_to_clc(smc);
647         if (smc->sk.sk_state == SMC_INIT)
648                 smc->sk.sk_state = SMC_ACTIVE;
649
650         return 0;
651 }
652
653 /* perform steps before actually connecting */
654 static int __smc_connect(struct smc_sock *smc)
655 {
656         bool ism_supported = false, rdma_supported = false;
657         struct smc_clc_msg_accept_confirm aclc;
658         struct smc_ib_device *ibdev;
659         struct smcd_dev *ismdev;
660         u8 gid[SMC_GID_SIZE];
661         unsigned short vlan;
662         int smc_type;
663         int rc = 0;
664         u8 ibport;
665
666         sock_hold(&smc->sk); /* sock put in passive closing */
667
668         if (smc->use_fallback)
669                 return smc_connect_fallback(smc, smc->fallback_rsn);
670
671         /* if peer has not signalled SMC-capability, fall back */
672         if (!tcp_sk(smc->clcsock->sk)->syn_smc)
673                 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
674
675         /* IPSec connections opt out of SMC-R optimizations */
676         if (using_ipsec(smc))
677                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
678
679         /* check for VLAN ID */
680         if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
681                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
682
683         /* check if there is an ism device available */
684         if (!smc_check_ism(smc, &ismdev) &&
685             !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
686                 /* ISM is supported for this connection */
687                 ism_supported = true;
688                 smc_type = SMC_TYPE_D;
689         }
690
691         /* check if there is a rdma device available */
692         if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
693                 /* RDMA is supported for this connection */
694                 rdma_supported = true;
695                 if (ism_supported)
696                         smc_type = SMC_TYPE_B; /* both */
697                 else
698                         smc_type = SMC_TYPE_R; /* only RDMA */
699         }
700
701         /* if neither ISM nor RDMA are supported, fallback */
702         if (!rdma_supported && !ism_supported)
703                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
704
705         /* perform CLC handshake */
706         rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
707         if (rc) {
708                 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
709                 return smc_connect_decline_fallback(smc, rc);
710         }
711
712         /* depending on previous steps, connect using rdma or ism */
713         if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
714                 rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
715         else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
716                 rc = smc_connect_ism(smc, &aclc, ismdev);
717         else
718                 rc = SMC_CLC_DECL_MODEUNSUPP;
719         if (rc) {
720                 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
721                 return smc_connect_decline_fallback(smc, rc);
722         }
723
724         smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
725         return 0;
726 }
727
728 static void smc_connect_work(struct work_struct *work)
729 {
730         struct smc_sock *smc = container_of(work, struct smc_sock,
731                                             connect_work);
732         int rc;
733
734         lock_sock(&smc->sk);
735         rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
736                             smc->connect_info->alen, smc->connect_info->flags);
737         if (smc->clcsock->sk->sk_err) {
738                 smc->sk.sk_err = smc->clcsock->sk->sk_err;
739                 goto out;
740         }
741         if (rc < 0) {
742                 smc->sk.sk_err = -rc;
743                 goto out;
744         }
745
746         rc = __smc_connect(smc);
747         if (rc < 0)
748                 smc->sk.sk_err = -rc;
749
750 out:
751         if (smc->sk.sk_err)
752                 smc->sk.sk_state_change(&smc->sk);
753         else
754                 smc->sk.sk_write_space(&smc->sk);
755         kfree(smc->connect_info);
756         smc->connect_info = NULL;
757         release_sock(&smc->sk);
758 }
759
760 static int smc_connect(struct socket *sock, struct sockaddr *addr,
761                        int alen, int flags)
762 {
763         struct sock *sk = sock->sk;
764         struct smc_sock *smc;
765         int rc = -EINVAL;
766
767         smc = smc_sk(sk);
768
769         /* separate smc parameter checking to be safe */
770         if (alen < sizeof(addr->sa_family))
771                 goto out_err;
772         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
773                 goto out_err;
774
775         lock_sock(sk);
776         switch (sk->sk_state) {
777         default:
778                 goto out;
779         case SMC_ACTIVE:
780                 rc = -EISCONN;
781                 goto out;
782         case SMC_INIT:
783                 rc = 0;
784                 break;
785         }
786
787         smc_copy_sock_settings_to_clc(smc);
788         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
789         if (flags & O_NONBLOCK) {
790                 if (smc->connect_info) {
791                         rc = -EALREADY;
792                         goto out;
793                 }
794                 smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
795                 if (!smc->connect_info) {
796                         rc = -ENOMEM;
797                         goto out;
798                 }
799                 smc->connect_info->alen = alen;
800                 smc->connect_info->flags = flags ^ O_NONBLOCK;
801                 memcpy(&smc->connect_info->addr, addr, alen);
802                 schedule_work(&smc->connect_work);
803                 rc = -EINPROGRESS;
804         } else {
805                 rc = kernel_connect(smc->clcsock, addr, alen, flags);
806                 if (rc)
807                         goto out;
808
809                 rc = __smc_connect(smc);
810                 if (rc < 0)
811                         goto out;
812                 else
813                         rc = 0; /* success cases including fallback */
814         }
815
816 out:
817         release_sock(sk);
818 out_err:
819         return rc;
820 }
821
822 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
823 {
824         struct socket *new_clcsock = NULL;
825         struct sock *lsk = &lsmc->sk;
826         struct sock *new_sk;
827         int rc;
828
829         release_sock(lsk);
830         new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
831         if (!new_sk) {
832                 rc = -ENOMEM;
833                 lsk->sk_err = ENOMEM;
834                 *new_smc = NULL;
835                 lock_sock(lsk);
836                 goto out;
837         }
838         *new_smc = smc_sk(new_sk);
839
840         rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
841         lock_sock(lsk);
842         if  (rc < 0)
843                 lsk->sk_err = -rc;
844         if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
845                 if (new_clcsock)
846                         sock_release(new_clcsock);
847                 new_sk->sk_state = SMC_CLOSED;
848                 sock_set_flag(new_sk, SOCK_DEAD);
849                 new_sk->sk_prot->unhash(new_sk);
850                 sock_put(new_sk); /* final */
851                 *new_smc = NULL;
852                 goto out;
853         }
854
855         (*new_smc)->clcsock = new_clcsock;
856 out:
857         return rc;
858 }
859
860 /* add a just created sock to the accept queue of the listen sock as
861  * candidate for a following socket accept call from user space
862  */
863 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
864 {
865         struct smc_sock *par = smc_sk(parent);
866
867         sock_hold(sk); /* sock_put in smc_accept_unlink () */
868         spin_lock(&par->accept_q_lock);
869         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
870         spin_unlock(&par->accept_q_lock);
871         sk_acceptq_added(parent);
872 }
873
874 /* remove a socket from the accept queue of its parental listening socket */
875 static void smc_accept_unlink(struct sock *sk)
876 {
877         struct smc_sock *par = smc_sk(sk)->listen_smc;
878
879         spin_lock(&par->accept_q_lock);
880         list_del_init(&smc_sk(sk)->accept_q);
881         spin_unlock(&par->accept_q_lock);
882         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
883         sock_put(sk); /* sock_hold in smc_accept_enqueue */
884 }
885
886 /* remove a sock from the accept queue to bind it to a new socket created
887  * for a socket accept call from user space
888  */
889 struct sock *smc_accept_dequeue(struct sock *parent,
890                                 struct socket *new_sock)
891 {
892         struct smc_sock *isk, *n;
893         struct sock *new_sk;
894
895         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
896                 new_sk = (struct sock *)isk;
897
898                 smc_accept_unlink(new_sk);
899                 if (new_sk->sk_state == SMC_CLOSED) {
900                         if (isk->clcsock) {
901                                 sock_release(isk->clcsock);
902                                 isk->clcsock = NULL;
903                         }
904                         new_sk->sk_prot->unhash(new_sk);
905                         sock_put(new_sk); /* final */
906                         continue;
907                 }
908                 if (new_sock)
909                         sock_graft(new_sk, new_sock);
910                 return new_sk;
911         }
912         return NULL;
913 }
914
915 /* clean up for a created but never accepted sock */
916 void smc_close_non_accepted(struct sock *sk)
917 {
918         struct smc_sock *smc = smc_sk(sk);
919
920         lock_sock(sk);
921         if (!sk->sk_lingertime)
922                 /* wait for peer closing */
923                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
924         if (!smc->use_fallback) {
925                 smc_close_active(smc);
926                 sock_set_flag(sk, SOCK_DEAD);
927                 sk->sk_shutdown |= SHUTDOWN_MASK;
928         }
929         if (smc->clcsock) {
930                 struct socket *tcp;
931
932                 tcp = smc->clcsock;
933                 smc->clcsock = NULL;
934                 sock_release(tcp);
935         }
936         if (smc->use_fallback) {
937                 sock_put(sk); /* passive closing */
938                 sk->sk_state = SMC_CLOSED;
939         } else {
940                 if (sk->sk_state == SMC_CLOSED)
941                         smc_conn_free(&smc->conn);
942         }
943         release_sock(sk);
944         sk->sk_prot->unhash(sk);
945         sock_put(sk); /* final sock_put */
946 }
947
948 static int smc_serv_conf_first_link(struct smc_sock *smc)
949 {
950         struct net *net = sock_net(smc->clcsock->sk);
951         struct smc_link_group *lgr = smc->conn.lgr;
952         struct smc_link *link;
953         int rest;
954         int rc;
955
956         link = &lgr->lnk[SMC_SINGLE_LINK];
957
958         if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
959                 return SMC_CLC_DECL_ERR_REGRMB;
960
961         /* send CONFIRM LINK request to client over the RoCE fabric */
962         rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
963         if (rc < 0)
964                 return SMC_CLC_DECL_TIMEOUT_CL;
965
966         /* receive CONFIRM LINK response from client over the RoCE fabric */
967         rest = wait_for_completion_interruptible_timeout(
968                 &link->llc_confirm_resp,
969                 SMC_LLC_WAIT_FIRST_TIME);
970         if (rest <= 0) {
971                 struct smc_clc_msg_decline dclc;
972
973                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
974                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
975                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
976         }
977
978         if (link->llc_confirm_resp_rc)
979                 return SMC_CLC_DECL_RMBE_EC;
980
981         /* send ADD LINK request to client over the RoCE fabric */
982         rc = smc_llc_send_add_link(link,
983                                    link->smcibdev->mac[link->ibport - 1],
984                                    link->gid, SMC_LLC_REQ);
985         if (rc < 0)
986                 return SMC_CLC_DECL_TIMEOUT_AL;
987
988         /* receive ADD LINK response from client over the RoCE fabric */
989         rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
990                                                          SMC_LLC_WAIT_TIME);
991         if (rest <= 0) {
992                 struct smc_clc_msg_decline dclc;
993
994                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
995                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
996                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
997         }
998
999         smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
1000
1001         return 0;
1002 }
1003
1004 /* listen worker: finish */
1005 static void smc_listen_out(struct smc_sock *new_smc)
1006 {
1007         struct smc_sock *lsmc = new_smc->listen_smc;
1008         struct sock *newsmcsk = &new_smc->sk;
1009
1010         lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1011         if (lsmc->sk.sk_state == SMC_LISTEN) {
1012                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1013         } else { /* no longer listening */
1014                 smc_close_non_accepted(newsmcsk);
1015         }
1016         release_sock(&lsmc->sk);
1017
1018         /* Wake up accept */
1019         lsmc->sk.sk_data_ready(&lsmc->sk);
1020         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1021 }
1022
1023 /* listen worker: finish in state connected */
1024 static void smc_listen_out_connected(struct smc_sock *new_smc)
1025 {
1026         struct sock *newsmcsk = &new_smc->sk;
1027
1028         sk_refcnt_debug_inc(newsmcsk);
1029         if (newsmcsk->sk_state == SMC_INIT)
1030                 newsmcsk->sk_state = SMC_ACTIVE;
1031
1032         smc_listen_out(new_smc);
1033 }
1034
1035 /* listen worker: finish in error state */
1036 static void smc_listen_out_err(struct smc_sock *new_smc)
1037 {
1038         struct sock *newsmcsk = &new_smc->sk;
1039
1040         if (newsmcsk->sk_state == SMC_INIT)
1041                 sock_put(&new_smc->sk); /* passive closing */
1042         newsmcsk->sk_state = SMC_CLOSED;
1043         smc_conn_free(&new_smc->conn);
1044
1045         smc_listen_out(new_smc);
1046 }
1047
1048 /* listen worker: decline and fall back if possible */
1049 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1050                                int local_contact)
1051 {
1052         /* RDMA setup failed, switch back to TCP */
1053         if (local_contact == SMC_FIRST_CONTACT)
1054                 smc_lgr_forget(new_smc->conn.lgr);
1055         if (reason_code < 0) { /* error, no fallback possible */
1056                 smc_listen_out_err(new_smc);
1057                 return;
1058         }
1059         smc_conn_free(&new_smc->conn);
1060         new_smc->use_fallback = true;
1061         new_smc->fallback_rsn = reason_code;
1062         if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1063                 if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1064                         smc_listen_out_err(new_smc);
1065                         return;
1066                 }
1067         }
1068         smc_listen_out_connected(new_smc);
1069 }
1070
1071 /* listen worker: check prefixes */
1072 static int smc_listen_rdma_check(struct smc_sock *new_smc,
1073                                  struct smc_clc_msg_proposal *pclc)
1074 {
1075         struct smc_clc_msg_proposal_prefix *pclc_prfx;
1076         struct socket *newclcsock = new_smc->clcsock;
1077
1078         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1079         if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1080                 return SMC_CLC_DECL_CNFERR;
1081
1082         return 0;
1083 }
1084
1085 /* listen worker: initialize connection and buffers */
1086 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1087                                 struct smc_clc_msg_proposal *pclc,
1088                                 struct smc_ib_device *ibdev, u8 ibport,
1089                                 int *local_contact)
1090 {
1091         /* allocate connection / link group */
1092         *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
1093                                          &pclc->lcl, NULL, 0);
1094         if (*local_contact < 0) {
1095                 if (*local_contact == -ENOMEM)
1096                         return SMC_CLC_DECL_MEM;/* insufficient memory*/
1097                 return SMC_CLC_DECL_INTERR; /* other error */
1098         }
1099
1100         /* create send buffer and rmb */
1101         if (smc_buf_create(new_smc, false))
1102                 return SMC_CLC_DECL_MEM;
1103
1104         return 0;
1105 }
1106
1107 /* listen worker: initialize connection and buffers for SMC-D */
1108 static int smc_listen_ism_init(struct smc_sock *new_smc,
1109                                struct smc_clc_msg_proposal *pclc,
1110                                struct smcd_dev *ismdev,
1111                                int *local_contact)
1112 {
1113         struct smc_clc_msg_smcd *pclc_smcd;
1114
1115         pclc_smcd = smc_get_clc_msg_smcd(pclc);
1116         *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
1117                                          ismdev, pclc_smcd->gid);
1118         if (*local_contact < 0) {
1119                 if (*local_contact == -ENOMEM)
1120                         return SMC_CLC_DECL_MEM;/* insufficient memory*/
1121                 return SMC_CLC_DECL_INTERR; /* other error */
1122         }
1123
1124         /* Check if peer can be reached via ISM device */
1125         if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1126                             new_smc->conn.lgr->vlan_id,
1127                             new_smc->conn.lgr->smcd)) {
1128                 if (*local_contact == SMC_FIRST_CONTACT)
1129                         smc_lgr_forget(new_smc->conn.lgr);
1130                 smc_conn_free(&new_smc->conn);
1131                 return SMC_CLC_DECL_CNFERR;
1132         }
1133
1134         /* Create send and receive buffers */
1135         if (smc_buf_create(new_smc, true)) {
1136                 if (*local_contact == SMC_FIRST_CONTACT)
1137                         smc_lgr_forget(new_smc->conn.lgr);
1138                 smc_conn_free(&new_smc->conn);
1139                 return SMC_CLC_DECL_MEM;
1140         }
1141
1142         return 0;
1143 }
1144
1145 /* listen worker: register buffers */
1146 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1147 {
1148         struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1149
1150         if (local_contact != SMC_FIRST_CONTACT) {
1151                 if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
1152                         return SMC_CLC_DECL_ERR_REGRMB;
1153         }
1154         smc_rmb_sync_sg_for_device(&new_smc->conn);
1155
1156         return 0;
1157 }
1158
1159 /* listen worker: finish RDMA setup */
1160 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
1161                                   struct smc_clc_msg_accept_confirm *cclc,
1162                                   int local_contact)
1163 {
1164         struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1165         int reason_code = 0;
1166
1167         if (local_contact == SMC_FIRST_CONTACT)
1168                 smc_link_save_peer_info(link, cclc);
1169
1170         if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
1171                 reason_code = SMC_CLC_DECL_ERR_RTOK;
1172                 goto decline;
1173         }
1174
1175         if (local_contact == SMC_FIRST_CONTACT) {
1176                 if (smc_ib_ready_link(link)) {
1177                         reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1178                         goto decline;
1179                 }
1180                 /* QP confirmation over RoCE fabric */
1181                 reason_code = smc_serv_conf_first_link(new_smc);
1182                 if (reason_code)
1183                         goto decline;
1184         }
1185         return 0;
1186
1187 decline:
1188         smc_listen_decline(new_smc, reason_code, local_contact);
1189         return reason_code;
1190 }
1191
1192 /* setup for RDMA connection of server */
1193 static void smc_listen_work(struct work_struct *work)
1194 {
1195         struct smc_sock *new_smc = container_of(work, struct smc_sock,
1196                                                 smc_listen_work);
1197         struct socket *newclcsock = new_smc->clcsock;
1198         struct smc_clc_msg_accept_confirm cclc;
1199         struct smc_clc_msg_proposal *pclc;
1200         struct smc_ib_device *ibdev;
1201         bool ism_supported = false;
1202         struct smcd_dev *ismdev;
1203         u8 buf[SMC_CLC_MAX_LEN];
1204         int local_contact = 0;
1205         unsigned short vlan;
1206         int reason_code = 0;
1207         int rc = 0;
1208         u8 ibport;
1209
1210         if (new_smc->use_fallback) {
1211                 smc_listen_out_connected(new_smc);
1212                 return;
1213         }
1214
1215         /* check if peer is smc capable */
1216         if (!tcp_sk(newclcsock->sk)->syn_smc) {
1217                 new_smc->use_fallback = true;
1218                 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1219                 smc_listen_out_connected(new_smc);
1220                 return;
1221         }
1222
1223         /* do inband token exchange -
1224          * wait for and receive SMC Proposal CLC message
1225          */
1226         pclc = (struct smc_clc_msg_proposal *)&buf;
1227         reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1228                                        SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
1229         if (reason_code) {
1230                 smc_listen_decline(new_smc, reason_code, 0);
1231                 return;
1232         }
1233
1234         /* IPSec connections opt out of SMC-R optimizations */
1235         if (using_ipsec(new_smc)) {
1236                 smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
1237                 return;
1238         }
1239
1240         mutex_lock(&smc_create_lgr_pending);
1241         smc_close_init(new_smc);
1242         smc_rx_init(new_smc);
1243         smc_tx_init(new_smc);
1244
1245         /* check if ISM is available */
1246         if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
1247             !smc_check_ism(new_smc, &ismdev) &&
1248             !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
1249                 ism_supported = true;
1250         }
1251
1252         /* check if RDMA is available */
1253         if (!ism_supported &&
1254             ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
1255              smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
1256              smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
1257              smc_listen_rdma_check(new_smc, pclc) ||
1258              smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
1259                                   &local_contact) ||
1260              smc_listen_rdma_reg(new_smc, local_contact))) {
1261                 /* SMC not supported, decline */
1262                 mutex_unlock(&smc_create_lgr_pending);
1263                 smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
1264                                    local_contact);
1265                 return;
1266         }
1267
1268         /* send SMC Accept CLC message */
1269         rc = smc_clc_send_accept(new_smc, local_contact);
1270         if (rc) {
1271                 mutex_unlock(&smc_create_lgr_pending);
1272                 smc_listen_decline(new_smc, rc, local_contact);
1273                 return;
1274         }
1275
1276         /* receive SMC Confirm CLC message */
1277         reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1278                                        SMC_CLC_CONFIRM, CLC_WAIT_TIME);
1279         if (reason_code) {
1280                 mutex_unlock(&smc_create_lgr_pending);
1281                 smc_listen_decline(new_smc, reason_code, local_contact);
1282                 return;
1283         }
1284
1285         /* finish worker */
1286         if (!ism_supported) {
1287                 if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) {
1288                         mutex_unlock(&smc_create_lgr_pending);
1289                         return;
1290                 }
1291         }
1292         smc_conn_save_peer_info(new_smc, &cclc);
1293         mutex_unlock(&smc_create_lgr_pending);
1294         smc_listen_out_connected(new_smc);
1295 }
1296
1297 static void smc_tcp_listen_work(struct work_struct *work)
1298 {
1299         struct smc_sock *lsmc = container_of(work, struct smc_sock,
1300                                              tcp_listen_work);
1301         struct sock *lsk = &lsmc->sk;
1302         struct smc_sock *new_smc;
1303         int rc = 0;
1304
1305         lock_sock(lsk);
1306         while (lsk->sk_state == SMC_LISTEN) {
1307                 rc = smc_clcsock_accept(lsmc, &new_smc);
1308                 if (rc)
1309                         goto out;
1310                 if (!new_smc)
1311                         continue;
1312
1313                 new_smc->listen_smc = lsmc;
1314                 new_smc->use_fallback = lsmc->use_fallback;
1315                 new_smc->fallback_rsn = lsmc->fallback_rsn;
1316                 sock_hold(lsk); /* sock_put in smc_listen_work */
1317                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1318                 smc_copy_sock_settings_to_smc(new_smc);
1319                 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
1320                 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
1321                 sock_hold(&new_smc->sk); /* sock_put in passive closing */
1322                 if (!schedule_work(&new_smc->smc_listen_work))
1323                         sock_put(&new_smc->sk);
1324         }
1325
1326 out:
1327         release_sock(lsk);
1328         sock_put(&lsmc->sk); /* sock_hold in smc_listen */
1329 }
1330
1331 static int smc_listen(struct socket *sock, int backlog)
1332 {
1333         struct sock *sk = sock->sk;
1334         struct smc_sock *smc;
1335         int rc;
1336
1337         smc = smc_sk(sk);
1338         lock_sock(sk);
1339
1340         rc = -EINVAL;
1341         if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
1342                 goto out;
1343
1344         rc = 0;
1345         if (sk->sk_state == SMC_LISTEN) {
1346                 sk->sk_max_ack_backlog = backlog;
1347                 goto out;
1348         }
1349         /* some socket options are handled in core, so we could not apply
1350          * them to the clc socket -- copy smc socket options to clc socket
1351          */
1352         smc_copy_sock_settings_to_clc(smc);
1353         if (!smc->use_fallback)
1354                 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1355
1356         rc = kernel_listen(smc->clcsock, backlog);
1357         if (rc)
1358                 goto out;
1359         sk->sk_max_ack_backlog = backlog;
1360         sk->sk_ack_backlog = 0;
1361         sk->sk_state = SMC_LISTEN;
1362         sock_hold(sk); /* sock_hold in tcp_listen_worker */
1363         if (!schedule_work(&smc->tcp_listen_work))
1364                 sock_put(sk);
1365
1366 out:
1367         release_sock(sk);
1368         return rc;
1369 }
1370
1371 static int smc_accept(struct socket *sock, struct socket *new_sock,
1372                       int flags, bool kern)
1373 {
1374         struct sock *sk = sock->sk, *nsk;
1375         DECLARE_WAITQUEUE(wait, current);
1376         struct smc_sock *lsmc;
1377         long timeo;
1378         int rc = 0;
1379
1380         lsmc = smc_sk(sk);
1381         sock_hold(sk); /* sock_put below */
1382         lock_sock(sk);
1383
1384         if (lsmc->sk.sk_state != SMC_LISTEN) {
1385                 rc = -EINVAL;
1386                 release_sock(sk);
1387                 goto out;
1388         }
1389
1390         /* Wait for an incoming connection */
1391         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1392         add_wait_queue_exclusive(sk_sleep(sk), &wait);
1393         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
1394                 set_current_state(TASK_INTERRUPTIBLE);
1395                 if (!timeo) {
1396                         rc = -EAGAIN;
1397                         break;
1398                 }
1399                 release_sock(sk);
1400                 timeo = schedule_timeout(timeo);
1401                 /* wakeup by sk_data_ready in smc_listen_work() */
1402                 sched_annotate_sleep();
1403                 lock_sock(sk);
1404                 if (signal_pending(current)) {
1405                         rc = sock_intr_errno(timeo);
1406                         break;
1407                 }
1408         }
1409         set_current_state(TASK_RUNNING);
1410         remove_wait_queue(sk_sleep(sk), &wait);
1411
1412         if (!rc)
1413                 rc = sock_error(nsk);
1414         release_sock(sk);
1415         if (rc)
1416                 goto out;
1417
1418         if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
1419                 /* wait till data arrives on the socket */
1420                 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
1421                                                                 MSEC_PER_SEC);
1422                 if (smc_sk(nsk)->use_fallback) {
1423                         struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
1424
1425                         lock_sock(clcsk);
1426                         if (skb_queue_empty(&clcsk->sk_receive_queue))
1427                                 sk_wait_data(clcsk, &timeo, NULL);
1428                         release_sock(clcsk);
1429                 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
1430                         lock_sock(nsk);
1431                         smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
1432                         release_sock(nsk);
1433                 }
1434         }
1435
1436 out:
1437         sock_put(sk); /* sock_hold above */
1438         return rc;
1439 }
1440
1441 static int smc_getname(struct socket *sock, struct sockaddr *addr,
1442                        int peer)
1443 {
1444         struct smc_sock *smc;
1445
1446         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1447             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1448                 return -ENOTCONN;
1449
1450         smc = smc_sk(sock->sk);
1451
1452         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1453 }
1454
1455 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1456 {
1457         struct sock *sk = sock->sk;
1458         struct smc_sock *smc;
1459         int rc = -EPIPE;
1460
1461         smc = smc_sk(sk);
1462         lock_sock(sk);
1463         if ((sk->sk_state != SMC_ACTIVE) &&
1464             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1465             (sk->sk_state != SMC_INIT))
1466                 goto out;
1467
1468         if (msg->msg_flags & MSG_FASTOPEN) {
1469                 if (sk->sk_state == SMC_INIT) {
1470                         smc->use_fallback = true;
1471                         smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1472                 } else {
1473                         rc = -EINVAL;
1474                         goto out;
1475                 }
1476         }
1477
1478         if (smc->use_fallback)
1479                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1480         else
1481                 rc = smc_tx_sendmsg(smc, msg, len);
1482 out:
1483         release_sock(sk);
1484         return rc;
1485 }
1486
1487 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1488                        int flags)
1489 {
1490         struct sock *sk = sock->sk;
1491         struct smc_sock *smc;
1492         int rc = -ENOTCONN;
1493
1494         smc = smc_sk(sk);
1495         lock_sock(sk);
1496         if ((sk->sk_state == SMC_INIT) ||
1497             (sk->sk_state == SMC_LISTEN) ||
1498             (sk->sk_state == SMC_CLOSED))
1499                 goto out;
1500
1501         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1502                 rc = 0;
1503                 goto out;
1504         }
1505
1506         if (smc->use_fallback) {
1507                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1508         } else {
1509                 msg->msg_namelen = 0;
1510                 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1511         }
1512
1513 out:
1514         release_sock(sk);
1515         return rc;
1516 }
1517
1518 static __poll_t smc_accept_poll(struct sock *parent)
1519 {
1520         struct smc_sock *isk = smc_sk(parent);
1521         __poll_t mask = 0;
1522
1523         spin_lock(&isk->accept_q_lock);
1524         if (!list_empty(&isk->accept_q))
1525                 mask = EPOLLIN | EPOLLRDNORM;
1526         spin_unlock(&isk->accept_q_lock);
1527
1528         return mask;
1529 }
1530
1531 static __poll_t smc_poll(struct file *file, struct socket *sock,
1532                              poll_table *wait)
1533 {
1534         struct sock *sk = sock->sk;
1535         __poll_t mask = 0;
1536         struct smc_sock *smc;
1537
1538         if (!sk)
1539                 return EPOLLNVAL;
1540
1541         smc = smc_sk(sock->sk);
1542         if (smc->use_fallback) {
1543                 /* delegate to CLC child sock */
1544                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1545                 sk->sk_err = smc->clcsock->sk->sk_err;
1546                 if (sk->sk_err)
1547                         mask |= EPOLLERR;
1548         } else {
1549                 if (sk->sk_state != SMC_CLOSED)
1550                         sock_poll_wait(file, sock, wait);
1551                 if (sk->sk_err)
1552                         mask |= EPOLLERR;
1553                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1554                     (sk->sk_state == SMC_CLOSED))
1555                         mask |= EPOLLHUP;
1556                 if (sk->sk_state == SMC_LISTEN) {
1557                         /* woken up by sk_data_ready in smc_listen_work() */
1558                         mask = smc_accept_poll(sk);
1559                 } else {
1560                         if (atomic_read(&smc->conn.sndbuf_space) ||
1561                             sk->sk_shutdown & SEND_SHUTDOWN) {
1562                                 mask |= EPOLLOUT | EPOLLWRNORM;
1563                         } else {
1564                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1565                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1566                         }
1567                         if (atomic_read(&smc->conn.bytes_to_rcv))
1568                                 mask |= EPOLLIN | EPOLLRDNORM;
1569                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1570                                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1571                         if (sk->sk_state == SMC_APPCLOSEWAIT1)
1572                                 mask |= EPOLLIN;
1573                         if (smc->conn.urg_state == SMC_URG_VALID)
1574                                 mask |= EPOLLPRI;
1575                 }
1576         }
1577
1578         return mask;
1579 }
1580
1581 static int smc_shutdown(struct socket *sock, int how)
1582 {
1583         struct sock *sk = sock->sk;
1584         struct smc_sock *smc;
1585         int rc = -EINVAL;
1586         int rc1 = 0;
1587
1588         smc = smc_sk(sk);
1589
1590         if ((how < SHUT_RD) || (how > SHUT_RDWR))
1591                 return rc;
1592
1593         lock_sock(sk);
1594
1595         rc = -ENOTCONN;
1596         if ((sk->sk_state != SMC_ACTIVE) &&
1597             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1598             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1599             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1600             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1601             (sk->sk_state != SMC_APPFINCLOSEWAIT))
1602                 goto out;
1603         if (smc->use_fallback) {
1604                 rc = kernel_sock_shutdown(smc->clcsock, how);
1605                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1606                 if (sk->sk_shutdown == SHUTDOWN_MASK)
1607                         sk->sk_state = SMC_CLOSED;
1608                 goto out;
1609         }
1610         switch (how) {
1611         case SHUT_RDWR:         /* shutdown in both directions */
1612                 rc = smc_close_active(smc);
1613                 break;
1614         case SHUT_WR:
1615                 rc = smc_close_shutdown_write(smc);
1616                 break;
1617         case SHUT_RD:
1618                 rc = 0;
1619                 /* nothing more to do because peer is not involved */
1620                 break;
1621         }
1622         if (smc->clcsock)
1623                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
1624         /* map sock_shutdown_cmd constants to sk_shutdown value range */
1625         sk->sk_shutdown |= how + 1;
1626
1627 out:
1628         release_sock(sk);
1629         return rc ? rc : rc1;
1630 }
1631
1632 static int smc_setsockopt(struct socket *sock, int level, int optname,
1633                           char __user *optval, unsigned int optlen)
1634 {
1635         struct sock *sk = sock->sk;
1636         struct smc_sock *smc;
1637         int val, rc;
1638
1639         smc = smc_sk(sk);
1640
1641         /* generic setsockopts reaching us here always apply to the
1642          * CLC socket
1643          */
1644         rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1645                                            optval, optlen);
1646         if (smc->clcsock->sk->sk_err) {
1647                 sk->sk_err = smc->clcsock->sk->sk_err;
1648                 sk->sk_error_report(sk);
1649         }
1650         if (rc)
1651                 return rc;
1652
1653         if (optlen < sizeof(int))
1654                 return -EINVAL;
1655         if (get_user(val, (int __user *)optval))
1656                 return -EFAULT;
1657
1658         lock_sock(sk);
1659         switch (optname) {
1660         case TCP_ULP:
1661         case TCP_FASTOPEN:
1662         case TCP_FASTOPEN_CONNECT:
1663         case TCP_FASTOPEN_KEY:
1664         case TCP_FASTOPEN_NO_COOKIE:
1665                 /* option not supported by SMC */
1666                 if (sk->sk_state == SMC_INIT) {
1667                         smc->use_fallback = true;
1668                         smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1669                 } else {
1670                         if (!smc->use_fallback)
1671                                 rc = -EINVAL;
1672                 }
1673                 break;
1674         case TCP_NODELAY:
1675                 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1676                         if (val && !smc->use_fallback)
1677                                 mod_delayed_work(system_wq, &smc->conn.tx_work,
1678                                                  0);
1679                 }
1680                 break;
1681         case TCP_CORK:
1682                 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1683                         if (!val && !smc->use_fallback)
1684                                 mod_delayed_work(system_wq, &smc->conn.tx_work,
1685                                                  0);
1686                 }
1687                 break;
1688         case TCP_DEFER_ACCEPT:
1689                 smc->sockopt_defer_accept = val;
1690                 break;
1691         default:
1692                 break;
1693         }
1694         release_sock(sk);
1695
1696         return rc;
1697 }
1698
1699 static int smc_getsockopt(struct socket *sock, int level, int optname,
1700                           char __user *optval, int __user *optlen)
1701 {
1702         struct smc_sock *smc;
1703
1704         smc = smc_sk(sock->sk);
1705         /* socket options apply to the CLC socket */
1706         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1707                                              optval, optlen);
1708 }
1709
1710 static int smc_ioctl(struct socket *sock, unsigned int cmd,
1711                      unsigned long arg)
1712 {
1713         union smc_host_cursor cons, urg;
1714         struct smc_connection *conn;
1715         struct smc_sock *smc;
1716         int answ;
1717
1718         smc = smc_sk(sock->sk);
1719         conn = &smc->conn;
1720         lock_sock(&smc->sk);
1721         if (smc->use_fallback) {
1722                 if (!smc->clcsock) {
1723                         release_sock(&smc->sk);
1724                         return -EBADF;
1725                 }
1726                 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1727                 release_sock(&smc->sk);
1728                 return answ;
1729         }
1730         switch (cmd) {
1731         case SIOCINQ: /* same as FIONREAD */
1732                 if (smc->sk.sk_state == SMC_LISTEN) {
1733                         release_sock(&smc->sk);
1734                         return -EINVAL;
1735                 }
1736                 if (smc->sk.sk_state == SMC_INIT ||
1737                     smc->sk.sk_state == SMC_CLOSED)
1738                         answ = 0;
1739                 else
1740                         answ = atomic_read(&smc->conn.bytes_to_rcv);
1741                 break;
1742         case SIOCOUTQ:
1743                 /* output queue size (not send + not acked) */
1744                 if (smc->sk.sk_state == SMC_LISTEN) {
1745                         release_sock(&smc->sk);
1746                         return -EINVAL;
1747                 }
1748                 if (smc->sk.sk_state == SMC_INIT ||
1749                     smc->sk.sk_state == SMC_CLOSED)
1750                         answ = 0;
1751                 else
1752                         answ = smc->conn.sndbuf_desc->len -
1753                                         atomic_read(&smc->conn.sndbuf_space);
1754                 break;
1755         case SIOCOUTQNSD:
1756                 /* output queue size (not send only) */
1757                 if (smc->sk.sk_state == SMC_LISTEN) {
1758                         release_sock(&smc->sk);
1759                         return -EINVAL;
1760                 }
1761                 if (smc->sk.sk_state == SMC_INIT ||
1762                     smc->sk.sk_state == SMC_CLOSED)
1763                         answ = 0;
1764                 else
1765                         answ = smc_tx_prepared_sends(&smc->conn);
1766                 break;
1767         case SIOCATMARK:
1768                 if (smc->sk.sk_state == SMC_LISTEN) {
1769                         release_sock(&smc->sk);
1770                         return -EINVAL;
1771                 }
1772                 if (smc->sk.sk_state == SMC_INIT ||
1773                     smc->sk.sk_state == SMC_CLOSED) {
1774                         answ = 0;
1775                 } else {
1776                         smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1777                         smc_curs_copy(&urg, &conn->urg_curs, conn);
1778                         answ = smc_curs_diff(conn->rmb_desc->len,
1779                                              &cons, &urg) == 1;
1780                 }
1781                 break;
1782         default:
1783                 release_sock(&smc->sk);
1784                 return -ENOIOCTLCMD;
1785         }
1786         release_sock(&smc->sk);
1787
1788         return put_user(answ, (int __user *)arg);
1789 }
1790
1791 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1792                             int offset, size_t size, int flags)
1793 {
1794         struct sock *sk = sock->sk;
1795         struct smc_sock *smc;
1796         int rc = -EPIPE;
1797
1798         smc = smc_sk(sk);
1799         lock_sock(sk);
1800         if (sk->sk_state != SMC_ACTIVE) {
1801                 release_sock(sk);
1802                 goto out;
1803         }
1804         release_sock(sk);
1805         if (smc->use_fallback)
1806                 rc = kernel_sendpage(smc->clcsock, page, offset,
1807                                      size, flags);
1808         else
1809                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1810
1811 out:
1812         return rc;
1813 }
1814
1815 /* Map the affected portions of the rmbe into an spd, note the number of bytes
1816  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
1817  * updates till whenever a respective page has been fully processed.
1818  * Note that subsequent recv() calls have to wait till all splice() processing
1819  * completed.
1820  */
1821 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1822                                struct pipe_inode_info *pipe, size_t len,
1823                                unsigned int flags)
1824 {
1825         struct sock *sk = sock->sk;
1826         struct smc_sock *smc;
1827         int rc = -ENOTCONN;
1828
1829         smc = smc_sk(sk);
1830         lock_sock(sk);
1831
1832         if (sk->sk_state == SMC_INIT ||
1833             sk->sk_state == SMC_LISTEN ||
1834             sk->sk_state == SMC_CLOSED)
1835                 goto out;
1836
1837         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1838                 rc = 0;
1839                 goto out;
1840         }
1841
1842         if (smc->use_fallback) {
1843                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1844                                                     pipe, len, flags);
1845         } else {
1846                 if (*ppos) {
1847                         rc = -ESPIPE;
1848                         goto out;
1849                 }
1850                 if (flags & SPLICE_F_NONBLOCK)
1851                         flags = MSG_DONTWAIT;
1852                 else
1853                         flags = 0;
1854                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1855         }
1856 out:
1857         release_sock(sk);
1858
1859         return rc;
1860 }
1861
1862 /* must look like tcp */
1863 static const struct proto_ops smc_sock_ops = {
1864         .family         = PF_SMC,
1865         .owner          = THIS_MODULE,
1866         .release        = smc_release,
1867         .bind           = smc_bind,
1868         .connect        = smc_connect,
1869         .socketpair     = sock_no_socketpair,
1870         .accept         = smc_accept,
1871         .getname        = smc_getname,
1872         .poll           = smc_poll,
1873         .ioctl          = smc_ioctl,
1874         .listen         = smc_listen,
1875         .shutdown       = smc_shutdown,
1876         .setsockopt     = smc_setsockopt,
1877         .getsockopt     = smc_getsockopt,
1878         .sendmsg        = smc_sendmsg,
1879         .recvmsg        = smc_recvmsg,
1880         .mmap           = sock_no_mmap,
1881         .sendpage       = smc_sendpage,
1882         .splice_read    = smc_splice_read,
1883 };
1884
1885 static int smc_create(struct net *net, struct socket *sock, int protocol,
1886                       int kern)
1887 {
1888         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1889         struct smc_sock *smc;
1890         struct sock *sk;
1891         int rc;
1892
1893         rc = -ESOCKTNOSUPPORT;
1894         if (sock->type != SOCK_STREAM)
1895                 goto out;
1896
1897         rc = -EPROTONOSUPPORT;
1898         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1899                 goto out;
1900
1901         rc = -ENOBUFS;
1902         sock->ops = &smc_sock_ops;
1903         sk = smc_sock_alloc(net, sock, protocol);
1904         if (!sk)
1905                 goto out;
1906
1907         /* create internal TCP socket for CLC handshake and fallback */
1908         smc = smc_sk(sk);
1909         smc->use_fallback = false; /* assume rdma capability first */
1910         smc->fallback_rsn = 0;
1911         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1912                               &smc->clcsock);
1913         if (rc) {
1914                 sk_common_release(sk);
1915                 goto out;
1916         }
1917         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1918         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1919
1920 out:
1921         return rc;
1922 }
1923
1924 static const struct net_proto_family smc_sock_family_ops = {
1925         .family = PF_SMC,
1926         .owner  = THIS_MODULE,
1927         .create = smc_create,
1928 };
1929
1930 static int __init smc_init(void)
1931 {
1932         int rc;
1933
1934         rc = smc_pnet_init();
1935         if (rc)
1936                 return rc;
1937
1938         rc = smc_llc_init();
1939         if (rc) {
1940                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1941                 goto out_pnet;
1942         }
1943
1944         rc = smc_cdc_init();
1945         if (rc) {
1946                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
1947                 goto out_pnet;
1948         }
1949
1950         rc = proto_register(&smc_proto, 1);
1951         if (rc) {
1952                 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
1953                 goto out_pnet;
1954         }
1955
1956         rc = proto_register(&smc_proto6, 1);
1957         if (rc) {
1958                 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
1959                 goto out_proto;
1960         }
1961
1962         rc = sock_register(&smc_sock_family_ops);
1963         if (rc) {
1964                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
1965                 goto out_proto6;
1966         }
1967         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
1968         INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
1969
1970         rc = smc_ib_register_client();
1971         if (rc) {
1972                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
1973                 goto out_sock;
1974         }
1975
1976         static_branch_enable(&tcp_have_smc);
1977         return 0;
1978
1979 out_sock:
1980         sock_unregister(PF_SMC);
1981 out_proto6:
1982         proto_unregister(&smc_proto6);
1983 out_proto:
1984         proto_unregister(&smc_proto);
1985 out_pnet:
1986         smc_pnet_exit();
1987         return rc;
1988 }
1989
/* Module exit: after smc_core_exit() (defined elsewhere), undo the
 * registrations done by smc_init() in reverse order - static branch,
 * IB client, socket family, IPv6 and IPv4 proto, pnet.
 */
static void __exit smc_exit(void)
{
	smc_core_exit();
	/* counterpart of static_branch_enable() in smc_init() */
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}
2000
/* smc_init() and smc_exit() are the entry and exit points of the module */
module_init(smc_init);
module_exit(smc_exit);

/* module metadata; the alias ties the module to protocol family PF_SMC */
MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);