OSDN Git Service

clk: at91: fix masterck name
[uclinux-h8/linux.git] / net / smc / smc_pnet.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Generic netlink support functions to configure an SMC-R PNET table
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
10  */
11
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
17
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
20
21 #include <rdma/ib_verbs.h>
22
23 #include "smc_pnet.h"
24 #include "smc_ib.h"
25 #include "smc_ism.h"
26
/* Netlink attribute policy for the SMC_PNETID_* attributes: the pnet name,
 * ethernet interface name and IB device name are NUL-terminated strings
 * with a bounded length, the IB port is an 8-bit value.
 */
27 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
28         [SMC_PNETID_NAME] = {
29                 .type = NLA_NUL_STRING,
30                 .len = SMC_MAX_PNETID_LEN - 1
31         },
32         [SMC_PNETID_ETHNAME] = {
33                 .type = NLA_NUL_STRING,
34                 .len = IFNAMSIZ - 1
35         },
36         [SMC_PNETID_IBNAME] = {
37                 .type = NLA_NUL_STRING,
38                 .len = IB_DEVICE_NAME_MAX - 1
39         },
40         [SMC_PNETID_IBPORT] = { .type = NLA_U8 }
41 };
42
/* Forward declaration: the family is referenced by the message helpers
 * below and is defined after the operations table.
 */
43 static struct genl_family smc_pnet_nl_family;
44
45 /**
46  * struct smc_pnettable - SMC PNET table anchor
47  * @lock: Read/write lock taken around every walk or change of @pnetlist
48  * @pnetlist: List of PNETIDs (struct smc_pnetentry linked via its list node)
    *
    * The single file-scope instance is initialized statically with an empty
    * list and an unlocked lock.
49  */
50 static struct smc_pnettable {
51         rwlock_t lock;
52         struct list_head pnetlist;
53 } smc_pnettable = {
54         .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
55         .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
56 };
57
58 /**
59  * struct smc_pnetentry - pnet identifier name entry
60  * @list: List node.
61  * @pnet_name: Pnet identifier name
62  * @ndev: pointer to network device.
63  * @smcibdev: Pointer to IB device.
    * @ib_port: Port number on the IB device.
64  */
65 struct smc_pnetentry {
66         struct list_head list;
67         char pnet_name[SMC_MAX_PNETID_LEN + 1];
68         struct net_device *ndev;
69         struct smc_ib_device *smcibdev;
70         u8 ib_port;
71 };
72
73 /* Check if two RDMA device entries are identical. Use device name and port
74  * number for comparison.
75  */
76 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
77                                  u8 ibport)
78 {
79         return pnetelem->ib_port == ibport &&
80                !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
81                         sizeof(pnetelem->smcibdev->ibdev->name));
82 }
83
84 /* Find a pnetid in the pnet table.
85  */
86 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
87 {
88         struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
89
90         read_lock(&smc_pnettable.lock);
91         list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
92                 if (!strncmp(pnetelem->pnet_name, pnet_name,
93                              sizeof(pnetelem->pnet_name))) {
94                         found_pnetelem = pnetelem;
95                         break;
96                 }
97         }
98         read_unlock(&smc_pnettable.lock);
99         return found_pnetelem;
100 }
101
102 /* Remove a pnetid from the pnet table.
103  */
104 static int smc_pnet_remove_by_pnetid(char *pnet_name)
105 {
106         struct smc_pnetentry *pnetelem, *tmp_pe;
107         int rc = -ENOENT;
108
109         write_lock(&smc_pnettable.lock);
110         list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
111                                  list) {
112                 if (!strncmp(pnetelem->pnet_name, pnet_name,
113                              sizeof(pnetelem->pnet_name))) {
114                         list_del(&pnetelem->list);
115                         dev_put(pnetelem->ndev);
116                         kfree(pnetelem);
117                         rc = 0;
118                         break;
119                 }
120         }
121         write_unlock(&smc_pnettable.lock);
122         return rc;
123 }
124
125 /* Remove a pnet entry mentioning a given network device from the pnet table.
126  */
127 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
128 {
129         struct smc_pnetentry *pnetelem, *tmp_pe;
130         int rc = -ENOENT;
131
132         write_lock(&smc_pnettable.lock);
133         list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
134                                  list) {
135                 if (pnetelem->ndev == ndev) {
136                         list_del(&pnetelem->list);
137                         dev_put(pnetelem->ndev);
138                         kfree(pnetelem);
139                         rc = 0;
140                         break;
141                 }
142         }
143         write_unlock(&smc_pnettable.lock);
144         return rc;
145 }
146
147 /* Remove a pnet entry mentioning a given ib device from the pnet table.
148  */
149 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
150 {
151         struct smc_pnetentry *pnetelem, *tmp_pe;
152         int rc = -ENOENT;
153
154         write_lock(&smc_pnettable.lock);
155         list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
156                                  list) {
157                 if (pnetelem->smcibdev == ibdev) {
158                         list_del(&pnetelem->list);
159                         dev_put(pnetelem->ndev);
160                         kfree(pnetelem);
161                         rc = 0;
162                         break;
163                 }
164         }
165         write_unlock(&smc_pnettable.lock);
166         return rc;
167 }
168
169 /* Append a pnetid to the end of the pnet table if not already on this list.
170  */
171 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
172 {
173         struct smc_pnetentry *pnetelem;
174         int rc = -EEXIST;
175
176         write_lock(&smc_pnettable.lock);
177         list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
178                 if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
179                              sizeof(new_pnetelem->pnet_name)) ||
180                     !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
181                              sizeof(new_pnetelem->ndev->name)) ||
182                     smc_pnet_same_ibname(pnetelem,
183                                          new_pnetelem->smcibdev->ibdev->name,
184                                          new_pnetelem->ib_port)) {
185                         dev_put(pnetelem->ndev);
186                         goto found;
187                 }
188         }
189         list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
190         rc = 0;
191 found:
192         write_unlock(&smc_pnettable.lock);
193         return rc;
194 }
195
196 /* The limit for pnetid is 16 characters.
197  * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
198  * Lower case letters are converted to upper case.
199  * Interior blanks should not be used.
200  */
201 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
202 {
203         char *bf = skip_spaces(pnet_name);
204         size_t len = strlen(bf);
205         char *end = bf + len;
206
207         if (!len)
208                 return false;
209         while (--end >= bf && isspace(*end))
210                 ;
211         if (end - bf >= SMC_MAX_PNETID_LEN)
212                 return false;
213         while (bf <= end) {
214                 if (!isalnum(*bf))
215                         return false;
216                 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
217                 bf++;
218         }
219         *pnetid = '\0';
220         return true;
221 }
222
223 /* Find an infiniband device by a given name. The device might not exist. */
224 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
225 {
226         struct smc_ib_device *ibdev;
227
228         spin_lock(&smc_ib_devices.lock);
229         list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
230                 if (!strncmp(ibdev->ibdev->name, ib_name,
231                              sizeof(ibdev->ibdev->name))) {
232                         goto out;
233                 }
234         }
235         ibdev = NULL;
236 out:
237         spin_unlock(&smc_ib_devices.lock);
238         return ibdev;
239 }
240
241 /* Parse the supplied netlink attributes and fill a pnetentry structure.
242  * For ethernet and infiniband device names verify that the devices exist.
243  */
244 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
245                                struct nlattr *tb[])
246 {
247         char *string, *ibname;
248         int rc;
249
250         memset(pnetelem, 0, sizeof(*pnetelem));
251         INIT_LIST_HEAD(&pnetelem->list);
252
253         rc = -EINVAL;
254         if (!tb[SMC_PNETID_NAME])
255                 goto error;
256         string = (char *)nla_data(tb[SMC_PNETID_NAME]);
257         if (!smc_pnetid_valid(string, pnetelem->pnet_name))
258                 goto error;
259
260         rc = -EINVAL;
261         if (!tb[SMC_PNETID_ETHNAME])
262                 goto error;
263         rc = -ENOENT;
264         string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
265         pnetelem->ndev = dev_get_by_name(net, string);
266         if (!pnetelem->ndev)
267                 goto error;
268
269         rc = -EINVAL;
270         if (!tb[SMC_PNETID_IBNAME])
271                 goto error;
272         rc = -ENOENT;
273         ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
274         ibname = strim(ibname);
275         pnetelem->smcibdev = smc_pnet_find_ib(ibname);
276         if (!pnetelem->smcibdev)
277                 goto error;
278
279         rc = -EINVAL;
280         if (!tb[SMC_PNETID_IBPORT])
281                 goto error;
282         pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
283         if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
284                 goto error;
285
286         return 0;
287
288 error:
289         if (pnetelem->ndev)
290                 dev_put(pnetelem->ndev);
291         return rc;
292 }
293
294 /* Convert an smc_pnetentry to a netlink attribute sequence */
295 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
296 {
297         if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
298             nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
299             nla_put_string(msg, SMC_PNETID_IBNAME,
300                            pnetelem->smcibdev->ibdev->name) ||
301             nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
302                 return -1;
303         return 0;
304 }
305
306 /* Retrieve one PNETID entry */
307 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
308 {
309         struct smc_pnetentry *pnetelem;
310         struct sk_buff *msg;
311         void *hdr;
312         int rc;
313
314         if (!info->attrs[SMC_PNETID_NAME])
315                 return -EINVAL;
316         pnetelem = smc_pnet_find_pnetid(
317                                 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
318         if (!pnetelem)
319                 return -ENOENT;
320         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
321         if (!msg)
322                 return -ENOMEM;
323
324         hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
325                           &smc_pnet_nl_family, 0, SMC_PNETID_GET);
326         if (!hdr) {
327                 rc = -EMSGSIZE;
328                 goto err_out;
329         }
330
331         if (smc_pnet_set_nla(msg, pnetelem)) {
332                 rc = -ENOBUFS;
333                 goto err_out;
334         }
335
336         genlmsg_end(msg, hdr);
337         return genlmsg_reply(msg, info);
338
339 err_out:
340         nlmsg_free(msg);
341         return rc;
342 }
343
344 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
345 {
346         struct net *net = genl_info_net(info);
347         struct smc_pnetentry *pnetelem;
348         int rc;
349
350         pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
351         if (!pnetelem)
352                 return -ENOMEM;
353         rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
354         if (!rc)
355                 rc = smc_pnet_enter(pnetelem);
356         if (rc) {
357                 kfree(pnetelem);
358                 return rc;
359         }
360         return rc;
361 }
362
363 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
364 {
365         if (!info->attrs[SMC_PNETID_NAME])
366                 return -EINVAL;
367         return smc_pnet_remove_by_pnetid(
368                                 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
369 }
370
371 static int smc_pnet_dump_start(struct netlink_callback *cb)
372 {
373         cb->args[0] = 0;
374         return 0;
375 }
376
377 static int smc_pnet_dumpinfo(struct sk_buff *skb,
378                              u32 portid, u32 seq, u32 flags,
379                              struct smc_pnetentry *pnetelem)
380 {
381         void *hdr;
382
383         hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
384                           flags, SMC_PNETID_GET);
385         if (!hdr)
386                 return -ENOMEM;
387         if (smc_pnet_set_nla(skb, pnetelem) < 0) {
388                 genlmsg_cancel(skb, hdr);
389                 return -EMSGSIZE;
390         }
391         genlmsg_end(skb, hdr);
392         return 0;
393 }
394
395 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
396 {
397         struct smc_pnetentry *pnetelem;
398         int idx = 0;
399
400         read_lock(&smc_pnettable.lock);
401         list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
402                 if (idx++ < cb->args[0])
403                         continue;
404                 if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
405                                       cb->nlh->nlmsg_seq, NLM_F_MULTI,
406                                       pnetelem)) {
407                         --idx;
408                         break;
409                 }
410         }
411         cb->args[0] = idx;
412         read_unlock(&smc_pnettable.lock);
413         return skb->len;
414 }
415
416 /* Remove and delete all pnetids from pnet table.
417  */
418 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
419 {
420         struct smc_pnetentry *pnetelem, *tmp_pe;
421
422         write_lock(&smc_pnettable.lock);
423         list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
424                                  list) {
425                 list_del(&pnetelem->list);
426                 dev_put(pnetelem->ndev);
427                 kfree(pnetelem);
428         }
429         write_unlock(&smc_pnettable.lock);
430         return 0;
431 }
432
433 /* SMC_PNETID generic netlink operation definition.
     * All four commands carry GENL_ADMIN_PERM and share smc_pnet_policy.
     * SMC_PNETID_GET supports both a single-entry request (doit) and a
     * table dump (start/dumpit).
     */
434 static const struct genl_ops smc_pnet_ops[] = {
435         {
436                 .cmd = SMC_PNETID_GET,
437                 .flags = GENL_ADMIN_PERM,
438                 .policy = smc_pnet_policy,
439                 .doit = smc_pnet_get,
440                 .dumpit = smc_pnet_dump,
441                 .start = smc_pnet_dump_start
442         },
443         {
444                 .cmd = SMC_PNETID_ADD,
445                 .flags = GENL_ADMIN_PERM,
446                 .policy = smc_pnet_policy,
447                 .doit = smc_pnet_add
448         },
449         {
450                 .cmd = SMC_PNETID_DEL,
451                 .flags = GENL_ADMIN_PERM,
452                 .policy = smc_pnet_policy,
453                 .doit = smc_pnet_del
454         },
455         {
456                 .cmd = SMC_PNETID_FLUSH,
457                 .flags = GENL_ADMIN_PERM,
458                 .policy = smc_pnet_policy,
459                 .doit = smc_pnet_flush
460         }
461 };
462
463 /* SMC_PNETID family definition.
     * Ties the family name and version to the operations in smc_pnet_ops;
     * attributes up to SMC_PNETID_MAX are accepted.
     */
464 static struct genl_family smc_pnet_nl_family __ro_after_init = {
465         .hdrsize = 0,
466         .name = SMCR_GENL_FAMILY_NAME,
467         .version = SMCR_GENL_FAMILY_VERSION,
468         .maxattr = SMC_PNETID_MAX,
469         .netnsok = true,
470         .module = THIS_MODULE,
471         .ops = smc_pnet_ops,
472         .n_ops =  ARRAY_SIZE(smc_pnet_ops)
473 };
474
475 static int smc_pnet_netdev_event(struct notifier_block *this,
476                                  unsigned long event, void *ptr)
477 {
478         struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
479
480         switch (event) {
481         case NETDEV_REBOOT:
482         case NETDEV_UNREGISTER:
483                 smc_pnet_remove_by_ndev(event_dev);
484                 return NOTIFY_OK;
485         default:
486                 return NOTIFY_DONE;
487         }
488 }
489
/* Notifier block registered in smc_pnet_init() so that
 * smc_pnet_netdev_event() is called for net device events.
 */
490 static struct notifier_block smc_netdev_notifier = {
491         .notifier_call = smc_pnet_netdev_event
492 };
493
494 int __init smc_pnet_init(void)
495 {
496         int rc;
497
498         rc = genl_register_family(&smc_pnet_nl_family);
499         if (rc)
500                 return rc;
501         rc = register_netdevice_notifier(&smc_netdev_notifier);
502         if (rc)
503                 genl_unregister_family(&smc_pnet_nl_family);
504         return rc;
505 }
506
507 void smc_pnet_exit(void)
508 {
509         smc_pnet_flush(NULL, NULL);
510         unregister_netdevice_notifier(&smc_netdev_notifier);
511         genl_unregister_family(&smc_pnet_nl_family);
512 }
513
514 /* Determine one base device for stacked net devices.
515  * If the lower device level contains more than one devices
516  * (for instance with bonding slaves), just the first device
517  * is used to reach a base device.
518  */
519 static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
520 {
521         int i, nest_lvl;
522
523         rtnl_lock();
524         nest_lvl = dev_get_nest_level(ndev);
525         for (i = 0; i < nest_lvl; i++) {
526                 struct list_head *lower = &ndev->adj_list.lower;
527
528                 if (list_empty(lower))
529                         break;
530                 lower = lower->next;
531                 ndev = netdev_lower_get_next(ndev, &lower);
532         }
533         rtnl_unlock();
534         return ndev;
535 }
536
537 /* Determine the corresponding IB device port based on the hardware PNETID.
538  * Searching stops at the first matching active IB device port with vlan_id
539  * configured.
540  */
541 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
542                                          struct smc_ib_device **smcibdev,
543                                          u8 *ibport, unsigned short vlan_id,
544                                          u8 gid[])
545 {
546         u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
547         struct smc_ib_device *ibdev;
548         int i;
549
550         ndev = pnet_find_base_ndev(ndev);
551         if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
552                                    ndev_pnetid))
553                 return; /* pnetid could not be determined */
554
555         spin_lock(&smc_ib_devices.lock);
556         list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
557                 for (i = 1; i <= SMC_MAX_PORTS; i++) {
558                         if (!rdma_is_port_valid(ibdev->ibdev, i))
559                                 continue;
560                         if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
561                                     SMC_MAX_PNETID_LEN) &&
562                             smc_ib_port_active(ibdev, i) &&
563                             !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
564                                                   NULL))  {
565                                 *smcibdev = ibdev;
566                                 *ibport = i;
567                                 goto out;
568                         }
569                 }
570         }
571 out:
572         spin_unlock(&smc_ib_devices.lock);
573 }
574
575 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
576                                         struct smcd_dev **smcismdev)
577 {
578         u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
579         struct smcd_dev *ismdev;
580
581         ndev = pnet_find_base_ndev(ndev);
582         if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
583                                    ndev_pnetid))
584                 return; /* pnetid could not be determined */
585
586         spin_lock(&smcd_dev_list.lock);
587         list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
588                 if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
589                         *smcismdev = ismdev;
590                         break;
591                 }
592         }
593         spin_unlock(&smcd_dev_list.lock);
594 }
595
596 /* Lookup of coupled ib_device via SMC pnet table */
597 static void smc_pnet_find_roce_by_table(struct net_device *netdev,
598                                         struct smc_ib_device **smcibdev,
599                                         u8 *ibport, unsigned short vlan_id,
600                                         u8 gid[])
601 {
602         struct smc_pnetentry *pnetelem;
603
604         read_lock(&smc_pnettable.lock);
605         list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
606                 if (netdev == pnetelem->ndev) {
607                         if (smc_ib_port_active(pnetelem->smcibdev,
608                                                pnetelem->ib_port) &&
609                             !smc_ib_determine_gid(pnetelem->smcibdev,
610                                                   pnetelem->ib_port, vlan_id,
611                                                   gid, NULL)) {
612                                 *smcibdev = pnetelem->smcibdev;
613                                 *ibport = pnetelem->ib_port;
614                         }
615                         break;
616                 }
617         }
618         read_unlock(&smc_pnettable.lock);
619 }
620
621 /* PNET table analysis for a given sock:
622  * determine ib_device and port belonging to used internal TCP socket
623  * ethernet interface.
624  */
625 void smc_pnet_find_roce_resource(struct sock *sk,
626                                  struct smc_ib_device **smcibdev, u8 *ibport,
627                                  unsigned short vlan_id, u8 gid[])
628 {
629         struct dst_entry *dst = sk_dst_get(sk);
630
631         *smcibdev = NULL;
632         *ibport = 0;
633
634         if (!dst)
635                 goto out;
636         if (!dst->dev)
637                 goto out_rel;
638
639         /* if possible, lookup via hardware-defined pnetid */
640         smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
641         if (*smcibdev)
642                 goto out_rel;
643
644         /* lookup via SMC PNET table */
645         smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
646
647 out_rel:
648         dst_release(dst);
649 out:
650         return;
651 }
652
653 void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
654 {
655         struct dst_entry *dst = sk_dst_get(sk);
656
657         *smcismdev = NULL;
658         if (!dst)
659                 goto out;
660         if (!dst->dev)
661                 goto out_rel;
662
663         /* if possible, lookup via hardware-defined pnetid */
664         smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
665
666 out_rel:
667         dst_release(dst);
668 out:
669         return;
670 }