/* drivers/infiniband/core/nldev.c */

/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
#include "uverbs.h"

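/*
 * This file implements the RDMA_NL_NLDEV netlink interface: device, port,
 * resource-tracking (restrack) and statistics queries. Userspace typically
 * reaches these handlers through iproute2's rdma tool, e.g. "rdma dev show"
 * or "rdma res show qp" (illustrative invocations).
 */
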
typedef int (*res_fill_func_t)(struct sk_buff*, bool,
                               struct rdma_restrack_entry*, uint32_t);

/*
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_CHARDEV]               = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_CHARDEV_ABI]           = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_CHARDEV_NAME]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_CHARDEV_TYPE]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
        [RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DEV_INDEX]             = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DEV_NAME]              = { .type = NLA_NUL_STRING,
                                        .len = IB_DEVICE_NAME_MAX },
        [RDMA_NLDEV_ATTR_DEV_NODE_TYPE]         = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DEV_PROTOCOL]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_DRIVER]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_DRIVER_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]     = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DRIVER_STRING]         = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_DRIVER_S32]            = { .type = NLA_S32 },
        [RDMA_NLDEV_ATTR_DRIVER_S64]            = { .type = NLA_S64 },
        [RDMA_NLDEV_ATTR_DRIVER_U32]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DRIVER_U64]            = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_FW_VERSION]            = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_LID]                   = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_LINK_TYPE]             = { .type = NLA_NUL_STRING,
                                        .len = IFNAMSIZ },
        [RDMA_NLDEV_ATTR_LMC]                   = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_NDEV_INDEX]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_NDEV_NAME]             = { .type = NLA_NUL_STRING,
                                        .len = IFNAMSIZ },
        [RDMA_NLDEV_ATTR_NODE_GUID]             = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_PORT_INDEX]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_PORT_PHYS_STATE]       = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_PORT_STATE]            = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_CM_ID]             = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]       = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CQ]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CQE]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CQ_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_DST_ADDR]          = {
                        .len = sizeof(struct __kernel_sockaddr_storage) },
        [RDMA_NLDEV_ATTR_RES_IOVA]              = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_KERN_NAME]         = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_RES_LKEY]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_LQPN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_MR]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_MRLEN]             = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_MR_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]    = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_PD]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PDN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_PD_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PID]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_POLL_CTX]          = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_QP]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_QP_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQPN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQ_PSN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_SQ_PSN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_SRC_ADDR]          = {
                        .len = sizeof(struct __kernel_sockaddr_storage) },
        [RDMA_NLDEV_ATTR_RES_STATE]             = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_SUMMARY]           = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]     = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_RES_TYPE]              = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_USECNT]            = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_SM_LID]                = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_SUBNET_PREFIX]         = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]   = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_MODE]             = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_RES]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_COUNTER]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]    = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]        = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]      = { .type = NLA_U32 },
        [RDMA_NLDEV_NET_NS_FD]                  = { .type = NLA_U32 },
        [RDMA_NLDEV_SYS_ATTR_NETNS_MODE]        = { .type = NLA_U8 },
};

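/*
 * Emit the attribute name and the optional print type that precede each
 * driver-specific value attribute added by the helpers below.
 */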
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
                                      enum rdma_nldev_print_type print_type)
{
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
                return -EMSGSIZE;
        if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
                return -EMSGSIZE;

        return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
                                   enum rdma_nldev_print_type print_type,
                                   u32 value)
{
        if (put_driver_name_print_type(msg, name, print_type))
                return -EMSGSIZE;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
                return -EMSGSIZE;

        return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
                                   enum rdma_nldev_print_type print_type,
                                   u64 value)
{
        if (put_driver_name_print_type(msg, name, print_type))
                return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;

        return 0;
}

int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
                              const char *str)
{
        if (put_driver_name_print_type(msg, name,
                                       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
                return -EMSGSIZE;
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
                return -EMSGSIZE;

        return 0;
}
EXPORT_SYMBOL(rdma_nl_put_driver_string);

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
        return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
                               u32 value)
{
        return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
        return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
        return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
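
/*
 * Typical driver-side usage (a sketch, not taken from this file): a
 * driver's fill_res_entry() callback can report private state with the
 * helpers above, e.g.
 *
 *	if (rdma_nl_put_driver_u32(msg, "sq_wqe_cnt", wqe_cnt))
 *		return -EMSGSIZE;
 *
 * where "sq_wqe_cnt" and wqe_cnt are illustrative names only.
 */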

static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
                return -EMSGSIZE;
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
                           dev_name(&device->dev)))
                return -EMSGSIZE;

        return 0;
}

static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
        char fw[IB_FW_VERSION_NAME_MAX];
        int ret = 0;
        u8 port;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
                return -EMSGSIZE;

        BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                              device->attrs.device_cap_flags,
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;

        ib_get_device_fw_str(device, fw);
        /* Device without FW has strlen(fw) = 0 */
        if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
                return -EMSGSIZE;

        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
                              be64_to_cpu(device->node_guid),
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
                              be64_to_cpu(device->attrs.sys_image_guid),
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
                return -EMSGSIZE;

        /*
         * The link type is determined from the first port. An mlx4 device,
         * which can potentially have two different link types on the same
         * IB device, is a case considered better avoided in the future.
         */
        port = rdma_start_port(device);
        if (rdma_cap_opa_mad(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
        else if (rdma_protocol_ib(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
        else if (rdma_protocol_iwarp(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
        else if (rdma_protocol_roce(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
        else if (rdma_protocol_usnic(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
                                     "usnic");
        return ret;
}

static int fill_port_info(struct sk_buff *msg,
                          struct ib_device *device, u32 port,
                          const struct net *net)
{
        struct net_device *netdev = NULL;
        struct ib_port_attr attr;
        int ret;
        u64 cap_flags = 0;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
                return -EMSGSIZE;

        ret = ib_query_port(device, port, &attr);
        if (ret)
                return ret;

        if (rdma_protocol_ib(device, port)) {
                BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
                                sizeof(attr.port_cap_flags2)) > sizeof(u64));
                cap_flags = attr.port_cap_flags |
                        ((u64)attr.port_cap_flags2 << 32);
                if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                                      cap_flags, RDMA_NLDEV_ATTR_PAD))
                        return -EMSGSIZE;
                if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
                                      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
                        return -EMSGSIZE;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
                        return -EMSGSIZE;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
                        return -EMSGSIZE;
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
                        return -EMSGSIZE;
        }
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
                return -EMSGSIZE;

        netdev = ib_device_get_netdev(device, port);
        if (netdev && net_eq(dev_net(netdev), net)) {
                ret = nla_put_u32(msg,
                                  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
                if (ret)
                        goto out;
                ret = nla_put_string(msg,
                                     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
        }

out:
        if (netdev)
                dev_put(netdev);
        return ret;
}

static int fill_res_info_entry(struct sk_buff *msg,
                               const char *name, u64 curr)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start_noflag(msg,
                                           RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
                              RDMA_NLDEV_ATTR_PAD))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
        static const char * const names[RDMA_RESTRACK_MAX] = {
                [RDMA_RESTRACK_PD] = "pd",
                [RDMA_RESTRACK_CQ] = "cq",
                [RDMA_RESTRACK_QP] = "qp",
                [RDMA_RESTRACK_CM_ID] = "cm_id",
                [RDMA_RESTRACK_MR] = "mr",
                [RDMA_RESTRACK_CTX] = "ctx",
        };

        struct nlattr *table_attr;
        int ret, i, curr;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
        if (!table_attr)
                return -EMSGSIZE;

        for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
                if (!names[i])
                        continue;
                curr = rdma_restrack_count(device, i);
                ret = fill_res_info_entry(msg, names[i], curr);
                if (ret)
                        goto err;
        }

        nla_nest_end(msg, table_attr);
        return 0;

err:
        nla_nest_cancel(msg, table_attr);
        return ret;
}

static int fill_res_name_pid(struct sk_buff *msg,
                             struct rdma_restrack_entry *res)
{
        int err = 0;

        /*
         * For user resources, the user should read /proc/PID/comm to get
         * the name of the task.
         */
        if (rdma_is_kernel_res(res)) {
                err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
                                     res->kern_name);
        } else {
                pid_t pid;

                pid = task_pid_vnr(res->task);
                /*
                 * If the task is dead and in zombie state, there is no
                 * need to print the PID anymore.
                 */
                if (pid)
                        /*
                         * This part is racy: the task can be killed and the
                         * PID become zero right here, but that is OK, the
                         * next query won't return a PID. We don't promise
                         * real-time reflection of SW objects.
                         */
                        err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
        }

        return err ? -EMSGSIZE : 0;
}

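/*
 * Let the driver add its own restrack attributes, if it provides the
 * optional fill_res_entry() callback; a true result is treated as an
 * error by the callers.
 */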
static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
                           struct rdma_restrack_entry *res)
{
        if (!dev->ops.fill_res_entry)
                return false;
        return dev->ops.fill_res_entry(msg, res);
}

static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
                            struct rdma_restrack_entry *res)
{
        if (!dev->ops.fill_stat_entry)
                return false;
        return dev->ops.fill_stat_entry(msg, res);
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_qp *qp = container_of(res, struct ib_qp, res);
        struct ib_device *dev = qp->device;
        struct ib_qp_init_attr qp_init_attr;
        struct ib_qp_attr qp_attr;
        int ret;

        ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
        if (ret)
                return ret;

        if (port && port != qp_attr.port_num)
                return -EAGAIN;

        /* In create_qp() port is not set yet */
        if (qp_attr.port_num &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
                goto err;
        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
                                qp_attr.dest_qp_num))
                        goto err;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
                                qp_attr.rq_psn))
                        goto err;
        }

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
                goto err;

        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
            qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
                               qp_attr.path_mig_state))
                        goto err;
        }
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
                goto err;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                struct rdma_restrack_entry *res, uint32_t port)
{
        struct rdma_id_private *id_priv =
                                container_of(res, struct rdma_id_private, res);
        struct ib_device *dev = id_priv->id.device;
        struct rdma_cm_id *cm_id = &id_priv->id;

        if (port && port != cm_id->port_num)
                return 0;

        if (cm_id->port_num &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
                goto err;

        if (id_priv->qp_num) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
                        goto err;
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
                        goto err;
        }

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
                goto err;

        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
                goto err;

        if (cm_id->route.addr.src_addr.ss_family &&
            nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
                    sizeof(cm_id->route.addr.src_addr),
                    &cm_id->route.addr.src_addr))
                goto err;
        if (cm_id->route.addr.dst_addr.ss_family &&
            nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
                    sizeof(cm_id->route.addr.dst_addr),
                    &cm_id->route.addr.dst_addr))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_cq *cq = container_of(res, struct ib_cq, res);
        struct ib_device *dev = cq->device;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                              atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
                goto err;

        /* Poll context is only valid for kernel CQs */
        if (rdma_is_kernel_res(res) &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
                goto err;

        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
                goto err;
        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
                        cq->uobject->uevent.uobject.context->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
        struct ib_device *dev = mr->pd->device;

        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
                        goto err;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
                        goto err;
        }

        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
                              RDMA_NLDEV_ATTR_PAD))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_pd *pd = container_of(res, struct ib_pd, res);
        struct ib_device *dev = pd->device;

        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
                                pd->local_dma_lkey))
                        goto err;
                if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
                    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
                                pd->unsafe_global_rkey))
                        goto err;
        }
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                              atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
                        pd->uobject->context->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_stat_counter_mode(struct sk_buff *msg,
                                  struct rdma_counter *counter)
{
        struct rdma_counter_mode *m = &counter->mode;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
                return -EMSGSIZE;

        if (m->mode == RDMA_COUNTER_MODE_AUTO)
                if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
                    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
                        return -EMSGSIZE;

        return 0;
}

static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}

static int fill_stat_counter_qps(struct sk_buff *msg,
                                 struct rdma_counter *counter)
{
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        struct nlattr *table_attr;
        struct ib_qp *qp = NULL;
        unsigned long id = 0;
        int ret = 0;

        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
        if (!table_attr)
                return -EMSGSIZE;

        rt = &counter->device->res[RDMA_RESTRACK_QP];
        xa_lock(&rt->xa);
        xa_for_each(&rt->xa, id, res) {
                qp = container_of(res, struct ib_qp, res);
                if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                        continue;

                if (!qp->counter || (qp->counter->id != counter->id))
                        continue;

                ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
                if (ret)
                        goto err;
        }

        xa_unlock(&rt->xa);
        nla_nest_end(msg, table_attr);
        return 0;

err:
        xa_unlock(&rt->xa);
        nla_nest_cancel(msg, table_attr);
        return ret;
}

int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
                                 u64 value)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
                           name))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
                              value, RDMA_NLDEV_ATTR_PAD))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
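
/*
 * Drivers with hardware counters can reuse this exported helper from
 * their own fill_stat_entry() callback (a sketch with an assumed
 * counter name):
 *
 *	if (rdma_nl_stat_hwcounter_entry(msg, "rx_pkts", val))
 *		return -EMSGSIZE;
 */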

static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                              struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
        struct ib_device *dev = mr->pd->device;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
                goto err;

        if (fill_stat_entry(dev, msg, res))
                goto err;

        return 0;

err:
        return -EMSGSIZE;
}

static int fill_stat_counter_hwcounters(struct sk_buff *msg,
                                        struct rdma_counter *counter)
{
        struct rdma_hw_stats *st = counter->stats;
        struct nlattr *table_attr;
        int i;

        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
        if (!table_attr)
                return -EMSGSIZE;

        for (i = 0; i < st->num_counters; i++)
                if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
                        goto err;

        nla_nest_end(msg, table_attr);
        return 0;

err:
        nla_nest_cancel(msg, table_attr);
        return -EMSGSIZE;
}

static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                  struct rdma_restrack_entry *res,
                                  uint32_t port)
{
        struct rdma_counter *counter =
                container_of(res, struct rdma_counter, res);

        if (port && port != counter->port)
                return -EAGAIN;

        /* Dump it even if the query failed */
        rdma_counter_query_stats(counter);

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
            fill_res_name_pid(msg, &counter->res) ||
            fill_stat_counter_mode(msg, counter) ||
            fill_stat_counter_qps(msg, counter) ||
            fill_stat_counter_hwcounters(msg, counter))
                return -EMSGSIZE;

        return 0;
}

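/*
 * The netlink "doit" handlers below answer a single request with a
 * freshly allocated unicast reply; the "dumpit" handlers stream
 * NLM_F_MULTI multipart replies and resume from the index saved in
 * cb->args[0].
 */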
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);

        err = fill_dev_info(msg, device);
        if (err)
                goto err_free;

        nlmsg_end(msg, nlh);

        ib_device_put(device);
        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return err;
}

static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        u32 index;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
                char name[IB_DEVICE_NAME_MAX] = {};

                nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
                            IB_DEVICE_NAME_MAX);
                err = ib_device_rename(device, name);
                goto done;
        }

        if (tb[RDMA_NLDEV_NET_NS_FD]) {
                u32 ns_fd;

                ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
                err = ib_device_set_netns_put(skb, device, ns_fd);
                goto put_done;
        }

        if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
                u8 use_dim;

                use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
                err = ib_device_set_dim(device, use_dim);
                goto done;
        }

done:
        ib_device_put(device);
put_done:
        return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
                             struct sk_buff *skb,
                             struct netlink_callback *cb,
                             unsigned int idx)
{
        int start = cb->args[0];
        struct nlmsghdr *nlh;

        if (idx < start)
                return 0;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, NLM_F_MULTI);

        if (fill_dev_info(skb, device)) {
                nlmsg_cancel(skb, nlh);
                goto out;
        }

        nlmsg_end(skb, nlh);

        idx++;

out:    cb->args[0] = idx;
        return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
        /*
         * There is no need to take a lock, because we rely on ib_core's
         * locking.
         */
        return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        u32 port;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err ||
            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
            !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                err = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);

        err = fill_port_info(msg, device, port, sock_net(skb->sk));
        if (err)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);

        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
                                 struct netlink_callback *cb)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        int start = cb->args[0];
        struct nlmsghdr *nlh;
        u32 idx = 0;
        u32 ifindex;
        int err;
        unsigned int p;

        err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, NULL);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
        if (!device)
                return -EINVAL;

        rdma_for_each_port (device, p) {
                /*
                 * The dumpit function returns all information from a
                 * specific index. This index is taken from the netlink
                 * request sent by the user and is available in
                 * cb->args[0].
                 *
                 * Usually, the user doesn't fill this field, which causes
                 * everything to be returned.
                 */
                if (idx < start) {
                        idx++;
                        continue;
                }

                nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
                                cb->nlh->nlmsg_seq,
                                RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                                 RDMA_NLDEV_CMD_PORT_GET),
                                0, NLM_F_MULTI);

                if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
                        nlmsg_cancel(skb, nlh);
                        goto out;
                }
                idx++;
                nlmsg_end(skb, nlh);
        }

out:
        ib_device_put(device);
        cb->args[0] = idx;
        return skb->len;
}

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        int ret;

        ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
                        0, 0);

        ret = fill_res_info(msg, device);
        if (ret)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
                                 struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 unsigned int idx)
{
        int start = cb->args[0];
        struct nlmsghdr *nlh;

        if (idx < start)
                return 0;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
                        0, NLM_F_MULTI);

        if (fill_res_info(skb, device)) {
                nlmsg_cancel(skb, nlh);
                goto out;
        }
        nlmsg_end(skb, nlh);

        idx++;

out:
        cb->args[0] = idx;
        return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
        enum rdma_nldev_attr nldev_attr;
        enum rdma_nldev_command nldev_cmd;
        u8 flags;
        u32 entry;
        u32 id;
};

enum nldev_res_flags {
        NLDEV_PER_DEV = 1 << 0,
};

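/*
 * Per-restrack-type dispatch table for the common doit/dumpit helpers:
 * the netlink command plus the nesting and ID attributes used for each
 * resource type. NLDEV_PER_DEV marks per-device resources, which are
 * queried without a port index.
 */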
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
        [RDMA_RESTRACK_QP] = {
                .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
                .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_LQPN,
        },
        [RDMA_RESTRACK_CM_ID] = {
                .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
                .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
        },
        [RDMA_RESTRACK_CQ] = {
                .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CQN,
        },
        [RDMA_RESTRACK_MR] = {
                .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_MRN,
        },
        [RDMA_RESTRACK_PD] = {
                .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_PDN,
        },
        [RDMA_RESTRACK_COUNTER] = {
                .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
                .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
                .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
        },
};

static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack,
                               enum rdma_restrack_type res_type,
                               res_fill_func_t fill_func)
{
        const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct rdma_restrack_entry *res;
        struct ib_device *device;
        u32 index, id, port = 0;
        bool has_cap_net_admin;
        struct sk_buff *msg;
        int ret;

        ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
                port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
                if (!rdma_is_port_valid(device, port)) {
                        ret = -EINVAL;
                        goto err;
                }
        }

        if ((port && fe->flags & NLDEV_PER_DEV) ||
            (!port && ~fe->flags & NLDEV_PER_DEV)) {
                ret = -EINVAL;
                goto err;
        }

        id = nla_get_u32(tb[fe->id]);
        res = rdma_restrack_get_byid(device, res_type, id);
        if (IS_ERR(res)) {
                ret = PTR_ERR(res);
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err_get;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
                        0, 0);

        if (fill_nldev_handle(msg, device)) {
                ret = -EMSGSIZE;
                goto err_free;
        }

        has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

        ret = fill_func(msg, has_cap_net_admin, res, port);

        rdma_restrack_put(res);
        if (ret)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err_get:
        rdma_restrack_put(res);
err:
        ib_device_put(device);
        return ret;
}

static int res_get_common_dumpit(struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 enum rdma_restrack_type res_type,
                                 res_fill_func_t fill_func)
{
        const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        int err, ret = 0, idx = 0;
        struct nlattr *table_attr;
        struct nlattr *entry_attr;
        struct ib_device *device;
        int start = cb->args[0];
        bool has_cap_net_admin;
        struct nlmsghdr *nlh;
        unsigned long id;
        u32 index, port = 0;
        bool filled = false;

        err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, NULL);
        /*
         * Right now, we expect the device index in order to get the res
         * information, but it is possible to extend this code to return all
         * devices in one shot by checking for the existence of
         * RDMA_NLDEV_ATTR_DEV_INDEX; if it doesn't exist, iterate over all
         * devices. That is not needed for now.
         */
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        /*
         * If no PORT_INDEX is supplied, we will return all resources of the
         * requested type from that device.
         */
1350         if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1351                 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1352                 if (!rdma_is_port_valid(device, port)) {
1353                         ret = -EINVAL;
1354                         goto err_index;
1355                 }
1356         }
1357
1358         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1359                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1360                         0, NLM_F_MULTI);
1361
1362         if (fill_nldev_handle(skb, device)) {
1363                 ret = -EMSGSIZE;
1364                 goto err;
1365         }
1366
1367         table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1368         if (!table_attr) {
1369                 ret = -EMSGSIZE;
1370                 goto err;
1371         }
1372
1373         has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1374
1375         rt = &device->res[res_type];
1376         xa_lock(&rt->xa);
1377         /*
1378          * FIXME: if the skip ahead is something common this loop should
1379          * use xas_for_each & xas_pause to optimize, we can have a lot of
1380          * objects.
1381          */
1382         xa_for_each(&rt->xa, id, res) {
1383                 if (idx < start || !rdma_restrack_get(res))
1384                         goto next;
1385
1386                 xa_unlock(&rt->xa);
1387
1388                 filled = true;
1389
1390                 entry_attr = nla_nest_start_noflag(skb, fe->entry);
1391                 if (!entry_attr) {
1392                         ret = -EMSGSIZE;
1393                         rdma_restrack_put(res);
1394                         goto msg_full;
1395                 }
1396
1397                 ret = fill_func(skb, has_cap_net_admin, res, port);
1398
1399                 rdma_restrack_put(res);
1400
1401                 if (ret) {
1402                         nla_nest_cancel(skb, entry_attr);
1403                         if (ret == -EMSGSIZE)
1404                                 goto msg_full;
1405                         if (ret == -EAGAIN)
1406                                 goto again;
1407                         goto res_err;
1408                 }
1409                 nla_nest_end(skb, entry_attr);
1410 again:          xa_lock(&rt->xa);
1411 next:           idx++;
1412         }
1413         xa_unlock(&rt->xa);
1414
1415 msg_full:
1416         nla_nest_end(skb, table_attr);
1417         nlmsg_end(skb, nlh);
1418         cb->args[0] = idx;
1419
1420         /*
1421          * No more entries to fill: cancel the message and
1422          * return 0 to mark the end of the dumpit.
1423          */
1424         if (!filled)
1425                 goto err;
1426
1427         ib_device_put(device);
1428         return skb->len;
1429
1430 res_err:
1431         nla_nest_cancel(skb, table_attr);
1432
1433 err:
1434         nlmsg_cancel(skb, nlh);
1435
1436 err_index:
1437         ib_device_put(device);
1438         return ret;
1439 }
1440
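/*
 * Generate the trivial doit/dumpit wrappers for each restrack resource
 * type. As a sketch, RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) expands to:
 *
 *        static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 *                                           struct netlink_callback *cb)
 *        {
 *                return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP,
 *                                             fill_res_qp_entry);
 *        }
 *
 * plus the matching nldev_res_get_qp_doit().
 */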
1441 #define RES_GET_FUNCS(name, type)                                              \
1442         static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1443                                                  struct netlink_callback *cb)  \
1444         {                                                                      \
1445                 return res_get_common_dumpit(skb, cb, type,                    \
1446                                              fill_res_##name##_entry);         \
1447         }                                                                      \
1448         static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1449                                                struct nlmsghdr *nlh,           \
1450                                                struct netlink_ext_ack *extack) \
1451         {                                                                      \
1452                 return res_get_common_doit(skb, nlh, extack, type,             \
1453                                            fill_res_##name##_entry);           \
1454         }
1455
1456 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1457 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1458 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1459 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1460 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1461 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1462
1463 static LIST_HEAD(link_ops);
1464 static DECLARE_RWSEM(link_ops_rwsem);
1465
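/* Must be called with link_ops_rwsem held (read or write). */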
1466 static const struct rdma_link_ops *link_ops_get(const char *type)
1467 {
1468         const struct rdma_link_ops *ops;
1469
1470         list_for_each_entry(ops, &link_ops, list) {
1471                 if (!strcmp(ops->type, type))
1472                         goto out;
1473         }
1474         ops = NULL;
1475 out:
1476         return ops;
1477 }
1478
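/*
 * Drivers that support "rdma link add" register a set of link ops for
 * their type at module load. A minimal sketch, with hypothetical names
 * (the rxe driver follows this pattern):
 *
 *        static struct rdma_link_ops foo_link_ops = {
 *                .type = "foo",
 *                .newlink = foo_newlink,
 *        };
 *
 *        rdma_link_register(&foo_link_ops);
 */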
1479 void rdma_link_register(struct rdma_link_ops *ops)
1480 {
1481         down_write(&link_ops_rwsem);
1482         if (WARN_ON_ONCE(link_ops_get(ops->type)))
1483                 goto out;
1484         list_add(&ops->list, &link_ops);
1485 out:
1486         up_write(&link_ops_rwsem);
1487 }
1488 EXPORT_SYMBOL(rdma_link_register);
1489
1490 void rdma_link_unregister(struct rdma_link_ops *ops)
1491 {
1492         down_write(&link_ops_rwsem);
1493         list_del(&ops->list);
1494         up_write(&link_ops_rwsem);
1495 }
1496 EXPORT_SYMBOL(rdma_link_unregister);
1497
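/*
 * Create a new link device, e.g. "rdma link add rxe0 type rxe netdev
 * eth0" with the iproute2 rdma tool. If no ops are registered for the
 * requested type, the "rdma-link-<type>" module alias is used to load
 * the matching driver on demand.
 */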
1498 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1499                           struct netlink_ext_ack *extack)
1500 {
1501         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1502         char ibdev_name[IB_DEVICE_NAME_MAX];
1503         const struct rdma_link_ops *ops;
1504         char ndev_name[IFNAMSIZ];
1505         struct net_device *ndev;
1506         char type[IFNAMSIZ];
1507         int err;
1508
1509         err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1510                                      nldev_policy, extack);
1511         if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1512             !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1513                 return -EINVAL;
1514
1515         nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1516                     sizeof(ibdev_name));
1517         if (strchr(ibdev_name, '%'))
1518                 return -EINVAL;
1519
1520         nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1521         nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1522                     sizeof(ndev_name));
1523
1524         ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1525         if (!ndev)
1526                 return -ENODEV;
1527
1528         down_read(&link_ops_rwsem);
1529         ops = link_ops_get(type);
1530 #ifdef CONFIG_MODULES
1531         if (!ops) {
1532                 up_read(&link_ops_rwsem);
1533                 request_module("rdma-link-%s", type);
1534                 down_read(&link_ops_rwsem);
1535                 ops = link_ops_get(type);
1536         }
1537 #endif
1538         err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1539         up_read(&link_ops_rwsem);
1540         dev_put(ndev);
1541
1542         return err;
1543 }
1544
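/*
 * Remove a link device (e.g. "rdma link delete rxe0"). Only devices
 * that set IB_DEVICE_ALLOW_USER_UNREG may be unregistered from user
 * space.
 */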
1545 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1546                           struct netlink_ext_ack *extack)
1547 {
1548         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1549         struct ib_device *device;
1550         u32 index;
1551         int err;
1552
1553         err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1554                                      nldev_policy, extack);
1555         if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1556                 return -EINVAL;
1557
1558         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1559         device = ib_device_get_by_index(sock_net(skb->sk), index);
1560         if (!device)
1561                 return -EINVAL;
1562
1563         if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1564                 ib_device_put(device);
1565                 return -EINVAL;
1566         }
1567
1568         ib_unregister_device_and_put(device);
1569         return 0;
1570 }
1571
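/*
 * Look up the char device that a kernel client (such as "uverbs")
 * exposes for a given device/port and return its dev_t, ABI version
 * and name to user space.
 */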
1572 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1573                              struct netlink_ext_ack *extack)
1574 {
1575         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1576         char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1577         struct ib_client_nl_info data = {};
1578         struct ib_device *ibdev = NULL;
1579         struct sk_buff *msg;
1580         u32 index;
1581         int err;
1582
1583         err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1584                           extack);
1585         if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1586                 return -EINVAL;
1587
1588         nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1589                     sizeof(client_name));
1590
1591         if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1592                 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1593                 ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1594                 if (!ibdev)
1595                         return -EINVAL;
1596
1597                 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1598                         data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1599                         if (!rdma_is_port_valid(ibdev, data.port)) {
1600                                 err = -EINVAL;
1601                                 goto out_put;
1602                         }
1603                 } else {
1604                         data.port = -1;
1605                 }
1606         } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1607                 return -EINVAL;
1608         }
1609
1610         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1611         if (!msg) {
1612                 err = -ENOMEM;
1613                 goto out_put;
1614         }
1615         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1616                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1617                                          RDMA_NLDEV_CMD_GET_CHARDEV),
1618                         0, 0);
1619
1620         data.nl_msg = msg;
1621         err = ib_get_client_nl_info(ibdev, client_name, &data);
1622         if (err)
1623                 goto out_nlmsg;
1624
1625         err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1626                                 huge_encode_dev(data.cdev->devt),
1627                                 RDMA_NLDEV_ATTR_PAD);
1628         if (err)
1629                 goto out_data;
1630         err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1631                                 RDMA_NLDEV_ATTR_PAD);
1632         if (err)
1633                 goto out_data;
1634         if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1635                            dev_name(data.cdev))) {
1636                 err = -EMSGSIZE;
1637                 goto out_data;
1638         }
1639
1640         nlmsg_end(msg, nlh);
1641         put_device(data.cdev);
1642         if (ibdev)
1643                 ib_device_put(ibdev);
1644         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1645
1646 out_data:
1647         put_device(data.cdev);
1648 out_nlmsg:
1649         nlmsg_free(msg);
1650 out_put:
1651         if (ibdev)
1652                 ib_device_put(ibdev);
1653         return err;
1654 }
1655
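/*
 * Report system-wide parameters; currently just whether RDMA devices
 * are shared across network namespaces ("rdma system show").
 */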
1656 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1657                               struct netlink_ext_ack *extack)
1658 {
1659         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1660         struct sk_buff *msg;
1661         int err;
1662
1663         err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1664                           nldev_policy, extack);
1665         if (err)
1666                 return err;
1667
1668         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1669         if (!msg)
1670                 return -ENOMEM;
1671
1672         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1673                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1674                                          RDMA_NLDEV_CMD_SYS_GET),
1675                         0, 0);
1676
1677         err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1678                          (u8)ib_devices_shared_netns);
1679         if (err) {
1680                 nlmsg_free(msg);
1681                 return err;
1682         }
1683         nlmsg_end(msg, nlh);
1684         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1685 }
1686
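/*
 * Switch between shared and exclusive network-namespace modes for RDMA
 * devices, roughly "rdma system set netns {shared|exclusive}" in
 * iproute2 terms.
 */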
1687 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1688                                   struct netlink_ext_ack *extack)
1689 {
1690         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1691         u8 enable;
1692         int err;
1693
1694         err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1695                           nldev_policy, extack);
1696         if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1697                 return -EINVAL;
1698
1699         enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1700         /* Only 0 and 1 are supported */
1701         if (enable > 1)
1702                 return -EINVAL;
1703
1704         return rdma_compatdev_set(enable);
1706 }
1707
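/*
 * Configure counter binding on a port: either select the automatic
 * mode, where new QPs are bound to counters according to the given
 * criteria mask, or manually bind one QP to an existing or newly
 * allocated counter. Roughly "rdma statistic qp set ..." and
 * "rdma statistic qp bind ..." in iproute2 terms.
 */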
1708 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1709                                struct netlink_ext_ack *extack)
1710 {
1711         u32 index, port, mode, mask = 0, qpn, cntn = 0;
1712         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1713         struct ib_device *device;
1714         struct sk_buff *msg;
1715         int ret;
1716
1717         ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1718                           nldev_policy, extack);
1719         /* Currently only QP counters are supported */
1720         if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1721             !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1722             !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1723                 return -EINVAL;
1724
1725         if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1726                 return -EINVAL;
1727
1728         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1729         device = ib_device_get_by_index(sock_net(skb->sk), index);
1730         if (!device)
1731                 return -EINVAL;
1732
1733         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1734         if (!rdma_is_port_valid(device, port)) {
1735                 ret = -EINVAL;
1736                 goto err;
1737         }
1738
1739         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1740         if (!msg) {
1741                 ret = -ENOMEM;
1742                 goto err;
1743         }
1744         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1745                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1746                                          RDMA_NLDEV_CMD_STAT_SET),
1747                         0, 0);
1748
1749         mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1750         if (mode == RDMA_COUNTER_MODE_AUTO) {
1751                 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1752                         mask = nla_get_u32(
1753                                 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1754
1755                 ret = rdma_counter_set_auto_mode(device, port,
1756                                                  mask ? true : false, mask);
1757                 if (ret)
1758                         goto err_msg;
1759         } else {
1760                 if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
1761                         ret = -EINVAL;
1762                         goto err_msg;
1763                 }
1762                 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1763                 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1764                         cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1765                         ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1766                 } else {
1767                         ret = rdma_counter_bind_qpn_alloc(device, port,
1768                                                           qpn, &cntn);
1769                 }
1770                 if (ret)
1771                         goto err_msg;
1772
1773                 if (fill_nldev_handle(msg, device) ||
1774                     nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1775                     nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1776                     nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1777                         ret = -EMSGSIZE;
1778                         goto err_fill;
1779                 }
1780         }
1781
1782         nlmsg_end(msg, nlh);
1783         ib_device_put(device);
1784         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1785
1786 err_fill:
1787         rdma_counter_unbind_qpn(device, port, qpn, cntn);
1788 err_msg:
1789         nlmsg_free(msg);
1790 err:
1791         ib_device_put(device);
1792         return ret;
1793 }
1794
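/*
 * Manually unbind a QP from the counter it was bound to, roughly
 * "rdma statistic qp unbind ..." in iproute2 terms.
 */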
1795 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1796                                struct netlink_ext_ack *extack)
1797 {
1798         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1799         struct ib_device *device;
1800         struct sk_buff *msg;
1801         u32 index, port, qpn, cntn;
1802         int ret;
1803
1804         ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1805                           nldev_policy, extack);
1806         if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1807             !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1808             !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1809             !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1810                 return -EINVAL;
1811
1812         if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1813                 return -EINVAL;
1814
1815         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1816         device = ib_device_get_by_index(sock_net(skb->sk), index);
1817         if (!device)
1818                 return -EINVAL;
1819
1820         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1821         if (!rdma_is_port_valid(device, port)) {
1822                 ret = -EINVAL;
1823                 goto err;
1824         }
1825
1826         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1827         if (!msg) {
1828                 ret = -ENOMEM;
1829                 goto err;
1830         }
1831         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1832                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1833                                          RDMA_NLDEV_CMD_STAT_SET),
1834                         0, 0);
1835
1836         cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1837         qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1838         if (fill_nldev_handle(msg, device) ||
1839             nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1840             nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1841             nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1842                 ret = -EMSGSIZE;
1843                 goto err_fill;
1844         }
1845
1846         ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
1847         if (ret)
1848                 goto err_fill;
1849
1850         nlmsg_end(msg, nlh);
1851         ib_device_put(device);
1852         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1853
1854 err_fill:
1855         nlmsg_free(msg);
1856 err:
1857         ib_device_put(device);
1858         return ret;
1859 }
1860
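/*
 * Dump the port-wide hardware counters: each value reported is the
 * driver's hw_stats reading plus the per-port total maintained by the
 * counter core via rdma_counter_get_hwstat_value().
 */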
1861 static int stat_get_doit_default_counter(struct sk_buff *skb,
1862                                          struct nlmsghdr *nlh,
1863                                          struct netlink_ext_ack *extack,
1864                                          struct nlattr *tb[])
1865 {
1866         struct rdma_hw_stats *stats;
1867         struct nlattr *table_attr;
1868         struct ib_device *device;
1869         int ret, num_cnts, i;
1870         struct sk_buff *msg;
1871         u32 index, port;
1872         u64 v;
1873
1874         if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1875                 return -EINVAL;
1876
1877         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1878         device = ib_device_get_by_index(sock_net(skb->sk), index);
1879         if (!device)
1880                 return -EINVAL;
1881
1882         if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
1883                 ret = -EINVAL;
1884                 goto err;
1885         }
1886
1887         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1888         if (!rdma_is_port_valid(device, port)) {
1889                 ret = -EINVAL;
1890                 goto err;
1891         }
1892
1893         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1894         if (!msg) {
1895                 ret = -ENOMEM;
1896                 goto err;
1897         }
1898
1899         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1900                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1901                                          RDMA_NLDEV_CMD_STAT_GET),
1902                         0, 0);
1903
1904         if (fill_nldev_handle(msg, device) ||
1905             nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
1906                 ret = -EMSGSIZE;
1907                 goto err_msg;
1908         }
1909
1910         stats = device->port_data ? device->port_data[port].hw_stats : NULL;
1911         if (!stats) {
1912                 ret = -EINVAL;
1913                 goto err_msg;
1914         }
1915         mutex_lock(&stats->lock);
1916
1917         num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
1918         if (num_cnts < 0) {
1919                 ret = -EINVAL;
1920                 goto err_stats;
1921         }
1922
1923         table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1924         if (!table_attr) {
1925                 ret = -EMSGSIZE;
1926                 goto err_stats;
1927         }
1928         for (i = 0; i < num_cnts; i++) {
1929                 v = stats->value[i] +
1930                         rdma_counter_get_hwstat_value(device, port, i);
1931                 if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
1932                         ret = -EMSGSIZE;
1933                         goto err_table;
1934                 }
1935         }
1936         nla_nest_end(msg, table_attr);
1937
1938         mutex_unlock(&stats->lock);
1939         nlmsg_end(msg, nlh);
1940         ib_device_put(device);
1941         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1942
1943 err_table:
1944         nla_nest_cancel(msg, table_attr);
1945 err_stats:
1946         mutex_unlock(&stats->lock);
1947 err_msg:
1948         nlmsg_free(msg);
1949 err:
1950         ib_device_put(device);
1951         return ret;
1952 }
1953
1954 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
1955                             struct netlink_ext_ack *extack, struct nlattr *tb[])
1957 {
1958         enum rdma_nl_counter_mode mode;
1959         enum rdma_nl_counter_mask mask;
1960         struct ib_device *device;
1961         struct sk_buff *msg;
1962         u32 index, port;
1963         int ret;
1964
1965         if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
1966                 return nldev_res_get_counter_doit(skb, nlh, extack);
1967
1968         if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
1969             !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1970                 return -EINVAL;
1971
1972         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1973         device = ib_device_get_by_index(sock_net(skb->sk), index);
1974         if (!device)
1975                 return -EINVAL;
1976
1977         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1978         if (!rdma_is_port_valid(device, port)) {
1979                 ret = -EINVAL;
1980                 goto err;
1981         }
1982
1983         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1984         if (!msg) {
1985                 ret = -ENOMEM;
1986                 goto err;
1987         }
1988
1989         nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1990                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1991                                          RDMA_NLDEV_CMD_STAT_GET),
1992                         0, 0);
1993
1994         ret = rdma_counter_get_mode(device, port, &mode, &mask);
1995         if (ret)
1996                 goto err_msg;
1997
1998         if (fill_nldev_handle(msg, device) ||
1999             nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2000             nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2001                 ret = -EMSGSIZE;
2002                 goto err_msg;
2003         }
2004
2005         if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2006             nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2007                 ret = -EMSGSIZE;
2008                 goto err_msg;
2009         }
2010
2011         nlmsg_end(msg, nlh);
2012         ib_device_put(device);
2013         return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2014
2015 err_msg:
2016         nlmsg_free(msg);
2017 err:
2018         ib_device_put(device);
2019         return ret;
2020 }
2021
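/*
 * Dispatch STAT_GET: without a STAT_RES attribute the port-wide default
 * counters are dumped; otherwise per-QP or per-MR statistics are
 * returned.
 */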
2022 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2023                                struct netlink_ext_ack *extack)
2024 {
2025         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2026         int ret;
2027
2028         ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2029                           nldev_policy, extack);
2030         if (ret)
2031                 return -EINVAL;
2032
2033         if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2034                 return stat_get_doit_default_counter(skb, nlh, extack, tb);
2035
2036         switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2037         case RDMA_NLDEV_ATTR_RES_QP:
2038                 ret = stat_get_doit_qp(skb, nlh, extack, tb);
2039                 break;
2040         case RDMA_NLDEV_ATTR_RES_MR:
2041                 ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2042                                           fill_stat_mr_entry);
2043                 break;
2044         default:
2045                 ret = -EINVAL;
2046                 break;
2047         }
2048
2049         return ret;
2050 }
2051
2052 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2053                                  struct netlink_callback *cb)
2054 {
2055         struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2056         int ret;
2057
2058         ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2059                           nldev_policy, NULL);
2060         if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2061                 return -EINVAL;
2062
2063         switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2064         case RDMA_NLDEV_ATTR_RES_QP:
2065                 ret = nldev_res_get_counter_dumpit(skb, cb);
2066                 break;
2067         case RDMA_NLDEV_ATTR_RES_MR:
2068                 ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2069                                             fill_stat_mr_entry);
2070                 break;
2071         default:
2072                 ret = -EINVAL;
2073                 break;
2074         }
2075
2076         return ret;
2077 }
2078
2079 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2080         [RDMA_NLDEV_CMD_GET] = {
2081                 .doit = nldev_get_doit,
2082                 .dump = nldev_get_dumpit,
2083         },
2084         [RDMA_NLDEV_CMD_GET_CHARDEV] = {
2085                 .doit = nldev_get_chardev,
2086         },
2087         [RDMA_NLDEV_CMD_SET] = {
2088                 .doit = nldev_set_doit,
2089                 .flags = RDMA_NL_ADMIN_PERM,
2090         },
2091         [RDMA_NLDEV_CMD_NEWLINK] = {
2092                 .doit = nldev_newlink,
2093                 .flags = RDMA_NL_ADMIN_PERM,
2094         },
2095         [RDMA_NLDEV_CMD_DELLINK] = {
2096                 .doit = nldev_dellink,
2097                 .flags = RDMA_NL_ADMIN_PERM,
2098         },
2099         [RDMA_NLDEV_CMD_PORT_GET] = {
2100                 .doit = nldev_port_get_doit,
2101                 .dump = nldev_port_get_dumpit,
2102         },
2103         [RDMA_NLDEV_CMD_RES_GET] = {
2104                 .doit = nldev_res_get_doit,
2105                 .dump = nldev_res_get_dumpit,
2106         },
2107         [RDMA_NLDEV_CMD_RES_QP_GET] = {
2108                 .doit = nldev_res_get_qp_doit,
2109                 .dump = nldev_res_get_qp_dumpit,
2110         },
2111         [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2112                 .doit = nldev_res_get_cm_id_doit,
2113                 .dump = nldev_res_get_cm_id_dumpit,
2114         },
2115         [RDMA_NLDEV_CMD_RES_CQ_GET] = {
2116                 .doit = nldev_res_get_cq_doit,
2117                 .dump = nldev_res_get_cq_dumpit,
2118         },
2119         [RDMA_NLDEV_CMD_RES_MR_GET] = {
2120                 .doit = nldev_res_get_mr_doit,
2121                 .dump = nldev_res_get_mr_dumpit,
2122         },
2123         [RDMA_NLDEV_CMD_RES_PD_GET] = {
2124                 .doit = nldev_res_get_pd_doit,
2125                 .dump = nldev_res_get_pd_dumpit,
2126         },
2127         [RDMA_NLDEV_CMD_SYS_GET] = {
2128                 .doit = nldev_sys_get_doit,
2129         },
2130         [RDMA_NLDEV_CMD_SYS_SET] = {
2131                 .doit = nldev_set_sys_set_doit,
2132         },
2133         [RDMA_NLDEV_CMD_STAT_SET] = {
2134                 .doit = nldev_stat_set_doit,
2135                 .flags = RDMA_NL_ADMIN_PERM,
2136         },
2137         [RDMA_NLDEV_CMD_STAT_GET] = {
2138                 .doit = nldev_stat_get_doit,
2139                 .dump = nldev_stat_get_dumpit,
2140         },
2141         [RDMA_NLDEV_CMD_STAT_DEL] = {
2142                 .doit = nldev_stat_del_doit,
2143                 .flags = RDMA_NL_ADMIN_PERM,
2144         },
2145 };
2146
2147 void __init nldev_init(void)
2148 {
2149         rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2150 }
2151
2152 void __exit nldev_exit(void)
2153 {
2154         rdma_nl_unregister(RDMA_NL_NLDEV);
2155 }
2156
2157 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);