drivers/net/ethernet/sfc/efx_channels.c
// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include <linux/module.h>
#include "efx_channels.h"
#include "efx.h"
#include "efx_common.h"
#include "tx_common.h"
#include "rx_common.h"
#include "nic.h"
#include "sriov.h"

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;
module_param(interrupt_mode, uint, 0444);
MODULE_PARM_DESC(interrupt_mode,
		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");

static unsigned int irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

static unsigned int irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/***************
 * Housekeeping
 ***************/

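/* Dummy channel-type callbacks, used where a channel type needs no special
 * handling.
 */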
int efx_channel_dummy_op_int(struct efx_channel *channel)
{
	return 0;
}

void efx_channel_dummy_op_void(struct efx_channel *channel)
{
}

static const struct efx_channel_type efx_default_channel_type = {
	.pre_probe		= efx_channel_dummy_op_int,
	.post_remove		= efx_channel_dummy_op_void,
	.get_name		= efx_get_channel_name,
	.copy			= efx_copy_channel,
	.want_txqs		= efx_default_channel_want_txqs,
	.keep_eventq		= false,
	.want_pio		= true,
};

/*************
 * INTERRUPTS
 *************/

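/* Count how many RSS channels we would like: one per online CPU core
 * (hyperthread siblings share a channel), unless overridden by the rss_cpus
 * module parameter.  The result is capped at EFX_MAX_RX_QUEUES and, when
 * SR-IOV is in use, at the VF RSS table size.
 */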
static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
	cpumask_var_t thread_mask;
	unsigned int count;
	int cpu;

	if (rss_cpus) {
		count = rss_cpus;
	} else {
		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
			netif_warn(efx, probe, efx->net_dev,
				   "RSS disabled due to allocation failure\n");
			return 1;
		}

		count = 0;
		for_each_online_cpu(cpu) {
			if (!cpumask_test_cpu(cpu, thread_mask)) {
				++count;
				cpumask_or(thread_mask, thread_mask,
					   topology_sibling_cpumask(cpu));
			}
		}

		free_cpumask_var(thread_mask);
	}

	if (count > EFX_MAX_RX_QUEUES) {
		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
			       "Reducing number of rx queues from %u to %u.\n",
			       count, EFX_MAX_RX_QUEUES);
		count = EFX_MAX_RX_QUEUES;
	}

	/* If RSS is requested for the PF *and* VFs then we can't write RSS
	 * table entries that are inaccessible to VFs
	 */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
		    count > efx_vf_size(efx)) {
			netif_warn(efx, probe, efx->net_dev,
				   "Reducing number of RSS channels from %u to %u for "
				   "VF support. Increase vf-msix-limit to use more "
				   "channels on the PF.\n",
				   count, efx_vf_size(efx));
			count = efx_vf_size(efx);
		}
	}
#endif

	return count;
}

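/* Work out how many MSI-X channels of each kind (RX/TX/extra/XDP) we can
 * support with the vectors and channels available, and record the split in
 * the efx_nic structure.  Returns the total channel count, or a negative
 * error code.
 */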
static int efx_allocate_msix_channels(struct efx_nic *efx,
				      unsigned int max_channels,
				      unsigned int extra_channels,
				      unsigned int parallelism)
{
	unsigned int n_channels = parallelism;
	int vec_count;
	int n_xdp_tx;
	int n_xdp_ev;

	if (efx_separate_tx_channels)
		n_channels *= 2;
	n_channels += extra_channels;

	/* To allow XDP transmit to happen from arbitrary NAPI contexts
	 * we allocate a TX queue per CPU. We share event queues across
	 * multiple tx queues, assuming tx and ev queues are both
	 * maximum size.
	 */

	n_xdp_tx = num_possible_cpus();
	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);

	vec_count = pci_msix_vec_count(efx->pci_dev);
	if (vec_count < 0)
		return vec_count;

	max_channels = min_t(unsigned int, vec_count, max_channels);

	/* Check resources.
	 * We need a channel per event queue, plus a VI per tx queue.
	 * This may be more pessimistic than it needs to be.
	 */
	if (n_channels + n_xdp_ev > max_channels) {
		netif_err(efx, drv, efx->net_dev,
			  "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
			  n_xdp_ev, n_channels, max_channels);
		efx->n_xdp_channels = 0;
		efx->xdp_tx_per_channel = 0;
		efx->xdp_tx_queue_count = 0;
	} else {
		efx->n_xdp_channels = n_xdp_ev;
		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
		efx->xdp_tx_queue_count = n_xdp_tx;
		n_channels += n_xdp_ev;
		netif_dbg(efx, drv, efx->net_dev,
			  "Allocating %d TX and %d event queues for XDP\n",
			  n_xdp_tx, n_xdp_ev);
	}

	if (vec_count < n_channels) {
		netif_err(efx, drv, efx->net_dev,
			  "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
			  vec_count, n_channels);
		netif_err(efx, drv, efx->net_dev,
			  "WARNING: Performance may be reduced.\n");
		n_channels = vec_count;
	}

	n_channels = min(n_channels, max_channels);

	efx->n_channels = n_channels;

	/* Ignore XDP tx channels when creating rx channels. */
	n_channels -= efx->n_xdp_channels;

	if (efx_separate_tx_channels) {
		efx->n_tx_channels =
			min(max(n_channels / 2, 1U),
			    efx->max_tx_channels);
		efx->tx_channel_offset =
			n_channels - efx->n_tx_channels;
		efx->n_rx_channels =
			max(n_channels -
			    efx->n_tx_channels, 1U);
	} else {
		efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
		efx->tx_channel_offset = 0;
		efx->n_rx_channels = n_channels;
	}

	efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
	efx->n_tx_channels = min(efx->n_tx_channels, parallelism);

	efx->xdp_channel_offset = n_channels;

	netif_dbg(efx, drv, efx->net_dev,
		  "Allocating %u RX channels\n",
		  efx->n_rx_channels);

	return efx->n_channels;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
int efx_probe_interrupts(struct efx_nic *efx)
{
	unsigned int extra_channels = 0;
	unsigned int rss_spread;
	unsigned int i, j;
	int rc;

	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
		if (efx->extra_channel_type[i])
			++extra_channels;

	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
		unsigned int parallelism = efx_wanted_parallelism(efx);
		struct msix_entry xentries[EFX_MAX_CHANNELS];
		unsigned int n_channels;

		rc = efx_allocate_msix_channels(efx, efx->max_channels,
						extra_channels, parallelism);
		if (rc >= 0) {
			n_channels = rc;
			for (i = 0; i < n_channels; i++)
				xentries[i].entry = i;
			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
						   n_channels);
		}
		if (rc < 0) {
			/* Fall back to single channel MSI */
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
				efx->interrupt_mode = EFX_INT_MODE_MSI;
			else
				return rc;
		} else if (rc < n_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
				  " available (%d < %u).\n", rc, n_channels);
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
			n_channels = rc;
		}

		if (rc > 0) {
			for (i = 0; i < efx->n_channels; i++)
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
		efx->n_channels = 1;
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
		} else {
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
				efx->interrupt_mode = EFX_INT_MODE_LEGACY;
			else
				return rc;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		efx->legacy_irq = efx->pci_dev->irq;
	}

	/* Assign extra channels if possible, before XDP channels */
	efx->n_extra_tx_channels = 0;
	j = efx->xdp_channel_offset;
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
		if (!efx->extra_channel_type[i])
			continue;
		if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
			efx->extra_channel_type[i]->handle_no_channel(efx);
		} else {
			--j;
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
				efx->n_extra_tx_channels++;
		}
	}

	rss_spread = efx->n_rx_channels;
	/* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		efx->rss_spread = ((rss_spread > 1 ||
				    !efx->type->sriov_wanted(efx)) ?
				   rss_spread : efx_vf_size(efx));
		return 0;
	}
#endif
	efx->rss_spread = rss_spread;

	return 0;
}

#if defined(CONFIG_SMP)
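/* Spread the channels' IRQ affinity hints across the CPUs local to the NIC's
 * NUMA node, one CPU per channel.
 */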
void efx_set_interrupt_affinity(struct efx_nic *efx)
{
	struct efx_channel *channel;
	unsigned int cpu;

	efx_for_each_channel(channel, efx) {
		cpu = cpumask_local_spread(channel->channel,
					   pcibus_to_node(efx->pci_dev->bus));
		irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
	}
}

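/* Remove the affinity hints set by efx_set_interrupt_affinity() */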
void efx_clear_interrupt_affinity(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		irq_set_affinity_hint(channel->irq, NULL);
}
#else
void
efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}

void
efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}
#endif /* CONFIG_SMP */

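/* Release the MSI/MSI-X vectors (or forget the legacy IRQ) that were claimed
 * by efx_probe_interrupts().
 */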
void efx_remove_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	/* Remove MSI/MSI-X interrupts */
	efx_for_each_channel(channel, efx)
		channel->irq = 0;
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);

	/* Remove legacy interrupt */
	efx->legacy_irq = 0;
}

/***************
 * EVENT QUEUES
 ***************/

/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
int efx_probe_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	unsigned long entries;

	netif_dbg(efx, probe, efx->net_dev,
		  "chan %d create event queue\n", channel->channel);

	/* Build an event queue with room for one event per tx and rx buffer,
	 * plus some extra for link state events and MCDI completions.
	 */
	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

	return efx_nic_probe_eventq(channel);
}

/* Prepare channel's event queue */
int efx_init_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	int rc;

	EFX_WARN_ON_PARANOID(channel->eventq_init);

	netif_dbg(efx, drv, efx->net_dev,
		  "chan %d init event queue\n", channel->channel);

	rc = efx_nic_init_eventq(channel);
	if (rc == 0) {
		efx->type->push_irq_moderation(channel);
		channel->eventq_read_ptr = 0;
		channel->eventq_init = true;
	}
	return rc;
}

/* Enable event queue processing and NAPI */
void efx_start_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
		  "chan %d start event queue\n", channel->channel);

	/* Make sure the NAPI handler sees the enabled flag set */
	channel->enabled = true;
	smp_wmb();

	napi_enable(&channel->napi_str);
	efx_nic_eventq_read_ack(channel);
}

/* Disable event queue processing and NAPI */
void efx_stop_eventq(struct efx_channel *channel)
{
	if (!channel->enabled)
		return;

	napi_disable(&channel->napi_str);
	channel->enabled = false;
}

void efx_fini_eventq(struct efx_channel *channel)
{
	if (!channel->eventq_init)
		return;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d fini event queue\n", channel->channel);

	efx_nic_fini_eventq(channel);
	channel->eventq_init = false;
}

void efx_remove_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d remove event queue\n", channel->channel);

	efx_nic_remove_eventq(channel);
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

/* Allocate and initialise a channel structure. */
struct efx_channel *
efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
{
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	struct efx_channel *channel;
	int j;

	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	channel->efx = efx;
	channel->channel = i;
	channel->type = &efx_default_channel_type;

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		tx_queue->efx = efx;
		tx_queue->queue = i * EFX_TXQ_TYPES + j;
		tx_queue->channel = channel;
	}

#ifdef CONFIG_RFS_ACCEL
	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

	rx_queue = &channel->rx_queue;
	rx_queue->efx = efx;
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);

	return channel;
}

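/* Allocate all channel structures up front and pick the interrupt mode to
 * try first, clamped to what the NIC type supports.
 */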
int efx_init_channels(struct efx_nic *efx)
{
	unsigned int i;

	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
		if (!efx->channel[i])
			return -ENOMEM;
		efx->msi_context[i].efx = efx;
		efx->msi_context[i].index = i;
	}

	/* Higher numbered interrupt modes are less capable! */
	if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
			 efx->type->min_interrupt_mode)) {
		return -EIO;
	}
	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
				  interrupt_mode);
	efx->interrupt_mode = min(efx->type->min_interrupt_mode,
				  interrupt_mode);

	return 0;
}

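/* Free the channel structures allocated by efx_init_channels() */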
void efx_fini_channels(struct efx_nic *efx)
{
	unsigned int i;

	for (i = 0; i < EFX_MAX_CHANNELS; i++)
		if (efx->channel[i]) {
			kfree(efx->channel[i]);
			efx->channel[i] = NULL;
		}
}

/* Allocate and initialise a channel structure, copying parameters
 * (but not resources) from an old channel structure.
 */
struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
{
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	struct efx_channel *channel;
	int j;

	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	*channel = *old_channel;

	channel->napi_dev = NULL;
	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
	channel->napi_str.napi_id = 0;
	channel->napi_str.state = 0;
	memset(&channel->eventq, 0, sizeof(channel->eventq));

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		if (tx_queue->channel)
			tx_queue->channel = channel;
		tx_queue->buffer = NULL;
		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
	}

	rx_queue = &channel->rx_queue;
	rx_queue->buffer = NULL;
	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

	return channel;
}

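/* Probe a channel: run the channel type's pre_probe hook and allocate the
 * event queue and any TX/RX queues the channel uses.
 */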
static int efx_probe_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	netif_dbg(channel->efx, probe, channel->efx->net_dev,
		  "creating channel %d\n", channel->channel);

	rc = channel->type->pre_probe(channel);
	if (rc)
		goto fail;

	rc = efx_probe_eventq(channel);
	if (rc)
		goto fail;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		rc = efx_probe_tx_queue(tx_queue);
		if (rc)
			goto fail;
	}

	efx_for_each_channel_rx_queue(rx_queue, channel) {
		rc = efx_probe_rx_queue(rx_queue);
		if (rc)
			goto fail;
	}

	channel->rx_list = NULL;

	return 0;

fail:
	efx_remove_channel(channel);
	return rc;
}

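/* Default channel naming: "<nic>-rx-N", "<nic>-tx-N", "<nic>-xdp-N", or just
 * "<nic>-N" when RX and TX share channels.
 */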
void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
	struct efx_nic *efx = channel->efx;
	const char *type;
	int number;

	number = channel->channel;

	if (number >= efx->xdp_channel_offset &&
	    !WARN_ON_ONCE(!efx->n_xdp_channels)) {
		type = "-xdp";
		number -= efx->xdp_channel_offset;
	} else if (efx->tx_channel_offset == 0) {
		type = "";
	} else if (number < efx->tx_channel_offset) {
		type = "-rx";
	} else {
		type = "-tx";
		number -= efx->tx_channel_offset;
	}
	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
}

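/* Fill in the per-channel names stored in the MSI context (used as the IRQ
 * names for each channel).
 */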
void efx_set_channel_names(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		channel->type->get_name(channel,
					efx->msi_context[channel->channel].name,
					sizeof(efx->msi_context[0].name));
}

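/* Probe all channels and name them.  On failure, any channels already probed
 * are torn down again.
 */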
int efx_probe_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	int rc;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

	/* Probe channels in reverse, so that any 'extra' channels
	 * use the start of the buffer table. This allows the traffic
	 * channels to be resized without moving them or wasting the
	 * entries before them.
	 */
	efx_for_each_channel_rev(channel, efx) {
		rc = efx_probe_channel(channel);
		if (rc) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	efx_set_channel_names(efx);

	return 0;

fail:
	efx_remove_channels(efx);
	return rc;
}

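/* Undo efx_probe_channel(): remove the channel's queues and event queue and
 * run the channel type's post_remove hook.
 */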
void efx_remove_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "destroy chan %d\n", channel->channel);

	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
		efx_remove_tx_queue(tx_queue);
	efx_remove_eventq(channel);
	channel->type->post_remove(channel);
}

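/* Remove all channels and free the XDP TX queue lookup table */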
void efx_remove_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_remove_channel(channel);

	kfree(efx->xdp_tx_queues);
}

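/* Re-create the traffic channels with new RX/TX ring sizes.  Channels whose
 * type cannot be copied keep their existing buffer table entries; the rest
 * are cloned, swapped in and re-probed, with a rollback path if any
 * allocation fails.
 */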
int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
	unsigned int i, next_buffer_table = 0;
	u32 old_rxq_entries, old_txq_entries;
	int rc, rc2;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	/* Not all channels should be reallocated. We must avoid
	 * reallocating their buffer table entries.
	 */
	efx_for_each_channel(channel, efx) {
		struct efx_rx_queue *rx_queue;
		struct efx_tx_queue *tx_queue;

		if (channel->type->copy)
			continue;
		next_buffer_table = max(next_buffer_table,
					channel->eventq.index +
					channel->eventq.entries);
		efx_for_each_channel_rx_queue(rx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						rx_queue->rxd.index +
						rx_queue->rxd.entries);
		efx_for_each_channel_tx_queue(tx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						tx_queue->txd.index +
						tx_queue->txd.entries);
	}

	efx_device_detach_sync(efx);
	efx_stop_all(efx);
	efx_soft_disable_interrupts(efx);

	/* Clone channels (where possible) */
	memset(other_channel, 0, sizeof(other_channel));
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (channel->type->copy)
			channel = channel->type->copy(channel);
		if (!channel) {
			rc = -ENOMEM;
			goto out;
		}
		other_channel[i] = channel;
	}

	/* Swap entry counts and channel pointers */
	old_rxq_entries = efx->rxq_entries;
	old_txq_entries = efx->txq_entries;
	efx->rxq_entries = rxq_entries;
	efx->txq_entries = txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}

	/* Restart buffer table allocation */
	efx->next_buffer_table = next_buffer_table;

	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (!channel->type->copy)
			continue;
		rc = efx_probe_channel(channel);
		if (rc)
			goto rollback;
		efx_init_napi_channel(efx->channel[i]);
	}

out:
	/* Destroy unused channel structures */
	for (i = 0; i < efx->n_channels; i++) {
		channel = other_channel[i];
		if (channel && channel->type->copy) {
			efx_fini_napi_channel(channel);
			efx_remove_channel(channel);
			kfree(channel);
		}
	}

	rc2 = efx_soft_enable_interrupts(efx);
	if (rc2) {
		rc = rc ? rc : rc2;
		netif_err(efx, drv, efx->net_dev,
			  "unable to restart interrupts on channel reallocation\n");
		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
	} else {
		efx_start_all(efx);
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;

rollback:
	/* Swap back */
	efx->rxq_entries = old_rxq_entries;
	efx->txq_entries = old_txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}
	goto out;
}

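/* Record which channels have RX and TX queues, renumber the TX queues when
 * RX-only and TX-only channels are separate, and build the XDP TX queue
 * lookup table.
 */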
int efx_set_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	int xdp_queue_number;

	efx->tx_channel_offset =
		efx_separate_tx_channels ?
		efx->n_channels - efx->n_tx_channels : 0;

	if (efx->xdp_tx_queue_count) {
		EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);

		/* Allocate array for XDP TX queue lookup. */
		efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
					     sizeof(*efx->xdp_tx_queues),
					     GFP_KERNEL);
		if (!efx->xdp_tx_queues)
			return -ENOMEM;
	}

	/* We need to mark which channels really have RX and TX
	 * queues, and adjust the TX queue numbers if we have separate
	 * RX-only and TX-only channels.
	 */
	xdp_queue_number = 0;
	efx_for_each_channel(channel, efx) {
		if (channel->channel < efx->n_rx_channels)
			channel->rx_queue.core_index = channel->channel;
		else
			channel->rx_queue.core_index = -1;

		efx_for_each_channel_tx_queue(tx_queue, channel) {
			tx_queue->queue -= (efx->tx_channel_offset *
					    EFX_TXQ_TYPES);

			if (efx_channel_is_xdp_tx(channel) &&
			    xdp_queue_number < efx->xdp_tx_queue_count) {
				efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
				xdp_queue_number++;
			}
		}
	}
	return 0;
}

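/* A default channel wants TX queues if it lies in the TX channel range */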
bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
	return channel->channel - channel->efx->tx_channel_offset <
		channel->efx->n_tx_channels;
}

/*************
 * START/STOP
 *************/

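/* Enable event processing: initialise and start the event queues (and NAPI)
 * and switch MCDI completions to event mode.  On failure, any event queues
 * already started are stopped again.
 */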
int efx_soft_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	BUG_ON(efx->state == STATE_DISABLED);

	efx->irq_soft_enabled = true;
	smp_wmb();

	efx_for_each_channel(channel, efx) {
		if (!channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
		efx_start_eventq(channel);
	}

	efx_mcdi_mode_event(efx);

	return 0;
fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	return rc;
}

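/* Stop event processing: switch MCDI back to polled mode, wait for in-flight
 * interrupt handlers, then stop and (where appropriate) tear down the event
 * queues.
 */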
void efx_soft_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	if (efx->state == STATE_DISABLED)
		return;

	efx_mcdi_mode_poll(efx);

	efx->irq_soft_enabled = false;
	smp_wmb();

	if (efx->legacy_irq)
		synchronize_irq(efx->legacy_irq);

	efx_for_each_channel(channel, efx) {
		if (channel->irq)
			synchronize_irq(channel->irq);

		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	/* Flush the asynchronous MCDI request queue */
	efx_mcdi_flush_async(efx);
}

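/* Fully enable interrupts: unmask the hardware interrupt, initialise the
 * persistent ("keep_eventq") event queues and then soft-enable the rest.
 */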
int efx_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	/* TODO: Is this really a bug? */
	BUG_ON(efx->state == STATE_DISABLED);

	if (efx->eeh_disabled_legacy_irq) {
		enable_irq(efx->legacy_irq);
		efx->eeh_disabled_legacy_irq = false;
	}

	efx->type->irq_enable_master(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
	}

	rc = efx_soft_enable_interrupts(efx);
	if (rc)
		goto fail;

	return 0;

fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);

	return rc;
}

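/* Reverse of efx_enable_interrupts(): soft-disable first, then tear down the
 * persistent event queues and mask the hardware interrupt.
 */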
void efx_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_soft_disable_interrupts(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);
}

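/* Initialise every channel's TX and RX queues and push the initial batch of
 * RX descriptors (with the event queue briefly stopped so the refill cannot
 * race with the NAPI handler).
 */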
void efx_start_channels(struct efx_nic *efx)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel) {
			efx_init_tx_queue(tx_queue);
			atomic_inc(&efx->active_queues);
		}

		efx_for_each_channel_rx_queue(rx_queue, channel) {
			efx_init_rx_queue(rx_queue);
			atomic_inc(&efx->active_queues);
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
			efx_start_eventq(channel);
		}

		WARN_ON(channel->rx_pkt_n_frags);
	}
}

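/* Stop RX refill, let in-flight NAPI processing drain, flush the DMA queues
 * and then tear down every channel's TX and RX queues.
 */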
void efx_stop_channels(struct efx_nic *efx)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;
	int rc = 0;

	/* Stop RX refill */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			rx_queue->refill_enabled = false;
	}

	efx_for_each_channel(channel, efx) {
		/* RX packet processing is pipelined, so wait for the
		 * NAPI handler to complete.  At least event queue 0
		 * might be kept active by non-data events, so don't
		 * use napi_synchronize() but actually disable NAPI
		 * temporarily.
		 */
		if (efx_channel_has_rx_queue(channel)) {
			efx_stop_eventq(channel);
			efx_start_eventq(channel);
		}
	}

	if (efx->type->fini_dmaq)
		rc = efx->type->fini_dmaq(efx);

	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "successfully flushed all queues\n");
	}

	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_fini_rx_queue(rx_queue);
		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
			efx_fini_tx_queue(tx_queue);
	}
}

/**************************************************************************
 *
 * NAPI interface
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
	struct efx_tx_queue *tx_queue;
	struct list_head rx_list;
	int spent;

	if (unlikely(!channel->enabled))
		return 0;

	/* Prepare the batch receive list */
	EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
	INIT_LIST_HEAD(&rx_list);
	channel->rx_list = &rx_list;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		tx_queue->pkts_compl = 0;
		tx_queue->bytes_compl = 0;
	}

	spent = efx_nic_process_eventq(channel, budget);
	if (spent && efx_channel_has_rx_queue(channel)) {
		struct efx_rx_queue *rx_queue =
			efx_channel_get_rx_queue(channel);

		efx_rx_flush_packet(channel);
		efx_fast_push_rx_descriptors(rx_queue, true);
	}

	/* Update BQL */
	efx_for_each_channel_tx_queue(tx_queue, channel) {
		if (tx_queue->bytes_compl) {
			netdev_tx_completed_queue(tx_queue->core_txq,
						  tx_queue->pkts_compl,
						  tx_queue->bytes_compl);
		}
	}

	/* Receive any packets we queued up */
	netif_receive_skb_list(channel->rx_list);
	channel->rx_list = NULL;

	return spent;
}

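/* Adaptive IRQ moderation: called from efx_poll() once enough interrupts have
 * accumulated.  Nudge the channel's moderation delay down when the event
 * score is below irq_adapt_low_thresh, and up (capped at the default RX
 * moderation) when it is above irq_adapt_high_thresh, then reset the counters.
 */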
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
	int step = efx->irq_mod_step_us;

	if (channel->irq_mod_score < irq_adapt_low_thresh) {
		if (channel->irq_moderation_us > step) {
			channel->irq_moderation_us -= step;
			efx->type->push_irq_moderation(channel);
		}
	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
		if (channel->irq_moderation_us <
		    efx->irq_rx_moderation_us) {
			channel->irq_moderation_us += step;
			efx->type->push_irq_moderation(channel);
		}
	}

	channel->irq_count = 0;
	channel->irq_mod_score = 0;
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	struct efx_nic *efx = channel->efx;
	int spent;

	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());

	spent = efx_process_channel(channel, budget);

	xdp_do_flush_map();

	if (spent < budget) {
		if (efx_channel_has_rx_queue(channel) &&
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			efx_update_irq_mod(efx, channel);
		}

#ifdef CONFIG_RFS_ACCEL
		/* Perhaps expire some ARFS filters */
		mod_delayed_work(system_wq, &channel->filter_work, 0);
#endif

		/* There is no race here; although napi_disable() will
		 * only wait for napi_complete(), this isn't a problem
		 * since efx_nic_eventq_read_ack() will have no effect if
		 * interrupts have already been disabled.
		 */
		if (napi_complete_done(napi, spent))
			efx_nic_eventq_read_ack(channel);
	}

	return spent;
}

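/* Register the channel's NAPI context against the net device */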
void efx_init_napi_channel(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	channel->napi_dev = efx->net_dev;
	netif_napi_add(channel->napi_dev, &channel->napi_str,
		       efx_poll, napi_weight);
}

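/* Register NAPI contexts for all channels */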
void efx_init_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_init_napi_channel(channel);
}

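/* Unregister the channel's NAPI context, if any */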
void efx_fini_napi_channel(struct efx_channel *channel)
{
	if (channel->napi_dev)
		netif_napi_del(&channel->napi_str);

	channel->napi_dev = NULL;
}

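/* Unregister NAPI contexts for all channels */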
void efx_fini_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_fini_napi_channel(channel);
}