#include "net-sysfs.h"
-/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8
/* This should be increased if a protocol with a bigger head is added. */
struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
int i;
+ /* walk through the TCs and see if it falls into any of them */
for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
if ((txq - tc->offset) < tc->count)
return i;
}
+ /* didn't find it, just return -1 to indicate no match */
return -1;
}
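/* Editorial worked example, not part of the patch: with
 * tc_to_txq[0] = { .count = 4, .offset = 0 } and
 * tc_to_txq[1] = { .count = 4, .offset = 4 }, txq 5 resolves to TC 1.
 * txq is unsigned, so for txq < tc->offset the subtraction above wraps
 * to a huge value and the bounds test correctly fails.
 */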
unsigned int nr_ids;
if (dev->num_tc) {
+ /* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
+ if (num_tc < 0)
+ return -EINVAL;
+
+ /* If queue belongs to subordinate dev use its map */
+ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
EXPORT_SYMBOL(netif_set_xps_queue);
#endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+ struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+ /* Unbind any subordinate channels */
+ while (txq-- != &dev->_tx[0]) {
+ if (txq->sb_dev)
+ netdev_unbind_sb_channel(dev, txq->sb_dev);
+ }
+}
+
void netdev_reset_tc(struct net_device *dev)
{
#ifdef CONFIG_XPS
netif_reset_xps_queues_gt(dev, 0);
#endif
+ netdev_unbind_all_sb_channels(dev);
+
+ /* Reset TC configuration of device */
dev->num_tc = 0;
memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
#ifdef CONFIG_XPS
netif_reset_xps_queues_gt(dev, 0);
#endif
+ netdev_unbind_all_sb_channels(dev);
+
dev->num_tc = num_tc;
return 0;
}
EXPORT_SYMBOL(netdev_set_num_tc);
+void netdev_unbind_sb_channel(struct net_device *dev,
+ struct net_device *sb_dev)
+{
+ struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+ netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+ memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+ memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+ while (txq-- != &dev->_tx[0]) {
+ if (txq->sb_dev == sb_dev)
+ txq->sb_dev = NULL;
+ }
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+ struct net_device *sb_dev,
+ u8 tc, u16 count, u16 offset)
+{
+ /* Make certain the sb_dev and dev are already configured */
+ if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+ return -EINVAL;
+
+ /* We cannot hand out queues we don't have */
+ if ((offset + count) > dev->real_num_tx_queues)
+ return -EINVAL;
+
+ /* Record the mapping */
+ sb_dev->tc_to_txq[tc].count = count;
+ sb_dev->tc_to_txq[tc].offset = offset;
+
+ /* Provide a way for Tx queue to find the tc_to_txq map or
+ * XPS map for itself.
+ */
+ while (count--)
+ netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
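/* Editorial example, not part of this patch: how an offloading driver
 * might carve lower-device queues out for a subordinate device. The
 * devices, channel number, and queue layout are hypothetical; "lower"
 * is assumed to already have TCs configured via netdev_set_num_tc()
 * and at least 20 real Tx queues.
 */
static int example_bind_subordinate(struct net_device *lower,
				    struct net_device *sb_dev)
{
	int err;

	/* Mark sb_dev as subordinate channel 1 (it must be single-queue) */
	err = netdev_set_sb_channel(sb_dev, 1);
	if (err)
		return err;

	/* Hand queues 16-19 of the lower device to sb_dev's TC 0 */
	err = netdev_bind_sb_channel_queue(lower, sb_dev, 0, 4, 16);
	if (err)
		netdev_set_sb_channel(sb_dev, 0);	/* back to native mode */

	return err;
}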
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+ /* Do not use a multiqueue device to represent a subordinate channel */
+ if (netif_is_multiqueue(dev))
+ return -ENODEV;
+
+ /* We allow channels 1 - 32767 to be used for subordinate channels.
+ * Channel 0 is meant to be "native" mode and used only to represent
+ * the main root device. We allow writing 0 to reset the device back
+ * to normal mode after being used as a subordinate channel.
+ */
+ if (channel > S16_MAX)
+ return -EINVAL;
+
+ dev->num_tc = -channel;
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
+
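/* Editorial note: the channel number is stored negated, which is why the
 * code above treats dev->num_tc < 0 as "subordinate device". After
 * netdev_set_sb_channel(dev, 5), dev->num_tc == -5; writing channel 0
 * gives num_tc == 0, i.e. native mode again.
 */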
/*
 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
 */

/* Returns a Tx hash based on the given packet descriptor and a Tx queues'
 * number to be used as a distribution range.
 */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+ const struct net_device *sb_dev,
+ struct sk_buff *skb)
{
u32 hash;
u16 qoffset = 0;
u16 qcount = dev->real_num_tx_queues;
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+ qoffset = sb_dev->tc_to_txq[tc].offset;
+ qcount = sb_dev->tc_to_txq[tc].count;
+ }
+
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
while (unlikely(hash >= qcount))
hash -= qcount;
- return hash;
- }
-
- if (dev->num_tc) {
- u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
- qoffset = dev->tc_to_txq[tc].offset;
- qcount = dev->tc_to_txq[tc].count;
+ return hash + qoffset;
}
return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
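/* Editorial worked example: if the skb's priority maps to a TC with
 * sb_dev->tc_to_txq[tc] = { .count = 4, .offset = 16 }, the flow hash is
 * scaled into [0, 4) by reciprocal_scale() and offset to pick one of
 * queues 16-19; a recorded Rx queue is likewise folded into [0, 4)
 * before the offset is added.
 */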
#endif
-static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_XPS
struct xps_dev_maps *dev_maps;
if (!static_key_false(&xps_rxqs_needed))
goto get_cpus_map;
- dev_maps = rcu_dereference(dev->xps_rxqs_map);
+ dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
if (dev_maps) {
int tci = sk_rx_queue_get(sk);
get_cpus_map:
if (queue_index < 0) {
- dev_maps = rcu_dereference(dev->xps_cpus_map);
+ dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
if (dev_maps) {
unsigned int tci = skb->sender_cpu - 1;
#endif
}
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
{
struct sock *sk = skb->sk;
int queue_index = sk_tx_queue_get(sk);
+ sb_dev = sb_dev ? : dev;
+
if (queue_index < 0 || skb->ooo_okay ||
queue_index >= dev->real_num_tx_queues) {
- int new_index = get_xps_queue(dev, skb);
+ int new_index = get_xps_queue(dev, sb_dev, skb);
if (new_index < 0)
- new_index = skb_tx_hash(dev, skb);
+ new_index = skb_tx_hash(dev, sb_dev, skb);
if (queue_index != new_index && sk &&
sk_fullsock(sk) &&
return queue_index;
}
+static u16 __netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ return ___netdev_pick_tx(dev, skb, NULL);
+}
+
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
struct sk_buff *skb,
- void *accel_priv)
+ struct net_device *sb_dev)
{
int queue_index = 0;
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
__netdev_pick_tx);
else
- queue_index = __netdev_pick_tx(dev, skb);
+ queue_index = ___netdev_pick_tx(dev, skb, sb_dev);
queue_index = netdev_cap_txqueue(dev, queue_index);
}
/**
* __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
- * @accel_priv: private data used for L2 forwarding offload
+ * @sb_dev: subordinate device used for L2 forwarding offload
*
* Queue a buffer for transmission to a network device. The caller must
* have set the device and priority and built the buffer before calling
* this function. When calling this method, interrupts MUST be enabled,
* because the BH enable code must have IRQs enabled so that it will not
* deadlock.
* --BLG
*/
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
else
skb_dst_force(skb);
- txq = netdev_pick_tx(dev, skb, accel_priv);
+ txq = netdev_pick_tx(dev, skb, sb_dev);
q = rcu_dereference_bh(txq->qdisc);
trace_net_dev_queue(skb);
}
EXPORT_SYMBOL(dev_queue_xmit);
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
{
- return __dev_queue_xmit(skb, accel_priv);
+ return __dev_queue_xmit(skb, sb_dev);
}
EXPORT_SYMBOL(dev_queue_xmit_accel);
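/* Editorial sketch, not part of this patch: an upper device whose Tx is
 * offloaded (e.g. a macvlan) retargets the skb at the lower device and
 * passes itself as sb_dev so queue selection uses its own TC/XPS maps;
 * "lowerdev" is a hypothetical name here.
 *
 *	skb->dev = lowerdev;
 *	return dev_queue_xmit_accel(skb, sb_dev);
 */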
return;
if (list_empty(head))
return;
-
- list_for_each_entry_safe(skb, next, head, list)
- pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ if (pt_prev->list_func != NULL)
+ pt_prev->list_func(head, pt_prev, orig_dev);
+ else
+ list_for_each_entry_safe(skb, next, head, list)
+ pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
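/* Editorial example: a protocol opts in to batched receive by setting
 * .list_func in its packet_type (as IPv4 does with ip_list_rcv elsewhere
 * in this series); protocols without it keep seeing one skb at a time
 * through .func.
 *
 *	static struct packet_type ip_packet_type __read_mostly = {
 *		.type		= cpu_to_be16(ETH_P_IP),
 *		.func		= ip_rcv,
 *		.list_func	= ip_list_rcv,
 *	};
 */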
static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
/* Handle the previous sublist */
list_cut_before(&sublist, head, &skb->list);
- __netif_receive_skb_list_core(&sublist, pfmemalloc);
+ if (!list_empty(&sublist))
+ __netif_receive_skb_list_core(&sublist, pfmemalloc);
pfmemalloc = !pfmemalloc;
/* See comments in __netif_receive_skb */
if (pfmemalloc)
}
}
/* Handle the remaining sublist */
- __netif_receive_skb_list_core(head, pfmemalloc);
+ if (!list_empty(head))
+ __netif_receive_skb_list_core(head, pfmemalloc);
/* Restore pflags */
if (pfmemalloc)
memalloc_noreclaim_restore(noreclaim_flag);
{
struct sk_buff *skb;
+ if (list_empty(head))
+ return;
list_for_each_entry(skb, head, list)
trace_netif_receive_skb_list_entry(skb);
netif_receive_skb_list_internal(head);
return netif_receive_skb_internal(skb);
}
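/* Editorial sketch: a driver batches received skbs on a list during its
 * NAPI poll and hands the whole batch to the stack in one call:
 *
 *	LIST_HEAD(rx_list);
 *	...
 *	list_add_tail(&skb->list, &rx_list);
 *	...
 *	netif_receive_skb_list(&rx_list);
 */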
-static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
+static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
bool flush_old)
{
+ struct list_head *head = &napi->gro_hash[index].list;
struct sk_buff *skb, *p;
list_for_each_entry_safe_reverse(skb, p, head, list) {
list_del_init(&skb->list);
napi_gro_complete(skb);
napi->gro_count--;
+ napi->gro_hash[index].count--;
}
}
-/* napi->gro_hash contains packets ordered by age.
+/* napi->gro_hash[].list contains packets ordered by age.
* The youngest packets are at the head of each list.
* Complete skbs in reverse order to reduce latencies.
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- int i;
-
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- struct list_head *head = &napi->gro_hash[i];
+ u32 i;
- __napi_gro_flush_chain(napi, head, flush_old);
- }
+ for (i = 0; i < GRO_HASH_BUCKETS; i++)
+ __napi_gro_flush_chain(napi, i, flush_old);
}
EXPORT_SYMBOL(napi_gro_flush);
struct list_head *head;
struct sk_buff *p;
- head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
+ head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
list_for_each_entry(p, head, list) {
unsigned long diffs;
}
}
-static void gro_flush_oldest(struct napi_struct *napi)
+static void gro_flush_oldest(struct list_head *head)
{
- struct sk_buff *oldest = NULL;
- unsigned long age = jiffies;
- int i;
-
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- struct list_head *head = &napi->gro_hash[i];
- struct sk_buff *skb;
+ struct sk_buff *oldest;
- if (list_empty(head))
- continue;
+ oldest = list_last_entry(head, struct sk_buff, list);
- skb = list_last_entry(head, struct sk_buff, list);
- if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
- oldest = skb;
- age = NAPI_GRO_CB(skb)->age;
- }
- }
-
- /* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
+ /* We are called with head length >= MAX_GRO_SKBS, so this is
* impossible.
*/
if (WARN_ON_ONCE(!oldest))
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
struct list_head *head = &offload_base;
struct packet_offload *ptype;
__be16 type = skb->protocol;
list_del_init(&pp->list);
napi_gro_complete(pp);
napi->gro_count--;
+ napi->gro_hash[hash].count--;
}
if (same_flow)
if (NAPI_GRO_CB(skb)->flush)
goto normal;
- if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(napi);
+ if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
+ gro_flush_oldest(gro_head);
} else {
napi->gro_count++;
+ napi->gro_hash[hash].count++;
}
NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
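/* Editorial note: each of the GRO_HASH_BUCKETS (8) lists is now capped
 * at MAX_GRO_SKBS (8) entries, so flow lookup walks at most 8 skbs per
 * packet while the NAPI instance as a whole may hold up to 64, instead
 * of 8 total as before.
 */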
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
napi->gro_count = 0;
- for (i = 0; i < GRO_HASH_BUCKETS; i++)
- INIT_LIST_HEAD(&napi->gro_hash[i]);
+ for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+ INIT_LIST_HEAD(&napi->gro_hash[i].list);
+ napi->gro_hash[i].count = 0;
+ }
napi->skb = NULL;
napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT)
for (i = 0; i < GRO_HASH_BUCKETS; i++) {
struct sk_buff *skb, *n;
- list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
+ list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
kfree_skb(skb);
+ napi->gro_hash[i].count = 0;
}
}