OSDN Git Service

net/mlx5: E-switch, Introduce rate limiting groups API
author: Dmytro Linkin <dlinkin@nvidia.com>
Mon, 31 May 2021 14:08:14 +0000 (17:08 +0300)
committer: Saeed Mahameed <saeedm@nvidia.com>
Fri, 20 Aug 2021 04:50:40 +0000 (21:50 -0700)
Extend eswitch API with rate limiting groups:

- Define new struct mlx5_esw_rate_group that is used to hold all
  internal group data.

- Implement functions that allow creation, destruction and cleanup of
  groups.

- Assign all vports to internal unlimited zero group by default.

This commit lays the groundwork for group rate limiting by implementing
devlink_ops->rate_node_{new|del}() callbacks to support creating and
deleting groups through devlink rate node objects. APIs that allow
setting rates and adding/removing members are implemented in the
following patches.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Huy Nguyen <huyn@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
include/linux/mlx5/mlx5_ifc.h

index f4cd257..ef87d0b 100644 (file)
@@ -295,6 +295,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
        .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
        .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
        .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+       .rate_node_new = mlx5_esw_devlink_rate_node_new,
+       .rate_node_del = mlx5_esw_devlink_rate_node_del,
 #endif
 #ifdef CONFIG_MLX5_SF_MANAGER
        .port_new = mlx5_devlink_sf_port_new,
index fcdcddf..c9081d3 100644 (file)
 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
        min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
 
+/* Internal representation of an E-switch rate limiting group (the backing
+ * object for a devlink rate node). Wraps one hardware TSAR scheduling
+ * element created under the root TSAR.
+ */
+struct mlx5_esw_rate_group {
+       u32 tsar_ix;    /* HW index of the group's TSAR scheduling element */
+       u32 max_rate;   /* rate limits/bw share — not yet written in this patch; */
+       u32 min_rate;   /* per the commit message, rate setting is implemented */
+       u32 bw_share;   /* in follow-up patches */
+};
+
 static int esw_qos_vport_config(struct mlx5_eswitch *esw,
                                struct mlx5_vport *vport,
                                u32 max_rate, u32 bw_share,
@@ -159,6 +166,54 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
        return err;
 }
 
+/* Allocate a rate group and create its TSAR scheduling element as a child
+ * of the E-switch root TSAR.
+ *
+ * @esw:    E-switch instance owning the QoS hierarchy.
+ * @extack: netlink extended ack for error reporting; may be NULL for
+ *          internal callers (e.g. default group0 creation).
+ *
+ * Returns the new group on success or an ERR_PTR on failure.
+ * Caller must hold esw->state_lock (all visible callers do).
+ */
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_esw_rate_group *group;
+       int err;
+
+       /* Nested TSARs (group under root) need scheduler depth support */
+       if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+               return ERR_PTR(-EOPNOTSUPP);
+
+       group = kzalloc(sizeof(*group), GFP_KERNEL);
+       if (!group)
+               return ERR_PTR(-ENOMEM);
+
+       /* Attach the group's TSAR under the root TSAR */
+       MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+                esw->qos.root_tsar_ix);
+       err = mlx5_create_scheduling_element_cmd(esw->dev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                tsar_ctx,
+                                                &group->tsar_ix);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+               goto err_sched_elem;
+       }
+
+       return group;
+
+err_sched_elem:
+       kfree(group);
+       return ERR_PTR(err);
+}
+
+/* Destroy a group's TSAR scheduling element and free the group.
+ *
+ * The group memory is freed unconditionally — even when the firmware
+ * command fails — so the caller must not reuse @group; the error is
+ * reported via @extack (may be NULL) and returned.
+ */
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+                                     struct mlx5_esw_rate_group *group,
+                                     struct netlink_ext_ack *extack)
+{
+       int err;
+
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 group->tsar_ix);
+       if (err)
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+       kfree(group);
+       return err;
+}
+
 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
 {
        switch (type) {
@@ -191,8 +246,9 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
        if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
                return;
 
+       mutex_lock(&esw->state_lock);
        if (esw->qos.enabled)
-               return;
+               goto unlock;
 
        MLX5_SET(scheduling_context, tsar_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
@@ -205,27 +261,54 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
                                                 tsar_ctx,
                                                 &esw->qos.root_tsar_ix);
        if (err) {
-               esw_warn(dev, "E-Switch create TSAR failed (%d)\n", err);
-               return;
+               esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+               goto unlock;
        }
 
+       if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+               esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
+               if (IS_ERR(esw->qos.group0)) {
+                       esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+                                PTR_ERR(esw->qos.group0));
+                       goto err_group0;
+               }
+       }
        esw->qos.enabled = true;
+unlock:
+       mutex_unlock(&esw->state_lock);
+       return;
+
+err_group0:
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 esw->qos.root_tsar_ix);
+       if (err)
+               esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+       mutex_unlock(&esw->state_lock);
 }
 
 /* Tear down the E-switch QoS hierarchy: user-created devlink rate nodes,
  * the default group0 (if nested scheduling was supported), then the root
  * TSAR. Safe to call when QoS was never enabled.
  */
 void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
 {
+       struct devlink *devlink = priv_to_devlink(esw->dev);
        int err;
 
+       /* NOTE(review): rate nodes are flushed before taking state_lock —
+        * presumably devlink_rate_nodes_destroy() must not run under it;
+        * confirm against devlink locking rules.
+        */
+       devlink_rate_nodes_destroy(devlink);
+       mutex_lock(&esw->state_lock);
        if (!esw->qos.enabled)
-               return;
+               goto unlock;
+
+       /* group0 is only created when log_esw_max_sched_depth is supported */
+       if (esw->qos.group0)
+               esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
 
        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  esw->qos.root_tsar_ix);
        if (err)
-               esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
+               esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
 
        esw->qos.enabled = false;
+unlock:
+       mutex_unlock(&esw->state_lock);
 }
 
 int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
@@ -386,3 +469,51 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *
        mutex_unlock(&esw->state_lock);
        return err;
 }
+
+/* devlink_ops->rate_node_new() callback: create a rate limiting group for
+ * a new devlink rate node.
+ *
+ * Only permitted in switchdev (offloads) mode. On success the new group is
+ * stored in *@priv so later rate-node callbacks receive it back.
+ * Returns 0 on success or a negative errno (with @extack populated).
+ */
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+                                  struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *group;
+       struct mlx5_eswitch *esw;
+       int err = 0;
+
+       esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+
+       /* state_lock guards both the mode check and the QoS hierarchy */
+       mutex_lock(&esw->state_lock);
+       if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Rate node creation supported only in switchdev mode");
+               err = -EOPNOTSUPP;
+               goto unlock;
+       }
+
+       group = esw_qos_create_rate_group(esw, extack);
+       if (IS_ERR(group)) {
+               err = PTR_ERR(group);
+               goto unlock;
+       }
+
+       *priv = group;
+unlock:
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+/* devlink_ops->rate_node_del() callback: destroy the rate limiting group
+ * previously created by mlx5_esw_devlink_rate_node_new() (@priv).
+ *
+ * Returns the result of the TSAR destroy command; the group memory is
+ * freed either way (see esw_qos_destroy_rate_group()).
+ */
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+                                  struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *group = priv;
+       struct mlx5_eswitch *esw;
+       int err;
+
+       esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+
+       mutex_lock(&esw->state_lock);
+       err = esw_qos_destroy_rate_group(esw, group, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
index 507c7e0..ab9fd86 100644 (file)
@@ -24,6 +24,10 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void
                                            u64 tx_share, struct netlink_ext_ack *extack);
 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+                                  struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+                                  struct netlink_ext_ack *extack);
 #endif
 
 #endif
index ebeccee..3580901 100644 (file)
@@ -306,6 +306,7 @@ struct mlx5_eswitch {
        struct {
                bool            enabled;
                u32             root_tsar_ix;
+               struct mlx5_esw_rate_group *group0;
        } qos;
 
        struct mlx5_esw_bridge_offloads *br_offloads;
index fce3cba..f3638d0 100644 (file)
@@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits {
        u8         nic_bw_share[0x1];
        u8         nic_rate_limit[0x1];
        u8         packet_pacing_uid[0x1];
-       u8         reserved_at_c[0x14];
+       u8         log_esw_max_sched_depth[0x4];
+       u8         reserved_at_10[0x10];
 
        u8         reserved_at_20[0xb];
        u8         log_max_qos_nic_queue_group[0x5];