From 27f9e0ccb6da0857a323c1d19a23b6666ddefe05 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Dec 2022 15:32:52 +0200 Subject: [PATCH] net/mlx5: Lag, Add single RDMA device in multiport mode In MultiPort E-Switch mode a single RDMA device is created. This device has multiple RDMA ports that represent the uplink ports that are connected to the E-Switch. Account for this when creating the RDMA device so it has an additional port for the non-native uplink. As a side effect of this patch, use shared fdb in multiport eswitch mode. Signed-off-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 18 ++++++++--- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 6 ++-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 37 +++++++++++++++++----- include/linux/mlx5/driver.h | 1 + 5 files changed, 49 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 52821485371a..ddcfc116b19a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -37,6 +37,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) const struct mlx5_ib_profile *profile; struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + int second_uplink = false; u32 peer_num_ports; int vport_index; int ret; @@ -47,17 +48,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) peer_dev = mlx5_lag_get_peer_mdev(dev); peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); if (mlx5_lag_is_master(dev)) { - /* Only 1 ib port is the representor for both uplinks */ - num_ports += peer_num_ports - 1; + if (mlx5_lag_is_mpesw(dev)) + num_ports += peer_num_ports; + else + num_ports += peer_num_ports - 1; + } else { - if (rep->vport == MLX5_VPORT_UPLINK) - return 0; + if (rep->vport == MLX5_VPORT_UPLINK) { + if (!mlx5_lag_is_mpesw(dev)) + return 0; 
+ second_uplink = true; + } + vport_index += peer_num_ports; dev = peer_dev; } } - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink) profile = &raw_eth_profile; else return mlx5_ib_set_vport_rep(dev, rep, vport_index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 301994741b08..5d331b940f4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -644,7 +644,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return 0; } -static int mlx5_deactivate_lag(struct mlx5_lag *ldev) +int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; @@ -721,7 +721,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return true; } -static void mlx5_lag_add_devices(struct mlx5_lag *ldev) +void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; @@ -738,7 +738,7 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev) } } -static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 2dbd96a86ef8..bc1f1dd3e283 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -120,5 +120,8 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); void mlx5_ldev_remove_debugfs(struct dentry *dbg); void mlx5_disable_lag(struct mlx5_lag *ldev); +void mlx5_lag_remove_devices(struct mlx5_lag *ldev); +int mlx5_deactivate_lag(struct mlx5_lag *ldev); +void mlx5_lag_add_devices(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 2f7f2af312d7..0c0ef600f643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -67,15 +67,16 @@ err_metadata: static int enable_mpesw(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int err; if (ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; - if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS || - !MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table) || - !MLX5_CAP_GEN(dev, create_lag_when_not_master_up) || + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || + !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || !mlx5_lag_check_prereq(ldev)) return -EOPNOTSUPP; @@ -83,15 +84,32 @@ static int enable_mpesw(struct mlx5_lag *ldev) if (err) return err; - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true); if (err) { - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - goto out_err; + mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err); + goto err_add_devices; } + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + if (err) + goto err_rescan_drivers; + return 0; -out_err: +err_rescan_drivers: + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); +err_add_devices: + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; } @@ -109,6 
+127,7 @@ static void mlx5_mpesw_work(struct work_struct *work) struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); struct mlx5_lag *ldev = mpesww->lag; + mlx5_dev_list_lock(); mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) { mpesww->result = -EAGAIN; @@ -121,6 +140,7 @@ static void mlx5_mpesw_work(struct work_struct *work) disable_mpesw(ldev); unlock: mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); complete(&mpesww->comp); } @@ -187,3 +207,4 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev) return ldev && ldev->mode == MLX5_LAG_MODE_MPESW; } +EXPORT_SYMBOL(mlx5_lag_is_mpesw); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4bb5842a948..c9259350cdfc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1162,6 +1162,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); -- 2.11.0