Skip to content

Commit d43b700

Browse files
Eran Ben ElishaSaeed Mahameed
Eran Ben Elisha
authored and
Saeed Mahameed
committed
net/mlx5: Fix a race when moving command interface to events mode
After the driver creates (via FW command) an EQ for commands, the driver will be informed of new command completions by EQE. However, due to a race in the driver's internal command mode metadata update, some new commands will still be mishandled by the driver as if we were in polling mode. Such commands can get two non-forced completions, leading to access of an already freed command entry. The CREATE_EQ command, which maps the EQ to the command queue, must be posted to the command queue while it is empty, and no other command should be posted. Add a SW mechanism so that once the CREATE_EQ command is about to be executed, all other commands will return an error without being sent to the FW. Allow sending other commands only after successfully changing the driver's internal command mode metadata. We can safely return an error to all other commands while creating the command EQ, as all other commands might be sent from the user/application during driver load. The application can rerun them later, after the driver's load has finished. Fixes: e126ba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha <[email protected]> Signed-off-by: Moshe Shemesh <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 17d00e8 commit d43b700

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

+31-4
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
848848
static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
849849
struct mlx5_cmd_msg *msg);
850850

851+
/*
 * opcode_allowed - check @opcode against the command interface's opcode gate.
 * @cmd:    command interface state; only cmd->allowed_opcode is read
 * @opcode: opcode of the command about to be issued
 *
 * Returns true when cmd->allowed_opcode is CMD_ALLOWED_OPCODE_ALL (no
 * restriction) or when it is exactly @opcode; returns false otherwise.
 */
static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
852+
{
853+
/* The "all" sentinel disables filtering entirely. */
if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
854+
return true;
855+
856+
/* Otherwise exactly one opcode is let through. */
return cmd->allowed_opcode == opcode;
857+
}
858+
851859
static void cmd_work_handler(struct work_struct *work)
852860
{
853861
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -914,7 +922,8 @@ static void cmd_work_handler(struct work_struct *work)
914922

915923
/* Skip sending command to fw if internal error */
916924
if (pci_channel_offline(dev->pdev) ||
917-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
925+
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
926+
!opcode_allowed(&dev->cmd, ent->op)) {
918927
u8 status = 0;
919928
u32 drv_synd;
920929

@@ -1405,6 +1414,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
14051414
mlx5_cmdif_debugfs_init(dev);
14061415
}
14071416

1417+
/*
 * mlx5_cmd_allowed_opcode - set the opcode gate of a device's command interface.
 * @dev:    device whose dev->cmd.allowed_opcode is updated
 * @opcode: the single opcode to allow, or CMD_ALLOWED_OPCODE_ALL to allow
 *          every opcode (see opcode_allowed())
 *
 * The write to cmd->allowed_opcode is done while holding cmd->sem
 * max_reg_cmds times plus cmd->pages_sem once; both are released again
 * before returning, in the reverse order of acquisition.
 *
 * NOTE(review): presumably cmd->sem is a counting semaphore initialised to
 * max_reg_cmds, so taking every count waits until no command is in flight
 * before the gate changes - confirm against the semaphore setup in
 * mlx5_cmd_init().
 */
void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
1418+
{
1419+
struct mlx5_cmd *cmd = &dev->cmd;
1420+
int i;
1421+
1422+
/* Take cmd->sem once per regular command slot ... */
for (i = 0; i < cmd->max_reg_cmds; i++)
1423+
down(&cmd->sem);
1424+
/* ... and the separate pages semaphore as well. */
down(&cmd->pages_sem);
1425+
1426+
cmd->allowed_opcode = opcode;
1427+
1428+
/* Release in reverse order of acquisition. */
up(&cmd->pages_sem);
1429+
for (i = 0; i < cmd->max_reg_cmds; i++)
1430+
up(&cmd->sem);
1431+
}
1432+
14081433
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
14091434
{
14101435
struct mlx5_cmd *cmd = &dev->cmd;
@@ -1681,12 +1706,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
16811706
int err;
16821707
u8 status = 0;
16831708
u32 drv_synd;
1709+
u16 opcode;
16841710
u8 token;
16851711

1712+
opcode = MLX5_GET(mbox_in, in, opcode);
16861713
if (pci_channel_offline(dev->pdev) ||
1687-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1688-
u16 opcode = MLX5_GET(mbox_in, in, opcode);
1689-
1714+
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
1715+
!opcode_allowed(&dev->cmd, opcode)) {
16901716
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
16911717
MLX5_SET(mbox_out, out, status, status);
16921718
MLX5_SET(mbox_out, out, syndrome, drv_synd);
@@ -1988,6 +2014,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
19882014
mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
19892015

19902016
cmd->mode = CMD_MODE_POLLING;
2017+
cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
19912018

19922019
create_msg_cache(dev);
19932020

drivers/net/ethernet/mellanox/mlx5/core/eq.c

+3
Original file line numberDiff line numberDiff line change
@@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
611611
.nent = MLX5_NUM_CMD_EQE,
612612
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
613613
};
614+
mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
614615
err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
615616
if (err)
616617
goto err1;
617618

618619
mlx5_cmd_use_events(dev);
620+
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
619621

620622
param = (struct mlx5_eq_param) {
621623
.irq_index = 0,
@@ -645,6 +647,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
645647
mlx5_cmd_use_polling(dev);
646648
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
647649
err1:
650+
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
648651
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
649652
return err;
650653
}

include/linux/mlx5/driver.h

+6
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ struct mlx5_cmd {
284284
struct semaphore sem;
285285
struct semaphore pages_sem;
286286
int mode;
287+
u16 allowed_opcode;
287288
struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
288289
struct dma_pool *pool;
289290
struct mlx5_cmd_debug dbg;
@@ -875,10 +876,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
875876
return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
876877
}
877878

879+
/*
 * Sentinel value for mlx5_cmd.allowed_opcode: when the field holds
 * CMD_ALLOWED_OPCODE_ALL, opcode_allowed() accepts every opcode.
 * As the first enumerator without an initializer its value is 0.
 */
enum {
880+
CMD_ALLOWED_OPCODE_ALL,
881+
};
882+
878883
int mlx5_cmd_init(struct mlx5_core_dev *dev);
879884
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
880885
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
881886
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
887+
void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
882888

883889
struct mlx5_async_ctx {
884890
struct mlx5_core_dev *dev;

0 commit comments

Comments
 (0)