Skip to content

Commit

Permalink
[patch] mlxsw: i2c: Prevent transaction execution for special chip (#279
Browse files Browse the repository at this point in the history
)

* [patch] mlxsw: i2c: Prevent transaction execution for special chip
states

Do not run transaction in cases chip is in reset or in-filed update states.

This patch fixes an issue encountered during SONiC warm-reboot: during
ISSU start the kernel reports that CPU stall is detected:

```
INFO: rcu_sched detected stalls on CPUs/tasks
```

This patch won't be upstreamed due to the upstream driver not supporting
ISSU.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>

* remove TMP tag

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
  • Loading branch information
stepanblyschak authored Jul 5, 2022
1 parent fa9411d commit 50e2bff
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
126 changes: 126 additions & 0 deletions patch/0098-mlxsw-i2c-Prevent-transaction-execution-for.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
From d3e0bf403d527f717a62ea328781256f999d4f95 Mon Sep 17 00:00:00 2001
From: Stepan Blyschak <stepanb@nvidia.com>
Date: Mon, 20 Jun 2022 19:28:04 +0300
Subject: [PATCH] mlxsw: i2c: Prevent transaction execution for special
chip states

Do not run transaction in cases chip is in reset or in-service update
states. In such case firmware is not accessible and will reject transaction
with the relevant status "RUNNING_RESET" or "FW_ISSU_ONGOING".
In case transaction is failed do to one of these reasons, stop sending
transactions. In such case driver is about to be removed since it
cannot continue running after reset or in-service update. And
re-probed again after reset or in-service update is completed.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
---
drivers/net/ethernet/mellanox/mlxsw/cmd.h | 4 ++++
drivers/net/ethernet/mellanox/mlxsw/i2c.c | 29 ++++++++++++++++++++---
2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 91f68fb0b..d8ecb8c8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -149,6 +149,8 @@ enum mlxsw_cmd_status {
MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B,
/* Device is currently running reset */
MLXSW_CMD_STATUS_RUNNING_RESET = 0x26,
+ /* FW ISSU ongoing. */
+ MLXSW_CMD_STATUS_FW_ISSU = 0x27,
/* Bad management packet (silently discarded). */
MLXSW_CMD_STATUS_BAD_PKT = 0x30,
};
@@ -180,6 +182,8 @@ static inline const char *mlxsw_cmd_status_str(u8 status)
return "BAD_NVMEM";
case MLXSW_CMD_STATUS_RUNNING_RESET:
return "RUNNING_RESET";
+ case MLXSW_CMD_STATUS_FW_ISSU:
+ return "FW_ISSU_ONGOING";
case MLXSW_CMD_STATUS_BAD_PKT:
return "BAD_PKT";
default:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 939b692ff..3c9d2c7a0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -63,6 +63,7 @@
* @core: switch core pointer;
* @bus_info: bus info block;
* @block_size: maximum block size allowed to pass to under layer;
+ * @status: status to indicate chip reset or in-service update;
*/
struct mlxsw_i2c {
struct {
@@ -76,6 +77,7 @@ struct mlxsw_i2c {
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
u16 block_size;
+ u8 status;
};

#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -222,6 +224,19 @@ static int mlxsw_i2c_write_cmd(struct i2c_client *client,
return 0;
}

+static bool
+mlxsw_i2c_cmd_status_verify(struct device *dev, struct mlxsw_i2c *mlxsw_i2c,
+ u8 status)
+{
+ if (status == MLXSW_CMD_STATUS_FW_ISSU ||
+ status == MLXSW_CMD_STATUS_RUNNING_RESET) {
+ mlxsw_i2c->status = status;
+ dev_info(dev, "FW status=%x(%s)): Access to device is not allowed in this state\n", status, mlxsw_cmd_status_str(status));
+ return true;
+ }
+ return false;
+}
+
/* Routine posts initialization command to ASIC through mail box. */
static int
mlxsw_i2c_write_init_cmd(struct i2c_client *client,
@@ -405,6 +420,10 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,

WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));

+ /* Do not run transaction if chip is in reset or in-service update state. */
+ if (mlxsw_i2c->status)
+ return 0;
+
if (in_mbox) {
reg_size = mlxsw_i2c_get_reg_size(in_mbox);
num = reg_size / mlxsw_i2c->block_size;
@@ -479,6 +498,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,

cmd_fail:
mutex_unlock(&mlxsw_i2c->cmd.lock);
+ if (mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, *status))
+ err = 0;
return err;
}

@@ -608,14 +629,16 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
/* Wait until go bit is cleared. */
err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
if (err) {
- dev_err(&client->dev, "HW semaphore is not released");
+ if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
+ dev_err(&client->dev, "HW semaphore is not released");
goto errout;
}

/* Validate transaction completion status. */
if (status) {
- dev_err(&client->dev, "Bad transaction completion status %x\n",
- status);
+ if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
+ dev_err(&client->dev, "Bad transaction completion status %x\n",
+ status);
err = -EIO;
goto errout;
}
--
2.17.1

1 change: 1 addition & 0 deletions patch/series
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ kernel-compat-always-include-linux-compat.h-from-net-compat.patch
0096-hwmon-mlxreg-fan-Modify-PWM-connectivity-valida.patch
0097-hwmon-mlxreg-fan-Support-distinctive-names-per-.patch
0999-Revert-mlxsw-thermal-Fix-out-of-bounds-memory-a.patch
0098-mlxsw-i2c-Prevent-transaction-execution-for.patch


# Cisco patches for 5.10 kernel
Expand Down

0 comments on commit 50e2bff

Please sign in to comment.