From cb1a8e7b37170f11b99ff829d0fffa7d8f4fa841 Mon Sep 17 00:00:00 2001 From: Samuel Angebault Date: Thu, 9 Feb 2023 10:03:09 -0800 Subject: [PATCH 1/5] Fixes for emmc unreliability (#270) --- ...-mmcblk-not-working-on-AMD-platforms.patch | 58 +++++++++++++++ ...ct-eMMC-drive-to-50Mhz-from-userland.patch | 70 +++++++++++++++++++ patch/series | 2 + 3 files changed, 130 insertions(+) create mode 100644 patch/driver-arista-mmcblk-not-working-on-AMD-platforms.patch create mode 100644 patch/driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch diff --git a/patch/driver-arista-mmcblk-not-working-on-AMD-platforms.patch b/patch/driver-arista-mmcblk-not-working-on-AMD-platforms.patch new file mode 100644 index 000000000..7858eb601 --- /dev/null +++ b/patch/driver-arista-mmcblk-not-working-on-AMD-platforms.patch @@ -0,0 +1,58 @@ +amd/mmc: mmcblk not working on some AMD platforms + +ADMA and ADMA-64 seem to be broken on AMD. This patch enables the +following quirks (for AMD only): + SDHCI_QUIRK_BROKEN_ADMA + SDHCI_QUIRK2_BROKEN_64_BIT_DMA + +This fixes issues that would manifest in the following fashion. + +mmc0: Timeout waiting for hardware interrupt. 
+sdhci: =========== REGISTER DUMP (mmc0)=========== +sdhci: Sys addr: 0x00000078 | Version: 0x00001002 +sdhci: Blk size: 0x00007200 | Blk cnt: 0x00000078 +sdhci: Argument: 0x000ab148 | Trn mode: 0x0000003b +sdhci: Present: 0x01ff0001 | Host ctl: 0x00000019 +sdhci: Power: 0x0000000f | Blk gap: 0x00000000 +sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa07 +sdhci: Timeout: 0x0000000c | Int stat: 0x00000000 +sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b +sdhci: AC12 err: 0x00000002 | Slot int: 0x000000ff +sdhci: Caps: 0x75fec8b2 | Caps_1: 0x00002501 +sdhci: Cmd: 0x0000123a | Max curr: 0x00c80064 +sdhci: Host ctl2: 0x00000000 +sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x000000020f97b20c +sdhci: =========================================== +mmcblk0: error -110 sending status command, retrying +mmcblk0: error -110 sending status command, retrying +mmcblk0: error -110 sending status command, aborting +mmc0: cache flush error -110 +mmc0: tried to reset card, got error -110 +blk_update_request: I/O error, dev mmcblk0, sector 700744 +blk_update_request: I/O error, dev mmcblk0, sector 700752 + +Signed-off-by: Radu Rendec +Signed-off-by: Samuel Angebault +--- + drivers/mmc/host/sdhci-pci-core.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c +index bf04a08ee..c10790dd6 100644 +--- a/drivers/mmc/host/sdhci-pci-core.c ++++ b/drivers/mmc/host/sdhci-pci-core.c +@@ -1791,8 +1791,13 @@ static int amd_probe(struct sdhci_pci_chip *chip) + } + } + +- if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) ++ dev_info(&chip->pdev->dev, "identified AMD generation %d chip\n", gen); ++ ++ if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) { ++ chip->quirks |= SDHCI_QUIRK_BROKEN_ADMA; ++ chip->quirks2 |= SDHCI_QUIRK2_BROKEN_64_BIT_DMA; + chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; ++ } + + return 0; + } diff --git 
a/patch/driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch b/patch/driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch new file mode 100644 index 000000000..5f4978542 --- /dev/null +++ b/patch/driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch @@ -0,0 +1,70 @@ +mmc: restrict eMMC drive to 50Mhz from userland + +This issue was fixed for kernel 3.18 by setting sdhci.debug_quirks2=0x40 +from Aboot boot0 (conditionally, for specific Aboot versions). + +For kernel 4.9, however, we need SDHCI_QUIRK2_BROKEN_64_BIT_DMA, which +is also in quirks2. The problem is that debug_quirks2 overwrites whatever +is written to host->quirks2 during device probing (see __sdhci_read_caps). +Since we set both SDHCI_QUIRK_BROKEN_ADMA and +SDHCI_QUIRK2_BROKEN_64_BIT_DMA (but the former is in quirks while the +latter is in quirks2) and Aboot overwrites quirks2, we end up with only +SDHCI_QUIRK_BROKEN_ADMA being set. This causes some strange behavior with +AMD devices, where the sdhci driver stalls for a while and eventually +falls back to PIO mode. We need both quirk flags to be set in order for +the controller to work in SDMA mode. + +This patch is a workaround for the quirks2 overwrite problem. It adds a +set of new sdhci module parameters (append_quirks and append_quirks2) +that *append* bits (i.e. logical "or") instead of overwriting the +values. Then Aboot can use these parameters instead in order to set +SDHCI_QUIRK2_BROKEN_HS200. Note that both quirk2 flags are set +conditionally and independently by Aboot and the sdhci-pci probe code. + +Advantages of this approach: +* This patch by itself doesn't change any kernel behavior: it just adds + two module parameters that default to zero and will have no effect + unless explicitly set to a different value from outside the driver. +* SDHCI_QUIRK2_BROKEN_HS200 can be still controlled from Aboot and + conditionally (depending on the Aboot version). 
+* SDHCI_QUIRK2_BROKEN_64_BIT_DMA can be set by the sdhci-pci probing + code, independently of Aboot. + +Signed-off-by: Radu Rendec +Signed-off-by: Samuel Angebault +--- + drivers/mmc/host/sdhci.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c +index b1e1d327c..817934f04 100644 +--- a/drivers/mmc/host/sdhci.c ++++ b/drivers/mmc/host/sdhci.c +@@ -45,6 +45,8 @@ + + #define MAX_TUNING_LOOP 40 + ++static unsigned int append_quirks; ++static unsigned int append_quirks2; + static unsigned int debug_quirks = 0; + static unsigned int debug_quirks2; + +@@ -3990,6 +3992,9 @@ void __sdhci_read_caps(struct sdhci_host *host, const u16 *ver, + + host->read_caps = true; + ++ host->quirks |= append_quirks; ++ host->quirks2 |= append_quirks2; ++ + if (debug_quirks) + host->quirks = debug_quirks; + +@@ -4850,6 +4855,8 @@ static void __exit sdhci_drv_exit(void) + module_init(sdhci_drv_init); + module_exit(sdhci_drv_exit); + ++module_param(append_quirks, uint, 0444); ++module_param(append_quirks2, uint, 0444); + module_param(debug_quirks, uint, 0444); + module_param(debug_quirks2, uint, 0444); + diff --git a/patch/series b/patch/series index f81628285..95f6679cc 100755 --- a/patch/series +++ b/patch/series @@ -4,6 +4,8 @@ driver-arista-net-tg3-dma-mask-4g-sb800.patch driver-arista-net-tg3-disallow-broadcom-default-mac.patch driver-arista-net-tg3-access-regs-indirectly.patch driver-arista-pci-reassign-pref-mem.patch +driver-arista-mmcblk-not-working-on-AMD-platforms.patch +driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch driver-support-sff-8436-eeprom.patch driver-support-sff-8436-eeprom-update.patch driver-sff-8436-use-nvmem-framework.patch From 88394e96062d4fe36bb8566bc61babb796eeb54e Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Fri, 10 Mar 2023 21:16:05 +0000 Subject: [PATCH 2/5] Read ID register to find pageable bit in optoe driver Signed-off-by: Mihir Patel --- 
...river-support-optoe-oneaddr-pageable.patch | 54 +++++++++++++++++++ patch/series | 1 + 2 files changed, 55 insertions(+) create mode 100644 patch/driver-support-optoe-oneaddr-pageable.patch diff --git a/patch/driver-support-optoe-oneaddr-pageable.patch b/patch/driver-support-optoe-oneaddr-pageable.patch new file mode 100644 index 000000000..e4f0a355b --- /dev/null +++ b/patch/driver-support-optoe-oneaddr-pageable.patch @@ -0,0 +1,54 @@ +From 15ac1734f90376fcd5b7d9f2636792fd41296231 Mon Sep 17 00:00:00 2001 +From: Mihir Patel +Date: Fri, 10 Mar 2023 06:52:27 +0000 +Subject: [PATCH] Read ID register to find pageable bit in optoe driver + +The current optoe driver looks at bit 2 for all optoe1 +(dev_class as ONE_ADDR) transceivers to detect if it's pageable or not. +However, for 100G CMIS based transceivers, some platforms use it as optoe1 +and not optoe3. With CMIS, the pageable bit has now changed to bit 7 for +the same register. This causes incorrect behavior when the driver checks +for pageability on 100G CMIS transceiver and hence, we need to +read the transceiver ID to see if the transceiver is CMIS based and then +find the relevant pageable bit. + +Test result +Tested the changes on a switch with a 100G CMIS and non-CMIS transceiver + +Signed-off-by: Mihir Patel +--- + drivers/misc/eeprom/optoe.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/drivers/misc/eeprom/optoe.c b/drivers/misc/eeprom/optoe.c +index 62294392c..b37441999 100644 +--- a/drivers/misc/eeprom/optoe.c ++++ b/drivers/misc/eeprom/optoe.c +@@ -630,7 +630,23 @@ static ssize_t optoe_page_legal(struct optoe_data *optoe, + return status; /* error out (no module?) */ + + if (optoe->dev_class == ONE_ADDR) { +- not_pageable = QSFP_NOT_PAGEABLE; ++ u8 idRegVal; ++ ++ status = optoe_eeprom_read(optoe, client, &idRegVal, ++ OPTOE_ID_REG, 1); ++ if (status < 0) ++ return status; /* error out (no module?) 
*/ ++ ++ /* ++ * For 100G CMIS compliant optic, if userspace has dev_class as ONE_ADDR, ++ * the driver looks at the incorrect bit to find if it is pageable. ++ * Below check ensures we read the appropriate bit for CMIS compliant optics ++ * with dev_class as ONE_ADDR ++ */ ++ if (idRegVal == 0x18 || idRegVal == 0x19 || idRegVal == 0x1e) ++ not_pageable = CMIS_NOT_PAGEABLE; ++ else ++ not_pageable = QSFP_NOT_PAGEABLE; + } else { + not_pageable = CMIS_NOT_PAGEABLE; + } +-- +2.25.1 + diff --git a/patch/series b/patch/series index 95f6679cc..ead8a0779 100755 --- a/patch/series +++ b/patch/series @@ -26,6 +26,7 @@ driver-support-optoe-chunk-offset-fix.patch driver-support-optoe-QSFP_DD.patch driver-support-optoe-write-max.patch driver-support-optoe-twoaddr-a2h-access.patch +driver-support-optoe-oneaddr-pageable.patch driver-net-tg3-add-param-short-preamble-and-reset.patch 0004-dt-bindings-hwmon-Add-missing-documentation-for-lm75.patch 0005-dt-bindings-hwmon-Add-tmp75b-to-lm75.txt.patch From 28ab21489f020c440edde9c3a3f1c3198764e9e2 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Mon, 13 Mar 2023 18:23:51 +0000 Subject: [PATCH 3/5] Modified comment in code to make it QSFP specific --- ...river-support-optoe-oneaddr-pageable.patch | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/patch/driver-support-optoe-oneaddr-pageable.patch b/patch/driver-support-optoe-oneaddr-pageable.patch index e4f0a355b..c7a9841fb 100644 --- a/patch/driver-support-optoe-oneaddr-pageable.patch +++ b/patch/driver-support-optoe-oneaddr-pageable.patch @@ -1,14 +1,14 @@ -From 15ac1734f90376fcd5b7d9f2636792fd41296231 Mon Sep 17 00:00:00 2001 +From b79e8ac1d03701011c36904fd85a2caf16b138d1 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Fri, 10 Mar 2023 06:52:27 +0000 Subject: [PATCH] Read ID register to find pageable bit in optoe driver The current optoe driver looks at bit 2 for all optoe1 (dev_class as ONE_ADDR) transceivers to detect if it's pageable or not. 
-However, for 100G CMIS based transceivers, some platforms use it as optoe1 +However, for QSFP+ w/ CMIS optics, some platforms use it as optoe1 and not optoe3. With CMIS, the pageable bit has now changed to bit 7 for the same register. This causes incorrect behavior when the driver checks -for pageability on 100G CMIS transceiver and hence, we need to +for pageability on QSFP+ w/ CMIS transceiver and hence, we need to read the transceiver ID to see if the transceiver is CMIS based and then find the relevant pageable bit. @@ -17,14 +17,14 @@ Tested the changes on a switch with a 100G CMIS and non-CMIS transceiver Signed-off-by: Mihir Patel --- - drivers/misc/eeprom/optoe.c | 18 +++++++++++++++++- - 1 file changed, 17 insertions(+), 1 deletion(-) + drivers/misc/eeprom/optoe.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/optoe.c b/drivers/misc/eeprom/optoe.c -index 62294392c..b37441999 100644 +index 62294392c..bdfd143d1 100644 --- a/drivers/misc/eeprom/optoe.c +++ b/drivers/misc/eeprom/optoe.c -@@ -630,7 +630,23 @@ static ssize_t optoe_page_legal(struct optoe_data *optoe, +@@ -630,7 +630,25 @@ static ssize_t optoe_page_legal(struct optoe_data *optoe, return status; /* error out (no module?) */ if (optoe->dev_class == ONE_ADDR) { @@ -37,10 +37,12 @@ index 62294392c..b37441999 100644 + return status; /* error out (no module?) */ + + /* -+ * For 100G CMIS compliant optic, if userspace has dev_class as ONE_ADDR, ++ * For QSFP+ with CMIS optics, if userspace has dev_class as ONE_ADDR, + * the driver looks at the incorrect bit to find if it is pageable. 
-+ * Below check ensures we read the appropriate bit for CMIS compliant optics -+ * with dev_class as ONE_ADDR ++ * Below check ensures we read the appropriate bit for such QSFP+ CMIS ++ * compliant optics with dev_class as ONE_ADDR ++ * The ID values below are based on the SFF-8024 spec (Page 0, byte 0) ++ * for CMIS optics + */ + if (idRegVal == 0x18 || idRegVal == 0x19 || idRegVal == 0x1e) + not_pageable = CMIS_NOT_PAGEABLE; From 068a1976be907f113f5fbdc1c047f86425435a94 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Mon, 13 Mar 2023 19:44:24 +0000 Subject: [PATCH 4/5] Changed QSFP+ to QSFP28 in comment --- patch/driver-support-optoe-oneaddr-pageable.patch | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/patch/driver-support-optoe-oneaddr-pageable.patch b/patch/driver-support-optoe-oneaddr-pageable.patch index c7a9841fb..dcadaaf03 100644 --- a/patch/driver-support-optoe-oneaddr-pageable.patch +++ b/patch/driver-support-optoe-oneaddr-pageable.patch @@ -1,14 +1,14 @@ -From b79e8ac1d03701011c36904fd85a2caf16b138d1 Mon Sep 17 00:00:00 2001 +From 46089e210d17eeeff7a94877cd79b302a578994e Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Fri, 10 Mar 2023 06:52:27 +0000 Subject: [PATCH] Read ID register to find pageable bit in optoe driver The current optoe driver looks at bit 2 for all optoe1 (dev_class as ONE_ADDR) transceivers to detect if it's pageable or not. -However, for QSFP+ w/ CMIS optics, some platforms use it as optoe1 +However, for QSFP28 w/ CMIS optics, some platforms use it as optoe1 and not optoe3. With CMIS, the pageable bit has now changed to bit 7 for the same register. This causes incorrect behavior when the driver checks -for pageability on QSFP+ w/ CMIS transceiver and hence, we need to +for pageability on QSFP28 w/ CMIS transceiver and hence, we need to read the transceiver ID to see if the transceiver is CMIS based and then find the relevant pageable bit. 
@@ -21,7 +21,7 @@ Signed-off-by: Mihir Patel 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/optoe.c b/drivers/misc/eeprom/optoe.c -index 62294392c..bdfd143d1 100644 +index 62294392c..f34bfe88b 100644 --- a/drivers/misc/eeprom/optoe.c +++ b/drivers/misc/eeprom/optoe.c @@ -630,7 +630,25 @@ static ssize_t optoe_page_legal(struct optoe_data *optoe, @@ -37,9 +37,9 @@ index 62294392c..bdfd143d1 100644 + return status; /* error out (no module?) */ + + /* -+ * For QSFP+ with CMIS optics, if userspace has dev_class as ONE_ADDR, ++ * For QSFP28 with CMIS optic, if userspace has dev_class as ONE_ADDR, + * the driver looks at the incorrect bit to find if it is pageable. -+ * Below check ensures we read the appropriate bit for such QSFP+ CMIS ++ * Below check ensures we read the appropriate bit for such QSFP28 CMIS + * compliant optics with dev_class as ONE_ADDR + * The ID values below are based on the SFF-8024 spec (Page 0, byte 0) + * for CMIS optics From 6a998a0876e5de9396d420653b9e910ddbab8da6 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 14 Mar 2023 02:33:04 +0000 Subject: [PATCH 5/5] Added performance stats related to EEPROM read --- patch/driver-support-optoe-oneaddr-pageable.patch | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/patch/driver-support-optoe-oneaddr-pageable.patch b/patch/driver-support-optoe-oneaddr-pageable.patch index dcadaaf03..8e8696e3a 100644 --- a/patch/driver-support-optoe-oneaddr-pageable.patch +++ b/patch/driver-support-optoe-oneaddr-pageable.patch @@ -1,4 +1,4 @@ -From 46089e210d17eeeff7a94877cd79b302a578994e Mon Sep 17 00:00:00 2001 +From cb4523bb02cb6228e3e8e5f3333e6af9a1b15466 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Fri, 10 Mar 2023 06:52:27 +0000 Subject: [PATCH] Read ID register to find pageable bit in optoe driver @@ -12,9 +12,17 @@ for pageability on QSFP28 w/ CMIS transceiver and hence, we need to read the transceiver ID to see if the transceiver is CMIS 
based and then find the relevant pageable bit. -Test result +Test result summary Tested the changes on a switch with a 100G CMIS and non-CMIS transceiver +No significant difference in EEPROM read time is seen after adding +the current changes. The stats below were taken for a 100G CMIS based +transceiver configured as optoe3 vs. optoe1 + +Test stats (average time taken over 3 dumps) +Time to dump first 4096B from EEPROM with transceiver as optoe3 - 914ms +Time to dump first 4096B from EEPROM with transceiver as optoe1 - 911ms + Signed-off-by: Mihir Patel --- drivers/misc/eeprom/optoe.c | 20 +++++++++++++++++++-