From dfa820563c673a43210ce5a436bda9628377bbaf Mon Sep 17 00:00:00 2001 From: Pho Tran Date: Mon, 25 Jan 2021 09:26:54 +0000 Subject: [PATCH 01/66] USB: serial: cp210x: add pid/vid for WSDA-200-USB commit 3c4f6ecd93442f4376a58b38bb40ee0b8c46e0e6 upstream. Information pid/vid of WSDA-200-USB, Lord corporation company: vid: 199b pid: ba30 Signed-off-by: Pho Tran [ johan: amend comment with product name ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index a90801ef005548..ffc96664b42599 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -201,6 +201,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ + { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ From 5ad95c521fd5c93661d151f86b94f07e7a20f039 Mon Sep 17 00:00:00 2001 From: Chenxin Jin Date: Wed, 13 Jan 2021 16:59:05 +0800 Subject: [PATCH 02/66] USB: serial: cp210x: add new VID/PID for supporting Teraoka AD2000 commit 43377df70480f82919032eb09832e9646a8a5efb upstream. Teraoka AD2000 uses the CP210x driver, but the chip VID/PID is customized with 0988/0578. We need the driver to support the new VID/PID. Signed-off-by: Chenxin Jin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index ffc96664b42599..361a2e3ccad8d8 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -61,6 +61,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ From 96dcfabef504637355e4fb1a9c1a50c9fc81ece5 Mon Sep 17 00:00:00 2001 From: Christoph Schemmel Date: Wed, 27 Jan 2021 20:58:46 +0100 Subject: [PATCH 03/66] USB: serial: option: Adding support for Cinterion MV31 commit e478d6029dca9d8462f426aee0d32896ef64f10f upstream. Adding support for Cinterion device MV31 for enumeration with PID 0x00B3 and 0x00B7. usb-devices output for 0x00B3 T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=1e2d ProdID=00b3 Rev=04.14 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00B3 USB Mobile Broadband S: SerialNumber=b3246eed C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=cdc_wdm I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option usb-devices output for 0x00B7 T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=1e2d ProdID=00b7 Rev=04.14 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00B3 USB Mobile Broadband S: SerialNumber=b3246eed C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Christoph Schemmel Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index fd41b07b5aaf19..f49eae18500cc3 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -425,6 +425,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_AHXX_2RMNET 0x0084 #define CINTERION_PRODUCT_AHXX_AUDIO 0x0085 #define CINTERION_PRODUCT_CLS8 0x00b0 +#define CINTERION_PRODUCT_MV31_MBIM 0x00b3 +#define CINTERION_PRODUCT_MV31_RMNET 0x00b7 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1914,6 +1916,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, /* HC28 enumerates with Siemens or Cinterion VID depending on FW revision */ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_MBIM, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), + .driver_info = RSVD(0)}, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), From 0d6e0a192e2ef76f9307d21eb44a8e0b5b4d7187 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 2 Jan 2021 12:59:40 +0800 Subject: [PATCH 04/66] arm64: dts: qcom: c630: keep both touchpad devices enabled [ Upstream commit a9164910c5ceed63551280a4a0b85d37ac2b19a5 ] Indicated by AML code in ACPI table, the touchpad in-use could be found on two possible slave addresses on &i2c3, i.e. hid@15 and hid@2c. And which one is in-use can be determined by reading another address on the I2C bus. Unfortunately, for DT boot, there is currently no support in firmware to make this check and patch DT accordingly. This results in a non-functional touchpad on those C630 devices with hid@2c. As i2c-hid driver will stop probing the device if there is nothing on the slave address, we can actually keep both devices enabled in DT, and i2c-hid driver will only probe the existing one. The only problem is that we cannot set up pinctrl in both device nodes, as two devices with the same pinctrl will cause pin conflict that makes the second device fail to probe. Let's move the pinctrl state up to parent node to solve this problem. As the pinctrl state of parent node is already defined in sdm845.dtsi, it ends up with overwriting pinctrl-0 with i2c3_hid_active state added in there. Fixes: 11d0e4f28156 ("arm64: dts: qcom: c630: Polish i2c-hid devices") Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210102045940.26874-1-shawn.guo@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index f539b3655f6b90..e638f216dbfb32 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -243,6 +243,8 @@ &i2c3 { status = "okay"; clock-frequency = <400000>; + /* Overwrite pinctrl-0 from sdm845.dtsi */ + pinctrl-0 = <&qup_i2c3_default &i2c3_hid_active>; tsel: hid@15 { compatible = "hid-over-i2c"; @@ -250,9 +252,6 @@ hid-descr-addr = <0x1>; interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; - - pinctrl-names = "default"; - pinctrl-0 = <&i2c3_hid_active>; }; tsc2: hid@2c { @@ -261,11 +260,6 @@ hid-descr-addr = <0x20>; interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; - - pinctrl-names = "default"; - pinctrl-0 = <&i2c3_hid_active>; - - status = "disabled"; }; }; From 829bf438cb39fce39e73db53ae53b0743d3bfb0e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 3 Jan 2021 17:59:51 -0800 Subject: [PATCH 05/66] Input: i8042 - unbreak Pegatron C15B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a3a9060ecad030e2c7903b2b258383d2c716b56c ] g++ reports drivers/input/serio/i8042-x86ia64io.h:225:3: error: ‘.matches’ designator used multiple times in the same initializer list C99 semantics is that last duplicated initialiser wins, so DMI entry gets overwritten. Fixes: a48491c65b51 ("Input: i8042 - add ByteSpeed touchpad to noloop table") Signed-off-by: Alexey Dobriyan Acked-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20201228072335.GA27766@localhost.localdomain Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-x86ia64io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index eca931da76c3a1..b7dbcbac3a1a54 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -219,6 +219,8 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), }, + }, + { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"), DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"), From 831132b13f0d2e1ed089c064705e62749101f478 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 12:17:47 +0300 Subject: [PATCH 06/66] arm64: dts: amlogic: meson-g12: Set FL-adj property value [ Upstream commit 7386a559caa6414e74578172c2bc4e636d6bd0a0 ] In accordance with the DWC USB3 bindings the property is supposed to have uint32 type. It's erroneous from the DT schema and driver points of view to declare it as boolean. As Neil suggested set it to 0x20 so not break the platform and to make the dtbs checker happy. Link: https://lore.kernel.org/linux-usb/20201010224121.12672-16-Sergey.Semin@baikalelectronics.ru/ Signed-off-by: Serge Semin Reviewed-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Fixes: 9baf7d6be730 ("arm64: dts: meson: g12a: Add G12A USB nodes") Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20201210091756.18057-3-Sergey.Semin@baikalelectronics.ru Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 354ef2f3eac67e..9533c85fb0a305 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2382,7 +2382,7 @@ interrupts = ; dr_mode = "host"; snps,dis_u2_susphy_quirk; - snps,quirk-frame-length-adjustment; + snps,quirk-frame-length-adjustment = <0x20>; snps,parkmode-disable-ss-quirk; }; }; From 4921f81ce65aaaebaebd78c3fb154ce067b442ba Mon Sep 17 00:00:00 2001 From: Sandy Huang Date: Fri, 8 Jan 2021 12:06:27 +0100 Subject: [PATCH 07/66] arm64: dts: rockchip: fix vopl iommu irq on px30 [ Upstream commit 656c648354e1561fa4f445b0b3252ec1d24e3951 ] The vop-mmu shares the irq with its matched vop but not the vpu. Fixes: 7053e06b1422 ("arm64: dts: rockchip: add core dtsi file for PX30 SoCs") Signed-off-by: Sandy Huang Signed-off-by: Heiko Stuebner Reviewed-by: Ezequiel Garcia Reviewed-by: Paul Kocialkowski Tested-by: Paul Kocialkowski Link: https://lore.kernel.org/r/20210108110627.3231226-1-heiko@sntech.de Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/px30.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 9e09909a510a18..98b014a8f9165d 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -860,7 +860,7 @@ vopl_mmu: iommu@ff470f00 { compatible = "rockchip,iommu"; reg = <0x0 0xff470f00 0x0 0x100>; - interrupts = ; + interrupts = ; interrupt-names = "vopl_mmu"; clocks = <&cru ACLK_VOPL>, <&cru HCLK_VOPL>; clock-names = "aclk", "hclk"; From 9146fffc5d2a3ec49906daf18d2e983d995b3521 Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:31 +0100 Subject: [PATCH 08/66] bpf, cgroup: Fix optlen WARN_ON_ONCE toctou [ Upstream commit bb8b81e396f7afbe7c50d789e2107512274d2a35 ] A toctou issue in `__cgroup_bpf_run_filter_getsockopt` can trigger a WARN_ON_ONCE in a check of `copy_from_user`. `*optlen` is checked to be non-negative in the individual getsockopt functions beforehand. Changing `*optlen` in a race to a negative value will result in a `copy_from_user(ctx.optval, optval, ctx.optlen)` with `ctx.optlen` being a negative integer. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-1-loris.reiff@liblor.ch Signed-off-by: Sasha Levin --- kernel/bpf/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5a8b4dfdb1419b..5b2413eb79db4a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1109,6 +1109,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } + if (ctx.optlen < 0) { + ret = -EFAULT; + goto out; + } + if (copy_from_user(ctx.optval, optval, min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; From 02531b5549ebf39d6f3c0602f083901211149355 Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:32 +0100 Subject: [PATCH 09/66] bpf, cgroup: Fix problematic bounds check [ Upstream commit f4a2da755a7e1f5d845c52aee71336cee289935a ] Since ctx.optlen is signed, a larger value than max_value could be passed, as it is later on used as unsigned, which causes a WARN_ON_ONCE in the copy_to_user. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-2-loris.reiff@liblor.ch Signed-off-by: Sasha Levin --- kernel/bpf/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5b2413eb79db4a..c2f0aa818b7af2 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1131,7 +1131,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) { + if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } From 98650c3d0e33726292dc8fb3b23e129c68c88108 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Jan 2021 22:15:21 +0100 Subject: [PATCH 10/66] um: virtio: free vu_dev only with the contained struct device [ Upstream commit f4172b084342fd3f9e38c10650ffe19eac30d8ce ] Since struct device is refcounted, we shouldn't free the vu_dev immediately when it's removed from the platform device, but only when the references actually all go away. Move the freeing to the release to accomplish that. Fixes: 5d38f324993f ("um: drivers: Add virtio vhost-user driver") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/drivers/virtio_uml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 179b41ad63baf2..18618af3835f9b 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -959,6 +959,7 @@ static void virtio_uml_release_dev(struct device *d) } os_close_file(vu_dev->sock); + kfree(vu_dev); } /* Platform device */ @@ -977,7 +978,7 @@ static int virtio_uml_probe(struct platform_device *pdev) if (!pdata) return -EINVAL; - vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL); + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); if (!vu_dev) return -ENOMEM; From 68e798fa3c0e097ef19d6c3c25def58838a87306 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Jan 2021 23:53:50 +0000 Subject: [PATCH 11/66] rxrpc: Fix deadlock around release of dst cached on udp tunnel [ Upstream commit 5399d52233c47905bbf97dcbaa2d7a9cc31670ba ] AF_RXRPC sockets use UDP ports in encap mode. This causes socket and dst from an incoming packet to get stolen and attached to the UDP socket from whence it is leaked when that socket is closed. When a network namespace is removed, the wait for dst records to be cleaned up happens before the cleanup of the rxrpc and UDP socket, meaning that the wait never finishes. Fix this by moving the rxrpc (and, by dependence, the afs) private per-network namespace registrations to the device group rather than subsys group. This allows cached rxrpc local endpoints to be cleared and their UDP sockets closed before we try waiting for the dst records. The symptom is that lines looking like the following: unregister_netdevice: waiting for lo to become free get emitted at regular intervals after running something like the referenced syzbot test. Thanks to Vadim for tracking this down and work out the fix. Reported-by: syzbot+df400f2f24a1677cd7e0@syzkaller.appspotmail.com Reported-by: Vadim Fedorenko Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: David Howells Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/r/161196443016.3868642.5577440140646403533.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- fs/afs/main.c | 6 +++--- net/rxrpc/af_rxrpc.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/afs/main.c b/fs/afs/main.c index c9c45d7078bd18..5cd26af2464c97 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -186,7 +186,7 @@ static int __init afs_init(void) goto error_cache; #endif - ret = register_pernet_subsys(&afs_net_ops); + ret = register_pernet_device(&afs_net_ops); if (ret < 0) goto error_net; @@ -206,7 +206,7 @@ static int __init afs_init(void) error_proc: afs_fs_exit(); error_fs: - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); @@ -237,7 +237,7 @@ static void __exit afs_exit(void) proc_remove(afs_proc_symlink); afs_fs_exit(); - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2921fc27671347..9bacec6653baca 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -976,7 +976,7 @@ static int __init af_rxrpc_init(void) goto error_security; } - ret = register_pernet_subsys(&rxrpc_net_ops); + ret = register_pernet_device(&rxrpc_net_ops); if (ret) goto error_pernet; @@ -1021,7 +1021,7 @@ static int __init af_rxrpc_init(void) error_sock: proto_unregister(&rxrpc_proto); error_proto: - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); error_pernet: rxrpc_exit_security(); error_security: @@ -1043,7 +1043,7 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); From 5ce999efcaa7efc430d7fd024c1d7920af5c59d0 Mon Sep 17 00:00:00 2001 From: Zyta Szpak Date: Thu, 21 Jan 2021 16:52:37 +0100 Subject: [PATCH 12/66] arm64: dts: ls1046a: fix dcfg address range [ Upstream commit aa880c6f3ee6dbd0d5ab02026a514ff8ea0a3328 ] Dcfg was overlapping with clockgen address space which resulted in failure in memory allocation for dcfg. According regs description dcfg size should not be bigger than 4KB. Signed-off-by: Zyta Szpak Fixes: 8126d88162a5 ("arm64: dts: add QorIQ LS1046A SoC support") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index d4c1da3d4bde28..04d4b1b11a00a4 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -304,7 +304,7 @@ dcfg: dcfg@1ee0000 { compatible = "fsl,ls1046a-dcfg", "syscon"; - reg = <0x0 0x1ee0000 0x0 0x10000>; + reg = <0x0 0x1ee0000 0x0 0x1000>; big-endian; }; From ec68581f7479fd2bc4d7bc1b117d53affb7cbba5 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Sun, 20 Dec 2020 22:18:19 +0800 Subject: [PATCH 13/66] igc: set the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr [ Upstream commit ebc8d125062e7dccb7922b2190b097c20d88ad96 ] This patch sets the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr. Without this change it wouldn't lead to a shadow RAM write EEWR timeout. Fixes: ab4056126813 ("igc: Add NVM support") Signed-off-by: Kevin Lo Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_i225.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index c25f555aaf8229..ed5d09c11c3893 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -219,9 +219,9 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, u16 *data) { struct igc_nvm_info *nvm = &hw->nvm; + s32 ret_val = -IGC_ERR_NVM; u32 attempts = 100000; u32 i, k, eewr = 0; - s32 ret_val = 0; /* A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. @@ -229,7 +229,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || words == 0) { hw_dbg("nvm parameter(s) out of bounds\n"); - ret_val = -IGC_ERR_NVM; goto out; } From 6380ef64b9eb9dcdac64a1789110c2f2389ecf69 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 7 Jan 2021 14:10:38 +0800 Subject: [PATCH 14/66] igc: check return value of ret_val in igc_config_fc_after_link_up [ Upstream commit b881145642ce0bbe2be521e0882e72a5cebe93b8 ] Check return value from ret_val to make error check actually work. Fixes: 4eb8080143a9 ("igc: Add setup link functionality") Signed-off-by: Kevin Lo Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 5eeb4c8caf4aec..08adf103e90b44 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -647,7 +647,7 @@ s32 igc_config_fc_after_link_up(struct igc_hw *hw) } out: - return 0; + return ret_val; } /** From 67b7f73bbe3f0b838aed1fd44ea1b94e9afca226 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Sat, 23 Jan 2021 00:22:23 +0000 Subject: [PATCH 15/66] i40e: Revert "i40e: don't report link up for a VF who hasn't enabled queues" [ Upstream commit f559a356043a55bab25a4c00505ea65c50a956fb ] This reverts commit 2ad1274fa35ace5c6360762ba48d33b63da2396c VF queues were not brought up when PF was brought up after being downed if the VF driver disabled VFs queues during PF down. This could happen in some older or external VF driver implementations. The problem was that PF driver used vf->queues_enabled as a condition to decide what link-state it would send out which caused the issue. Remove the check for vf->queues_enabled in the VF link notify. Now VF will always be notified of the current link status. Also remove the queues_enabled member from i40e_vf structure as it is not used anymore. Otherwise VNF implementation was broken and caused a link flap. The original commit was a workaround to avoid breaking existing VFs though it's really a fault of the VF code not the PF. The commit should be safe to revert as all of the VFs we know of have been fixed. Also, since we now know there is a related bug in the workaround, removing it is preferred. Fixes: 2ad1274fa35a ("i40e: don't report link up for a VF who hasn't enabled") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 13 +------------ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 - 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c20dc689698ed8..5acd599d6b9afa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -55,12 +55,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; - - /* Always report link is down if the VF queues aren't enabled */ - if (!vf->queues_enabled) { - pfe.event_data.link_event.link_status = false; - pfe.event_data.link_event.link_speed = 0; - } else if (vf->link_forced) { + if (vf->link_forced) { pfe.event_data.link_event.link_status = vf->link_up; pfe.event_data.link_event.link_speed = (vf->link_up ? VIRTCHNL_LINK_SPEED_40GB : 0); @@ -70,7 +65,6 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event_data.link_event.link_speed = i40e_virtchnl_link_speed(ls->link_speed); } - i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -2393,8 +2387,6 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) } } - vf->queues_enabled = true; - error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2416,9 +2408,6 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - /* Immediately mark queues as disabled */ - vf->queues_enabled = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 7164b9bb294ff3..f65cc0c1655029 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -99,7 +99,6 @@ struct i40e_vf { unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ bool link_forced; bool link_up; /* only valid if VF link is forced */ - bool queues_enabled; /* true if the VF queues are enabled */ bool spoofchk; u16 num_mac; u16 num_vlan; From 1cef1d46add82d3edd79220465309fedc047dbd3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 20 Jan 2021 17:41:18 +0200 Subject: [PATCH 16/66] net/mlx5: Fix leak upon failure of rule creation [ Upstream commit a5bfe6b4675e0eefbd9418055b5cc6e89af27eb4 ] When creation of a new rule that requires allocation of an FTE fails, need to call to tree_put_node on the FTE in order to release its' resource. Fixes: cefc23554fc2 ("net/mlx5: Fix FTE cleanup") Signed-off-by: Maor Gottlieb Reviewed-by: Alaa Hleihel Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4944c40436f082..11e12761b0a6ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1697,6 +1697,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, if (!fte_tmp) continue; rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); + /* No error check needed here, because insert_fte() is not called */ up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1745,6 +1746,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1844,6 +1847,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; From bf0507fb2073ee1635c34d66582ef6226fbf605c Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 31 Jan 2021 21:57:06 -0800 Subject: [PATCH 17/66] net: lapb: Copy the skb before sending a packet [ Upstream commit 88c7a9fd9bdd3e453f04018920964c6f848a591a ] When sending a packet, we will prepend it with an LAPB header. This modifies the shared parts of a cloned skb, so we should copy the skb rather than just clone it, before we prepend the header. In "Documentation/networking/driver.rst" (the 2nd point), it states that drivers shouldn't modify the shared parts of a cloned skb when transmitting. The "dev_queue_xmit_nit" function in "net/core/dev.c", which is called when an skb is being sent, clones the skb and sents the clone to AF_PACKET sockets. Because the LAPB drivers first remove a 1-byte pseudo-header before handing over the skb to us, if we don't copy the skb before prepending the LAPB header, the first byte of the packets received on AF_PACKET sockets can be corrupted. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xie He Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20210201055706.415842-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/lapb/lapb_out.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index 7a4d0715d1c32b..a966d29c772d9f 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -82,7 +82,8 @@ void lapb_kick(struct lapb_cb *lapb) skb = skb_dequeue(&lapb->write_queue); do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skbn = skb_copy(skb, GFP_ATOMIC); + if (!skbn) { skb_queue_head(&lapb->write_queue, skb); break; } From c545879e8080d168df45d0bccb2cb47be8a21d2d Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Mon, 1 Feb 2021 11:35:39 +0200 Subject: [PATCH 18/66] net: mvpp2: TCAM entry enable should be written after SRAM data [ Upstream commit 43f4a20a1266d393840ce010f547486d14cc0071 ] Last TCAM data contains TCAM enable bit. It should be written after SRAM data before entry enabled. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1612172139-28343-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index a30eb90ba3d28a..dd590086fe6a51 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -29,16 +29,16 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) /* Clear entry invalidation bit */ pe->tcam[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK; - /* Write tcam index - indirect access */ - mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); - for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) - mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); - /* Write sram index - indirect access */ mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index); for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++) mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram[i]); + /* Write tcam index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) + mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); + return 0; } From d97a821b2e9cb4ee8bce9b8ba8937592cd9573f2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 1 Feb 2021 21:50:56 +0100 Subject: [PATCH 19/66] r8169: fix WoL on shutdown if CONFIG_DEBUG_SHIRQ is set [ Upstream commit cc9f07a838c4988ed244d0907cb71d54b85482a5 ] So far phy_disconnect() is called before free_irq(). If CONFIG_DEBUG_SHIRQ is set and interrupt is shared, then free_irq() creates an "artificial" interrupt by calling the interrupt handler. The "link change" flag is set in the interrupt status register, causing phylib to eventually call phy_suspend(). Because the net_device is detached from the PHY already, the PHY driver can't recognize that WoL is configured and powers down the PHY. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/fe732c2c-a473-9088-3974-df83cfbd6efd@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 366ca1b5da5cce..1e8244ec5b3323 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -6419,10 +6419,10 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - phy_disconnect(tp->phydev); - free_irq(pci_irq_vector(pdev, 0), tp); + phy_disconnect(tp->phydev); + dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, From f1c87b4b2c7bf50c644c215f078a57d39e1165c7 Mon Sep 17 00:00:00 2001 From: Hermann Lauer Date: Thu, 28 Jan 2021 12:18:42 +0100 Subject: [PATCH 20/66] ARM: dts: sun7i: a20: bananapro: Fix ethernet phy-mode [ Upstream commit a900cac3750b9f0b8f5ed0503d9c6359532f644d ] BPi Pro needs TX and RX delay for Gbit to work reliable and avoid high packet loss rates. The realtek phy driver overrides the settings of the pull ups for the delays, so fix this for BananaPro. Fix the phy-mode description to correctly reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 10662a33dcd9 ("ARM: dts: sun7i: Add dts file for Bananapro board") Signed-off-by: Hermann Lauer Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210128111842.GA11919@lemon.iwr.uni-heidelberg.de Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun7i-a20-bananapro.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-bananapro.dts b/arch/arm/boot/dts/sun7i-a20-bananapro.dts index 01ccff756996d8..5740f9442705c4 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapro.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapro.dts @@ -110,7 +110,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; From 7e6dcaeadc0ea240721a53a83bd45fc36e3eee3c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 3 Feb 2021 01:20:25 -0800 Subject: [PATCH 21/66] nvmet-tcp: fix out-of-bounds access when receiving multiple h2cdata PDUs [ Upstream commit cb8563f5c735a042ea2dd7df1ad55ae06d63ffeb ] When the host sends multiple h2cdata PDUs, we keep track on the receive progress and calculate the scatterlist index and offsets. The issue is that sg_offset should only be kept for the first iov entry we map in the iovec as this is the difference between our cursor and the sg entry offset itself. In addition, the sg index was calculated wrong because we should not round up when dividing the command byte offset with PAG_SIZE. Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Reported-by: Narayan Ayalasomayajula Tested-by: Narayan Ayalasomayajula Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index e31823f19a0faa..9242224156f5b2 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -292,7 +292,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) length = cmd->pdu_len; cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; - cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE); + cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; sg = &cmd->req.sg[cmd->sg_idx]; @@ -305,6 +305,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) length -= iov_len; sg = sg_next(sg); iov++; + sg_offset = 0; } iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, From e57d70c59bb7f6835ee47eb633e44efce4ab51b9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 4 Feb 2021 18:32:36 -0800 Subject: [PATCH 22/66] memblock: do not start bottom-up allocations with kernel_end [ Upstream commit 2dcb3964544177c51853a210b6ad400de78ef17d ] With kaslr the kernel image is placed at a random place, so starting the bottom-up allocation with the kernel_end can result in an allocation failure and a warning like this one: hugetlb_cma: reserve 2048 MiB, up to 2048 MiB per node ------------[ cut here ]------------ memblock: bottom-up allocation failed, memory hotremove may be affected WARNING: CPU: 0 PID: 0 at mm/memblock.c:332 memblock_find_in_range_node+0x178/0x25a Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0+ #1169 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:memblock_find_in_range_node+0x178/0x25a Code: e9 6d ff ff ff 48 85 c0 0f 85 da 00 00 00 80 3d 9b 35 df 00 00 75 15 48 c7 c7 c0 75 59 88 c6 05 8b 35 df 00 01 e8 25 8a fa ff <0f> 0b 48 c7 44 24 20 ff ff ff ff 44 89 e6 44 89 ea 48 c7 c1 70 5c RSP: 0000:ffffffff88803d18 EFLAGS: 00010086 ORIG_RAX: 0000000000000000 RAX: 0000000000000000 RBX: 0000000240000000 RCX: 00000000ffffdfff RDX: 00000000ffffdfff RSI: 00000000ffffffea RDI: 0000000000000046 RBP: 0000000100000000 R08: ffffffff88922788 R09: 0000000000009ffb R10: 00000000ffffe000 R11: 3fffffffffffffff R12: 0000000000000000 R13: 0000000000000000 R14: 0000000080000000 R15: 00000001fb42c000 FS: 0000000000000000(0000) GS:ffffffff88f71000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffa080fb401000 CR3: 00000001fa80a000 CR4: 00000000000406b0 Call Trace: memblock_alloc_range_nid+0x8d/0x11e cma_declare_contiguous_nid+0x2c4/0x38c hugetlb_cma_reserve+0xdc/0x128 flush_tlb_one_kernel+0xc/0x20 native_set_fixmap+0x82/0xd0 flat_get_apic_id+0x5/0x10 register_lapic_address+0x8e/0x97 setup_arch+0x8a5/0xc3f start_kernel+0x66/0x547 load_ucode_bsp+0x4c/0xcd secondary_startup_64_no_verify+0xb0/0xbb random: get_random_bytes called from __warn+0xab/0x110 with crng_init=0 ---[ end trace f151227d0b39be70 ]--- At the same time, the kernel image is protected with memblock_reserve(), so we can just start searching at PAGE_SIZE. In this case the bottom-up allocation has the same chances to success as a top-down allocation, so there is no reason to fallback in the case of a failure. All together it simplifies the logic. Link: https://lkml.kernel.org/r/20201217201214.3414100-2-guro@fb.com Fixes: 8fabc623238e ("powerpc: Ensure that swiotlb buffer is allocated from low memory") Signed-off-by: Roman Gushchin Reviewed-by: Mike Rapoport Cc: Joonsoo Kim Cc: Michal Hocko Cc: Rik van Riel Cc: Wonhyuk Yang Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memblock.c | 49 ++++++------------------------------------------- 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index c4b16cae2bc9bd..11f6ae37d66998 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -257,14 +257,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, * * Find @size free area aligned to @align in the specified range and node. * - * When allocation direction is bottom-up, the @start should be greater - * than the end of the kernel image. Otherwise, it will be trimmed. The - * reason is that we want the bottom-up allocation just near the kernel - * image so it is highly likely that the allocated memory and the kernel - * will reside in the same node. - * - * If bottom-up allocation failed, will try to allocate memory top-down. - * * Return: * Found address on success, 0 on failure. */ @@ -273,8 +265,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, phys_addr_t end, int nid, enum memblock_flags flags) { - phys_addr_t kernel_end, ret; - /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE || end == MEMBLOCK_ALLOC_KASAN) @@ -283,40 +273,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, /* avoid allocating the first page */ start = max_t(phys_addr_t, start, PAGE_SIZE); end = max(start, end); - kernel_end = __pa_symbol(_end); - - /* - * try bottom-up allocation only when bottom-up mode - * is set and @end is above the kernel image. - */ - if (memblock_bottom_up() && end > kernel_end) { - phys_addr_t bottom_up_start; - - /* make sure we will allocate above the kernel */ - bottom_up_start = max(start, kernel_end); - /* ok, try bottom-up allocation first */ - ret = __memblock_find_range_bottom_up(bottom_up_start, end, - size, align, nid, flags); - if (ret) - return ret; - - /* - * we always limit bottom-up allocation above the kernel, - * but top-down allocation doesn't have the limit, so - * retrying top-down allocation may succeed when bottom-up - * allocation failed. - * - * bottom-up allocation is expected to be fail very rarely, - * so we use WARN_ONCE() here to see the stack trace if - * fail happens. - */ - WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE), - "memblock: bottom-up allocation failed, memory hotremove may be affected\n"); - } - - return __memblock_find_range_top_down(start, end, size, align, nid, - flags); + if (memblock_bottom_up()) + return __memblock_find_range_bottom_up(start, end, size, align, + nid, flags); + else + return __memblock_find_range_top_down(start, end, size, align, + nid, flags); } /** From 522567fe540d11de67c20c03d14eea8080047d2c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jan 2021 12:33:42 +0300 Subject: [PATCH 23/66] USB: gadget: legacy: fix an error code in eth_bind() commit 3e1f4a2e1184ae6ad7f4caf682ced9554141a0f4 upstream. This code should return -ENOMEM if the allocation fails but it currently returns success. Fixes: 9b95236eebdb ("usb: gadget: ether: allocate and init otg descriptor by otg capabilities") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YBKE9rqVuJEOUWpW@mwanda Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/ether.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/ether.c b/drivers/usb/gadget/legacy/ether.c index 30313b233680d6..99c7fc0d1d5973 100644 --- a/drivers/usb/gadget/legacy/ether.c +++ b/drivers/usb/gadget/legacy/ether.c @@ -403,8 +403,10 @@ static int eth_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail1; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; From 4d1d959348c1c7a32cbb59370dec9bd8fc6dc1df Mon Sep 17 00:00:00 2001 From: Jeremy Figgins Date: Sat, 23 Jan 2021 18:21:36 -0600 Subject: [PATCH 24/66] USB: usblp: don't call usb_set_interface if there's a single alt commit d8c6edfa3f4ee0d45d7ce5ef18d1245b78774b9d upstream. Some devices, such as the Winbond Electronics Corp. Virtual Com Port (Vendor=0416, ProdId=5011), lockup when usb_set_interface() or usb_clear_halt() are called. This device has only a single altsetting, so it should not be necessary to call usb_set_interface(). Acked-by: Pete Zaitcev Signed-off-by: Jeremy Figgins Link: https://lore.kernel.org/r/YAy9kJhM/rG8EQXC@watson Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 134dc2005ce97d..c9f6e975828852 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1329,14 +1329,17 @@ static int usblp_set_protocol(struct usblp *usblp, int protocol) if (protocol < USBLP_FIRST_PROTOCOL || protocol > USBLP_LAST_PROTOCOL) return -EINVAL; - alts = usblp->protocol[protocol].alt_setting; - if (alts < 0) - return -EINVAL; - r = usb_set_interface(usblp->dev, usblp->ifnum, alts); - if (r < 0) { - printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", - alts, usblp->ifnum); - return r; + /* Don't unnecessarily set the interface if there's a single alt. */ + if (usblp->intf->num_altsetting > 1) { + alts = usblp->protocol[protocol].alt_setting; + if (alts < 0) + return -EINVAL; + r = usb_set_interface(usblp->dev, usblp->ifnum, alts); + if (r < 0) { + printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", + alts, usblp->ifnum); + return r; + } } usblp->bidir = (usblp->protocol[protocol].epread != NULL); From 2a968ab0d2dd1af683603bf58fb0d65d302f679a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 1 Feb 2021 21:47:20 +0900 Subject: [PATCH 25/66] usb: renesas_usbhs: Clear pipe running flag in usbhs_pkt_pop() commit 9917f0e3cdba7b9f1a23f70e3f70b1a106be54a8 upstream. Should clear the pipe running flag in usbhs_pkt_pop(). Otherwise, we cannot use this pipe after dequeue was called while the pipe was running. Fixes: 8355b2b3082d ("usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle") Reported-by: Tho Vu Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1612183640-8898-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 05cdad13933b1e..cfc16943979d55 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -126,6 +126,7 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) } usbhs_pipe_clear_without_sequence(pipe, 0, 0); + usbhs_pipe_running(pipe, 0); __usbhsf_pkt_del(pkt); } From 9d058a06149b16884a1c969cfa26cb5a49d4ef7d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 27 Jan 2021 11:39:19 +0100 Subject: [PATCH 26/66] usb: dwc2: Fix endpoint direction check in ep_from_windex commit f670e9f9c8cac716c3506c6bac9e997b27ad441a upstream. dwc2_hsotg_process_req_status uses ep_from_windex() to retrieve the endpoint for the index provided in the wIndex request param. In a test-case with a rndis gadget running and sending a malformed packet to it like: dev.ctrl_transfer( 0x82, # bmRequestType 0x00, # bRequest 0x0000, # wValue 0x0001, # wIndex 0x00 # wLength ) it is possible to cause a crash: [ 217.533022] dwc2 ff300000.usb: dwc2_hsotg_process_req_status: USB_REQ_GET_STATUS [ 217.559003] Unable to handle kernel read from unreadable memory at virtual address 0000000000000088 ... [ 218.313189] Call trace: [ 218.330217] ep_from_windex+0x3c/0x54 [ 218.348565] usb_gadget_giveback_request+0x10/0x20 [ 218.368056] dwc2_hsotg_complete_request+0x144/0x184 This happens because ep_from_windex wants to compare the endpoint direction even if index_to_ep() didn't return an endpoint due to the direction not matching. The fix is easy insofar that the actual direction check is already happening when calling index_to_ep() which will return NULL if there is no endpoint for the targeted direction, so the offending check can go away completely. Fixes: c6f5c050e2a7 ("usb: dwc2: gadget: add bi-directional endpoint support") Cc: stable@vger.kernel.org Reported-by: Gerhard Klostermeier Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210127103919.58215-1-heiko@sntech.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 70ac47a341ac25..e3f1f20c49221b 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1543,7 +1543,6 @@ static void dwc2_hsotg_complete_oursetup(struct usb_ep *ep, static struct dwc2_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, u32 windex) { - struct dwc2_hsotg_ep *ep; int dir = (windex & USB_DIR_IN) ? 1 : 0; int idx = windex & 0x7F; @@ -1553,12 +1552,7 @@ static struct dwc2_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, if (idx > hsotg->num_of_eps) return NULL; - ep = index_to_ep(hsotg, idx, dir); - - if (idx && ep->dir_in != dir) - return NULL; - - return ep; + return index_to_ep(hsotg, idx, dir); } /** From 1f9e9c1048b8544e2c2953ff87c1ea4c975b2e34 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Mon, 25 Jan 2021 17:19:34 +0100 Subject: [PATCH 27/66] usb: dwc3: fix clock issue during resume in OTG mode commit 0e5a3c8284a30f4c43fd81d7285528ece74563b5 upstream. Commit fe8abf332b8f ("usb: dwc3: support clocks and resets for DWC3 core") introduced clock support and a new function named dwc3_core_init_for_resume() which enables the clock before calling dwc3_core_init() during resume as clocks get disabled during suspend. Unfortunately in this commit the DWC3_GCTL_PRTCAP_OTG case was forgotten and therefore during resume, a platform could call dwc3_core_init() without re-enabling the clocks first, preventing to resume properly. So update the resume path to call dwc3_core_init_for_resume() as it should. Fixes: fe8abf332b8f ("usb: dwc3: support clocks and resets for DWC3 core") Cc: stable@vger.kernel.org Signed-off-by: Gary Bisson Link: https://lore.kernel.org/r/20210125161934.527820-1-gary.bisson@boundarydevices.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 440dbf55ddf702..90ec65d31059fb 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1718,7 +1718,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (PMSG_IS_AUTO(msg)) break; - ret = dwc3_core_init(dwc); + ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; From f4e4f067f94cfb36de430b2aa582b584e1ec27fe Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Wed, 13 Jan 2021 18:05:11 +0800 Subject: [PATCH 28/66] usb: xhci-mtk: fix unreleased bandwidth data commit 1d69f9d901ef14d81c3b004e3282b8cc7b456280 upstream. xhci-mtk needs XHCI_MTK_HOST quirk functions in add_endpoint() and drop_endpoint() to handle its own sw bandwidth management. It stores bandwidth data into an internal table every time add_endpoint() is called, and drops those in drop_endpoint(). But when bandwidth allocation fails at one endpoint, all earlier allocation from the same interface could still remain at the table. This patch moves bandwidth management codes to check_bandwidth() and reset_bandwidth() path. To do so, this patch also adds those functions to xhci_driver_overrides and lets mtk-xhci to release all failed endpoints in reset_bandwidth() path. Fixes: 08e469de87a2 ("usb: xhci-mtk: supports bandwidth scheduling with multi-TT") Signed-off-by: Ikjoon Jang Link: https://lore.kernel.org/r/20210113180444.v6.1.Id0d31b5f3ddf5e734d2ab11161ac5821921b1e1e@changeid Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 123 ++++++++++++++++++++++---------- drivers/usb/host/xhci-mtk.c | 2 + drivers/usb/host/xhci-mtk.h | 13 ++++ drivers/usb/host/xhci.c | 8 ++- drivers/usb/host/xhci.h | 4 ++ 5 files changed, 111 insertions(+), 39 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 45c54d56ecbd55..a313e75ff1c6b8 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -200,6 +200,7 @@ static struct mu3h_sch_ep_info *create_sch_ep(struct usb_device *udev, sch_ep->sch_tt = tt; sch_ep->ep = ep; + INIT_LIST_HEAD(&sch_ep->tt_endpoint); return sch_ep; } @@ -583,6 +584,8 @@ int xhci_mtk_sch_init(struct xhci_hcd_mtk *mtk) mtk->sch_array = sch_array; + INIT_LIST_HEAD(&mtk->bw_ep_list_new); + return 0; } EXPORT_SYMBOL_GPL(xhci_mtk_sch_init); @@ -601,19 +604,14 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct xhci_ep_ctx *ep_ctx; struct xhci_slot_ctx *slot_ctx; struct xhci_virt_device *virt_dev; - struct mu3h_sch_bw_info *sch_bw; struct mu3h_sch_ep_info *sch_ep; - struct mu3h_sch_bw_info *sch_array; unsigned int ep_index; - int bw_index; - int ret = 0; xhci = hcd_to_xhci(hcd); virt_dev = xhci->devs[udev->slot_id]; ep_index = xhci_get_endpoint_index(&ep->desc); slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->in_ctx); ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index); - sch_array = mtk->sch_array; xhci_dbg(xhci, "%s() type:%d, speed:%d, mpkt:%d, dir:%d, ep:%p\n", __func__, usb_endpoint_type(&ep->desc), udev->speed, @@ -632,39 +630,34 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, return 0; } - bw_index = get_bw_index(xhci, udev, ep); - sch_bw = &sch_array[bw_index]; - sch_ep = create_sch_ep(udev, ep, ep_ctx); if (IS_ERR_OR_NULL(sch_ep)) return -ENOMEM; setup_sch_info(udev, ep_ctx, sch_ep); - ret = check_sch_bw(udev, sch_bw, sch_ep); - if (ret) { - xhci_err(xhci, "Not enough bandwidth!\n"); - if (is_fs_or_ls(udev->speed)) - drop_tt(udev); - - kfree(sch_ep); - return -ENOSPC; - } + list_add_tail(&sch_ep->endpoint, &mtk->bw_ep_list_new); - list_add_tail(&sch_ep->endpoint, &sch_bw->bw_ep_list); + return 0; +} +EXPORT_SYMBOL_GPL(xhci_mtk_add_ep_quirk); - ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(sch_ep->pkts) - | EP_BCSCOUNT(sch_ep->cs_count) | EP_BBM(sch_ep->burst_mode)); - ep_ctx->reserved[1] |= cpu_to_le32(EP_BOFFSET(sch_ep->offset) - | EP_BREPEAT(sch_ep->repeat)); +static void xhci_mtk_drop_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev, + struct mu3h_sch_ep_info *sch_ep) +{ + struct xhci_hcd *xhci = hcd_to_xhci(mtk->hcd); + int bw_index = get_bw_index(xhci, udev, sch_ep->ep); + struct mu3h_sch_bw_info *sch_bw = &mtk->sch_array[bw_index]; - xhci_dbg(xhci, " PKTS:%x, CSCOUNT:%x, BM:%x, OFFSET:%x, REPEAT:%x\n", - sch_ep->pkts, sch_ep->cs_count, sch_ep->burst_mode, - sch_ep->offset, sch_ep->repeat); + update_bus_bw(sch_bw, sch_ep, 0); + list_del(&sch_ep->endpoint); - return 0; + if (sch_ep->sch_tt) { + list_del(&sch_ep->tt_endpoint); + drop_tt(udev); + } + kfree(sch_ep); } -EXPORT_SYMBOL_GPL(xhci_mtk_add_ep_quirk); void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep) @@ -675,7 +668,7 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct xhci_virt_device *virt_dev; struct mu3h_sch_bw_info *sch_array; struct mu3h_sch_bw_info *sch_bw; - struct mu3h_sch_ep_info *sch_ep; + struct mu3h_sch_ep_info *sch_ep, *tmp; int bw_index; xhci = hcd_to_xhci(hcd); @@ -694,17 +687,73 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, bw_index = get_bw_index(xhci, udev, ep); sch_bw = &sch_array[bw_index]; - list_for_each_entry(sch_ep, &sch_bw->bw_ep_list, endpoint) { + list_for_each_entry_safe(sch_ep, tmp, &sch_bw->bw_ep_list, endpoint) { if (sch_ep->ep == ep) { - update_bus_bw(sch_bw, sch_ep, 0); - list_del(&sch_ep->endpoint); - if (is_fs_or_ls(udev->speed)) { - list_del(&sch_ep->tt_endpoint); - drop_tt(udev); - } - kfree(sch_ep); - break; + xhci_mtk_drop_ep(mtk, udev, sch_ep); } } } EXPORT_SYMBOL_GPL(xhci_mtk_drop_ep_quirk); + +int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct xhci_hcd_mtk *mtk = hcd_to_mtk(hcd); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_virt_device *virt_dev = xhci->devs[udev->slot_id]; + struct mu3h_sch_bw_info *sch_bw; + struct mu3h_sch_ep_info *sch_ep, *tmp; + int bw_index, ret; + + dev_dbg(&udev->dev, "%s\n", __func__); + + list_for_each_entry(sch_ep, &mtk->bw_ep_list_new, endpoint) { + bw_index = get_bw_index(xhci, udev, sch_ep->ep); + sch_bw = &mtk->sch_array[bw_index]; + + ret = check_sch_bw(udev, sch_bw, sch_ep); + if (ret) { + xhci_err(xhci, "Not enough bandwidth!\n"); + return -ENOSPC; + } + } + + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_list_new, endpoint) { + struct xhci_ep_ctx *ep_ctx; + struct usb_host_endpoint *ep = sch_ep->ep; + unsigned int ep_index = xhci_get_endpoint_index(&ep->desc); + + bw_index = get_bw_index(xhci, udev, ep); + sch_bw = &mtk->sch_array[bw_index]; + + list_move_tail(&sch_ep->endpoint, &sch_bw->bw_ep_list); + + ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index); + ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(sch_ep->pkts) + | EP_BCSCOUNT(sch_ep->cs_count) + | EP_BBM(sch_ep->burst_mode)); + ep_ctx->reserved[1] |= cpu_to_le32(EP_BOFFSET(sch_ep->offset) + | EP_BREPEAT(sch_ep->repeat)); + + xhci_dbg(xhci, " PKTS:%x, CSCOUNT:%x, BM:%x, OFFSET:%x, REPEAT:%x\n", + sch_ep->pkts, sch_ep->cs_count, sch_ep->burst_mode, + sch_ep->offset, sch_ep->repeat); + } + + return xhci_check_bandwidth(hcd, udev); +} +EXPORT_SYMBOL_GPL(xhci_mtk_check_bandwidth); + +void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct xhci_hcd_mtk *mtk = hcd_to_mtk(hcd); + struct mu3h_sch_ep_info *sch_ep, *tmp; + + dev_dbg(&udev->dev, "%s\n", __func__); + + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_list_new, endpoint) { + xhci_mtk_drop_ep(mtk, udev, sch_ep); + } + + xhci_reset_bandwidth(hcd, udev); +} +EXPORT_SYMBOL_GPL(xhci_mtk_reset_bandwidth); diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 85f1ff0399a9c2..09b67219fd146d 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -347,6 +347,8 @@ static void usb_wakeup_set(struct xhci_hcd_mtk *mtk, bool enable) static int xhci_mtk_setup(struct usb_hcd *hcd); static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = { .reset = xhci_mtk_setup, + .check_bandwidth = xhci_mtk_check_bandwidth, + .reset_bandwidth = xhci_mtk_reset_bandwidth, }; static struct hc_driver __read_mostly xhci_mtk_hc_driver; diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index 5ac458b7d2e0ef..5ba25b62bf5b22 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -130,6 +130,7 @@ struct mu3c_ippc_regs { struct xhci_hcd_mtk { struct device *dev; struct usb_hcd *hcd; + struct list_head bw_ep_list_new; struct mu3h_sch_bw_info *sch_array; struct mu3c_ippc_regs __iomem *ippc_regs; bool has_ippc; @@ -166,6 +167,8 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); +int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); +void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); #else static inline int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, @@ -179,6 +182,16 @@ static inline void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, { } +static inline int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, + struct usb_device *udev) +{ + return 0; +} + +static inline void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, + struct usb_device *udev) +{ +} #endif #endif /* _XHCI_MTK_H_ */ diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 70aa3055c41e71..91330517444e7f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2861,7 +2861,7 @@ static void xhci_check_bw_drop_ep_streams(struct xhci_hcd *xhci, * else should be touching the xhci->devs[slot_id] structure, so we * don't need to take the xhci->lock for manipulating that. */ -static int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { int i; int ret = 0; @@ -2959,7 +2959,7 @@ static int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) return ret; } -static void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci; struct xhci_virt_device *virt_dev; @@ -5380,6 +5380,10 @@ void xhci_init_driver(struct hc_driver *drv, drv->reset = over->reset; if (over->start) drv->start = over->start; + if (over->check_bandwidth) + drv->check_bandwidth = over->check_bandwidth; + if (over->reset_bandwidth) + drv->reset_bandwidth = over->reset_bandwidth; } } EXPORT_SYMBOL_GPL(xhci_init_driver); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index b483317bcb17b4..5d894d2196467f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1911,6 +1911,8 @@ struct xhci_driver_overrides { size_t extra_priv_size; int (*reset)(struct usb_hcd *hcd); int (*start)(struct usb_hcd *hcd); + int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); + void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); }; #define XHCI_CFC_DELAY 10 @@ -2063,6 +2065,8 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); void xhci_init_driver(struct hc_driver *drv, const struct xhci_driver_overrides *over); +int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); +void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); From 32410786279fc8422764056ff008dabfcee6f09a Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 1 Feb 2021 13:57:44 +0800 Subject: [PATCH 29/66] usb: xhci-mtk: skip dropping bandwidth of unchecked endpoints commit 54f6a8af372213a254af6609758d99f7c0b6b5ad upstream. For those unchecked endpoints, we don't allocate bandwidth for them, so no need free the bandwidth, otherwise will decrease the allocated bandwidth. Meanwhile use xhci_dbg() instead of dev_dbg() to print logs and rename bw_ep_list_new as bw_ep_chk_list. Fixes: 1d69f9d901ef ("usb: xhci-mtk: fix unreleased bandwidth data") Cc: stable Reviewed-and-tested-by: Ikjoon Jang Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/1612159064-28413-1-git-send-email-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 61 ++++++++++++++++++--------------- drivers/usb/host/xhci-mtk.h | 4 ++- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index a313e75ff1c6b8..dee8a329076d11 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -200,6 +200,7 @@ static struct mu3h_sch_ep_info *create_sch_ep(struct usb_device *udev, sch_ep->sch_tt = tt; sch_ep->ep = ep; + INIT_LIST_HEAD(&sch_ep->endpoint); INIT_LIST_HEAD(&sch_ep->tt_endpoint); return sch_ep; @@ -374,6 +375,7 @@ static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw, sch_ep->bw_budget_table[j]; } } + sch_ep->allocated = used; } static int check_sch_tt(struct usb_device *udev, @@ -542,6 +544,22 @@ static int check_sch_bw(struct usb_device *udev, return 0; } +static void destroy_sch_ep(struct usb_device *udev, + struct mu3h_sch_bw_info *sch_bw, struct mu3h_sch_ep_info *sch_ep) +{ + /* only release ep bw check passed by check_sch_bw() */ + if (sch_ep->allocated) + update_bus_bw(sch_bw, sch_ep, 0); + + list_del(&sch_ep->endpoint); + + if (sch_ep->sch_tt) { + list_del(&sch_ep->tt_endpoint); + drop_tt(udev); + } + kfree(sch_ep); +} + static bool need_bw_sch(struct usb_host_endpoint *ep, enum usb_device_speed speed, int has_tt) { @@ -584,7 +602,7 @@ int xhci_mtk_sch_init(struct xhci_hcd_mtk *mtk) mtk->sch_array = sch_array; - INIT_LIST_HEAD(&mtk->bw_ep_list_new); + INIT_LIST_HEAD(&mtk->bw_ep_chk_list); return 0; } @@ -636,29 +654,12 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, setup_sch_info(udev, ep_ctx, sch_ep); - list_add_tail(&sch_ep->endpoint, &mtk->bw_ep_list_new); + list_add_tail(&sch_ep->endpoint, &mtk->bw_ep_chk_list); return 0; } EXPORT_SYMBOL_GPL(xhci_mtk_add_ep_quirk); -static void xhci_mtk_drop_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev, - struct mu3h_sch_ep_info *sch_ep) -{ - struct xhci_hcd *xhci = hcd_to_xhci(mtk->hcd); - int bw_index = get_bw_index(xhci, udev, sch_ep->ep); - struct mu3h_sch_bw_info *sch_bw = &mtk->sch_array[bw_index]; - - update_bus_bw(sch_bw, sch_ep, 0); - list_del(&sch_ep->endpoint); - - if (sch_ep->sch_tt) { - list_del(&sch_ep->tt_endpoint); - drop_tt(udev); - } - kfree(sch_ep); -} - void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep) { @@ -688,9 +689,8 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, sch_bw = &sch_array[bw_index]; list_for_each_entry_safe(sch_ep, tmp, &sch_bw->bw_ep_list, endpoint) { - if (sch_ep->ep == ep) { - xhci_mtk_drop_ep(mtk, udev, sch_ep); - } + if (sch_ep->ep == ep) + destroy_sch_ep(udev, sch_bw, sch_ep); } } EXPORT_SYMBOL_GPL(xhci_mtk_drop_ep_quirk); @@ -704,9 +704,9 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) struct mu3h_sch_ep_info *sch_ep, *tmp; int bw_index, ret; - dev_dbg(&udev->dev, "%s\n", __func__); + xhci_dbg(xhci, "%s() udev %s\n", __func__, dev_name(&udev->dev)); - list_for_each_entry(sch_ep, &mtk->bw_ep_list_new, endpoint) { + list_for_each_entry(sch_ep, &mtk->bw_ep_chk_list, endpoint) { bw_index = get_bw_index(xhci, udev, sch_ep->ep); sch_bw = &mtk->sch_array[bw_index]; @@ -717,7 +717,7 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) } } - list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_list_new, endpoint) { + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_chk_list, endpoint) { struct xhci_ep_ctx *ep_ctx; struct usb_host_endpoint *ep = sch_ep->ep; unsigned int ep_index = xhci_get_endpoint_index(&ep->desc); @@ -746,12 +746,17 @@ EXPORT_SYMBOL_GPL(xhci_mtk_check_bandwidth); void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd_mtk *mtk = hcd_to_mtk(hcd); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct mu3h_sch_bw_info *sch_bw; struct mu3h_sch_ep_info *sch_ep, *tmp; + int bw_index; - dev_dbg(&udev->dev, "%s\n", __func__); + xhci_dbg(xhci, "%s() udev %s\n", __func__, dev_name(&udev->dev)); - list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_list_new, endpoint) { - xhci_mtk_drop_ep(mtk, udev, sch_ep); + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_chk_list, endpoint) { + bw_index = get_bw_index(xhci, udev, sch_ep->ep); + sch_bw = &mtk->sch_array[bw_index]; + destroy_sch_ep(udev, sch_bw, sch_ep); } xhci_reset_bandwidth(hcd, udev); diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index 5ba25b62bf5b22..734c5513aa1bfd 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -59,6 +59,7 @@ struct mu3h_sch_bw_info { * @ep_type: endpoint type * @maxpkt: max packet size of endpoint * @ep: address of usb_host_endpoint struct + * @allocated: the bandwidth is aready allocated from bus_bw * @offset: which uframe of the interval that transfer should be * scheduled first time within the interval * @repeat: the time gap between two uframes that transfers are @@ -86,6 +87,7 @@ struct mu3h_sch_ep_info { u32 ep_type; u32 maxpkt; void *ep; + bool allocated; /* * mtk xHCI scheduling information put into reserved DWs * in ep context @@ -130,8 +132,8 @@ struct mu3c_ippc_regs { struct xhci_hcd_mtk { struct device *dev; struct usb_hcd *hcd; - struct list_head bw_ep_list_new; struct mu3h_sch_bw_info *sch_array; + struct list_head bw_ep_chk_list; struct mu3c_ippc_regs __iomem *ippc_regs; bool has_ippc; int num_u2_ports; From ddc682d33024e6af30a6182479b7cc6fb25b7152 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Tue, 2 Feb 2021 16:38:24 +0800 Subject: [PATCH 30/66] usb: xhci-mtk: break loop when find the endpoint to drop commit a50ea34d6dd00a12c9cd29cf7b0fa72816bffbcb upstream. No need to check the following endpoints after finding the endpoint wanted to drop. Fixes: 54f6a8af3722 ("usb: xhci-mtk: skip dropping bandwidth of unchecked endpoints") Cc: stable Reported-by: Ikjoon Jang Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/1612255104-5363-1-git-send-email-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index dee8a329076d11..b45e5bf0899798 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -689,8 +689,10 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, sch_bw = &sch_array[bw_index]; list_for_each_entry_safe(sch_ep, tmp, &sch_bw->bw_ep_list, endpoint) { - if (sch_ep->ep == ep) + if (sch_ep->ep == ep) { destroy_sch_ep(udev, sch_bw, sch_ep); + break; + } } } EXPORT_SYMBOL_GPL(xhci_mtk_drop_ep_quirk); From 6b9a2e5c0c4294f39dbe018e87728618c8df3140 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 18 Sep 2020 16:17:46 +0300 Subject: [PATCH 31/66] usb: host: xhci-plat: add priv quirk for skip PHY initialization commit f768e718911e03a4a20b65f984eaa9b09045e4cd upstream. Some DRD controllers (eg, dwc3 & cdns3) have PHY management at their own driver to cover both device and host mode, so add one priv quirk for such users to skip PHY management from HCD core. Reviewed-by: Jun Li Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200918131752.16488-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 8 ++++++-- drivers/usb/host/xhci.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 52c625c0234106..7f78818ebbb34d 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -163,6 +163,8 @@ static int xhci_plat_probe(struct platform_device *pdev) struct usb_hcd *hcd; int ret; int irq; + struct xhci_plat_priv *priv = NULL; + if (usb_disabled()) return -ENODEV; @@ -257,8 +259,7 @@ static int xhci_plat_probe(struct platform_device *pdev) priv_match = of_device_get_match_data(&pdev->dev); if (priv_match) { - struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); - + priv = hcd_to_xhci_priv(hcd); /* Just copy data for now */ if (priv_match) *priv = *priv_match; @@ -307,6 +308,9 @@ static int xhci_plat_probe(struct platform_device *pdev) hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node); xhci->shared_hcd->tpl_support = hcd->tpl_support; + if (priv && (priv->quirks & XHCI_SKIP_PHY_INIT)) + hcd->skip_phy_initialization = 1; + ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto disable_usb_phy; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5d894d2196467f..1ad1d6e9e99793 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1873,6 +1873,7 @@ struct xhci_hcd { #define XHCI_DEFAULT_PM_RUNTIME_ALLOW BIT_ULL(33) #define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) #define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) +#define XHCI_SKIP_PHY_INIT BIT_ULL(37) #define XHCI_DISABLE_SPARSE BIT_ULL(38) unsigned int num_active_eps; From ecdd962c4b9bd5bb49df902b2ed25e2a15975cae Mon Sep 17 00:00:00 2001 From: Liangyan Date: Tue, 22 Dec 2020 11:06:26 +0800 Subject: [PATCH 32/66] ovl: fix dentry leak in ovl_get_redirect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e04527fefba6e4e66492f122cf8cc6314f3cf3bf upstream. We need to lock d_parent->d_lock before dget_dlock, or this may have d_lockref updated parallelly like calltrace below which will cause dentry->d_lockref leak and risk a crash. CPU 0 CPU 1 ovl_set_redirect lookup_fast ovl_get_redirect __d_lookup dget_dlock //no lock protection here spin_lock(&dentry->d_lock) dentry->d_lockref.count++ dentry->d_lockref.count++ [   49.799059] PGD 800000061fed7067 P4D 800000061fed7067 PUD 61fec5067 PMD 0 [   49.799689] Oops: 0002 [#1] SMP PTI [   49.800019] CPU: 2 PID: 2332 Comm: node Not tainted 4.19.24-7.20.al7.x86_64 #1 [   49.800678] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8a46cfe 04/01/2014 [   49.801380] RIP: 0010:_raw_spin_lock+0xc/0x20 [   49.803470] RSP: 0018:ffffac6fc5417e98 EFLAGS: 00010246 [   49.803949] RAX: 0000000000000000 RBX: ffff93b8da3446c0 RCX: 0000000a00000000 [   49.804600] RDX: 0000000000000001 RSI: 000000000000000a RDI: 0000000000000088 [   49.805252] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffff993cf040 [   49.805898] R10: ffff93b92292e580 R11: ffffd27f188a4b80 R12: 0000000000000000 [   49.806548] R13: 00000000ffffff9c R14: 00000000fffffffe R15: ffff93b8da3446c0 [   49.807200] FS:  00007ffbedffb700(0000) GS:ffff93b927880000(0000) knlGS:0000000000000000 [   49.807935] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [   49.808461] CR2: 0000000000000088 CR3: 00000005e3f74006 CR4: 00000000003606a0 [   49.809113] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [   49.809758] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [   49.810410] Call Trace: [   49.810653]  d_delete+0x2c/0xb0 [   49.810951]  vfs_rmdir+0xfd/0x120 [   49.811264]  do_rmdir+0x14f/0x1a0 [   49.811573]  do_syscall_64+0x5b/0x190 [   49.811917]  entry_SYSCALL_64_after_hwframe+0x44/0xa9 [   49.812385] RIP: 0033:0x7ffbf505ffd7 [   49.814404] RSP: 002b:00007ffbedffada8 EFLAGS: 00000297 ORIG_RAX: 0000000000000054 [   49.815098] RAX: ffffffffffffffda RBX: 00007ffbedffb640 RCX: 00007ffbf505ffd7 [   49.815744] RDX: 0000000004449700 RSI: 0000000000000000 RDI: 0000000006c8cd50 [   49.816394] RBP: 00007ffbedffaea0 R08: 0000000000000000 R09: 0000000000017d0b [   49.817038] R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000012 [   49.817687] R13: 00000000072823d8 R14: 00007ffbedffb700 R15: 00000000072823d8 [   49.818338] Modules linked in: pvpanic cirrusfb button qemu_fw_cfg atkbd libps2 i8042 [   49.819052] CR2: 0000000000000088 [   49.819368] ---[ end trace 4e652b8aa299aa2d ]--- [   49.819796] RIP: 0010:_raw_spin_lock+0xc/0x20 [   49.821880] RSP: 0018:ffffac6fc5417e98 EFLAGS: 00010246 [   49.822363] RAX: 0000000000000000 RBX: ffff93b8da3446c0 RCX: 0000000a00000000 [   49.823008] RDX: 0000000000000001 RSI: 000000000000000a RDI: 0000000000000088 [   49.823658] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffff993cf040 [   49.825404] R10: ffff93b92292e580 R11: ffffd27f188a4b80 R12: 0000000000000000 [   49.827147] R13: 00000000ffffff9c R14: 00000000fffffffe R15: ffff93b8da3446c0 [   49.828890] FS:  00007ffbedffb700(0000) GS:ffff93b927880000(0000) knlGS:0000000000000000 [   49.830725] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [   49.832359] CR2: 0000000000000088 CR3: 00000005e3f74006 CR4: 00000000003606a0 [   49.834085] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [   49.835792] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Cc: Fixes: a6c606551141 ("ovl: redirect on rename-dir") Signed-off-by: Liangyan Reviewed-by: Joseph Qi Suggested-by: Al Viro Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 29abdb1d3b5c6c..6509ec3cb3730c 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -940,8 +940,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect) buflen -= thislen; memcpy(&buf[buflen], name, thislen); - tmp = dget_dlock(d->d_parent); spin_unlock(&d->d_lock); + tmp = dget_parent(d); dput(d); d = tmp; From efa17285b3384607cfa4e5eabccb4f84b1489009 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 1 Feb 2021 09:33:24 +0100 Subject: [PATCH 33/66] mac80211: fix station rate table updates on assoc commit 18fe0fae61252b5ae6e26553e2676b5fac555951 upstream. If the driver uses .sta_add, station entries are only uploaded after the sta is in assoc state. Fix early station rate table updates by deferring them until the sta has been uploaded. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210201083324.3134-1-nbd@nbd.name [use rcu_access_pointer() instead since we won't dereference here] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/driver-ops.c | 5 ++++- net/mac80211/rate.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c9a8a2433e8ac5..48322e45e7ddb5 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -125,8 +125,11 @@ int drv_sta_state(struct ieee80211_local *local, } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) { ret = drv_sta_add(local, sdata, &sta->sta); - if (ret == 0) + if (ret == 0) { sta->uploaded = true; + if (rcu_access_pointer(sta->sta.rates)) + drv_sta_rate_tbl_update(local, sdata, &sta->sta); + } } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH) { drv_sta_remove(local, sdata, &sta->sta); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b051f125d3af24..9841db84bce0a2 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -934,7 +934,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw, if (old) kfree_rcu(old, rcu_head); - drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); + if (sta->uploaded) + drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); From e80f9021d5be80b05c8f75709d23811a831af565 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 29 Jan 2021 10:13:53 -0500 Subject: [PATCH 34/66] fgraph: Initialize tracing_graph_pause at task creation commit 7e0a9220467dbcfdc5bc62825724f3e52e50ab31 upstream. On some archs, the idle task can call into cpu_suspend(). The cpu_suspend() will disable or pause function graph tracing, as there's some paths in bringing down the CPU that can have issues with its return address being modified. The task_struct structure has a "tracing_graph_pause" atomic counter, that when set to something other than zero, the function graph tracer will not modify the return address. The problem is that the tracing_graph_pause counter is initialized when the function graph tracer is enabled. This can corrupt the counter for the idle task if it is suspended in these architectures. CPU 1 CPU 2 ----- ----- do_idle() cpu_suspend() pause_graph_tracing() task_struct->tracing_graph_pause++ (0 -> 1) start_graph_tracing() for_each_online_cpu(cpu) { ftrace_graph_init_idle_task(cpu) task-struct->tracing_graph_pause = 0 (1 -> 0) unpause_graph_tracing() task_struct->tracing_graph_pause-- (0 -> -1) The above should have gone from 1 to zero, and enabled function graph tracing again. But instead, it is set to -1, which keeps it disabled. There's no reason that the field tracing_graph_pause on the task_struct can not be initialized at boot up. Cc: stable@vger.kernel.org Fixes: 380c4b1411ccd ("tracing/function-graph-tracer: append the tracing_graph_flag") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211339 Reported-by: pierre.gondois@arm.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- init/init_task.c | 3 ++- kernel/trace/fgraph.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/init/init_task.c b/init/init_task.c index df7041be96fcaf..5d8359c44564a1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -171,7 +171,8 @@ struct task_struct init_task .lockdep_recursion = 0, #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .ret_stack = NULL, + .ret_stack = NULL, + .tracing_graph_pause = ATOMIC_INIT(0), #endif #if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION) .trace_recursion = 0, diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 7950a0356042a3..888cd00174fe38 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -367,7 +367,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->curr_ret_stack = -1; t->curr_ret_depth = -1; @@ -462,7 +461,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); static void graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; /* make curr_ret_stack visible before we add the ret_stack */ From d0f36951ead4a5fc8411fbdf206cd4f18ead5963 Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Thu, 28 Jan 2021 20:44:27 +0800 Subject: [PATCH 35/66] kretprobe: Avoid re-registration of the same kretprobe earlier commit 0188b87899ffc4a1d36a0badbe77d56c92fd91dc upstream. Our system encountered a re-init error when re-registering same kretprobe, where the kretprobe_instance in rp->free_instances is illegally accessed after re-init. Implementation to avoid re-registration has been introduced for kprobe before, but lags for register_kretprobe(). We must check if kprobe has been re-registered before re-initializing kretprobe, otherwise it will destroy the data struct of kretprobe registered, which can lead to memory leak, system crash, also some unexpected behaviors. We use check_kprobe_rereg() to check if kprobe has been re-registered before running register_kretprobe()'s body, for giving a warning message and terminate registration process. Link: https://lkml.kernel.org/r/20210128124427.2031088-1-bobo.shaobowang@huawei.com Cc: stable@vger.kernel.org Fixes: 1f0ab40976460 ("kprobes: Prevent re-registration of the same kprobe") [ The above commit should have been done for kretprobes too ] Acked-by: Naveen N. Rao Acked-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Signed-off-by: Wang ShaoBo Signed-off-by: Cheng Jian Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 283c8b01ce7892..26ae92c12fc22d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1972,6 +1972,10 @@ int register_kretprobe(struct kretprobe *rp) if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) return -EINVAL; + /* If only rp->kp.addr is specified, check reregistering kprobes */ + if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) + return -EINVAL; + if (kretprobe_blacklist_size) { addr = kprobe_addr(&rp->kp); if (IS_ERR(addr)) From c9654bbe52b571604b0f904e927b53111844cad6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 1 Feb 2021 16:20:40 -0800 Subject: [PATCH 36/66] libnvdimm/dimm: Avoid race between probe and available_slots_show() commit 7018c897c2f243d4b5f1b94bc6b4831a7eab80fb upstream. Richard reports that the following test: (while true; do cat /sys/bus/nd/devices/nmem*/available_slots 2>&1 > /dev/null done) & while true; do for i in $(seq 0 4); do echo nmem$i > /sys/bus/nd/drivers/nvdimm/bind done for i in $(seq 0 4); do echo nmem$i > /sys/bus/nd/drivers/nvdimm/unbind done done ...fails with a crash signature like: divide error: 0000 [#1] SMP KASAN PTI RIP: 0010:nd_label_nfree+0x134/0x1a0 [libnvdimm] [..] Call Trace: available_slots_show+0x4e/0x120 [libnvdimm] dev_attr_show+0x42/0x80 ? memset+0x20/0x40 sysfs_kf_seq_show+0x218/0x410 The root cause is that available_slots_show() consults driver-data, but fails to synchronize against device-unbind setting up a TOCTOU race to access uninitialized memory. Validate driver-data under the device-lock. Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure") Cc: Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Cc: Coly Li Reported-by: Richard Palethorpe Acked-by: Richard Palethorpe Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/dimm_devs.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 196aa44c4936a7..e0f411021c59d3 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -344,16 +344,16 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(state); -static ssize_t available_slots_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t __available_slots_show(struct nvdimm_drvdata *ndd, char *buf) { - struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + struct device *dev; ssize_t rc; u32 nfree; if (!ndd) return -ENXIO; + dev = ndd->dev; nvdimm_bus_lock(dev); nfree = nd_label_nfree(ndd); if (nfree - 1 > nfree) { @@ -365,6 +365,18 @@ static ssize_t available_slots_show(struct device *dev, nvdimm_bus_unlock(dev); return rc; } + +static ssize_t available_slots_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t rc; + + nd_device_lock(dev); + rc = __available_slots_show(dev_get_drvdata(dev), buf); + nd_device_unlock(dev); + + return rc; +} static DEVICE_ATTR_RO(available_slots); __weak ssize_t security_show(struct device *dev, From f6a47f2ce090bad60d4d3751dfd38909bcfa3d34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 23 Jan 2021 12:27:59 +0000 Subject: [PATCH 37/66] genirq/msi: Activate Multi-MSI early when MSI_FLAG_ACTIVATE_EARLY is set commit 4c457e8cb75eda91906a4f89fc39bde3f9a43922 upstream. When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI), __msi_domain_alloc_irqs() performs the activation of the interrupt (which in the case of PCI results in the endpoint being programmed) as soon as the interrupt is allocated. But it appears that this is only done for the first vector, introducing an inconsistent behaviour for PCI Multi-MSI. Fix it by iterating over the number of vectors allocated to each MSI descriptor. This is easily achieved by introducing a new "for_each_msi_vector" iterator, together with a tiny bit of refactoring. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Reported-by: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Shameer Kolothum Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210123122759.1781359-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/msi.h | 6 ++++++ kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 8ad679e9d9c04a..d695e2eb2092df 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -139,6 +139,12 @@ struct msi_desc { list_for_each_entry((desc), dev_to_msi_list((dev)), list) #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) +#define for_each_msi_vector(desc, __irq, dev) \ + for_each_msi_entry((desc), (dev)) \ + if ((desc)->irq) \ + for (__irq = (desc)->irq; \ + __irq < ((desc)->irq + (desc)->nvec_used); \ + __irq++) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index eb95f6106a1ee6..5d3da0db092ff8 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -437,22 +437,22 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, can_reserve = msi_check_reservation_mode(domain, info, dev); - for_each_msi_entry(desc, dev) { - virq = desc->irq; - if (desc->nvec_used == 1) - dev_dbg(dev, "irq %d for MSI\n", virq); - else + /* + * This flag is set by the PCI layer as we need to activate + * the MSI entries before the PCI layer enables MSI in the + * card. Otherwise the card latches a random msi message. + */ + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + goto skip_activate; + + for_each_msi_vector(desc, i, dev) { + if (desc->irq == i) { + virq = desc->irq; dev_dbg(dev, "irq [%d-%d] for MSI\n", virq, virq + desc->nvec_used - 1); - /* - * This flag is set by the PCI layer as we need to activate - * the MSI entries before the PCI layer enables MSI in the - * card. Otherwise the card latches a random msi message. - */ - if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) - continue; + } - irq_data = irq_domain_get_irq_data(domain, desc->irq); + irq_data = irq_domain_get_irq_data(domain, i); if (!can_reserve) { irqd_clr_can_reserve(irq_data); if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) @@ -463,28 +463,24 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, goto cleanup; } +skip_activate: /* * If these interrupts use reservation mode, clear the activated bit * so request_irq() will assign the final vector. */ if (can_reserve) { - for_each_msi_entry(desc, dev) { - irq_data = irq_domain_get_irq_data(domain, desc->irq); + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); irqd_clr_activated(irq_data); } } return 0; cleanup: - for_each_msi_entry(desc, dev) { - struct irq_data *irqd; - - if (desc->irq == virq) - break; - - irqd = irq_domain_get_irq_data(domain, desc->irq); - if (irqd_is_activated(irqd)) - irq_domain_deactivate_irq(irqd); + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + if (irqd_is_activated(irq_data)) + irq_domain_deactivate_irq(irq_data); } msi_domain_free_irqs(domain, dev); return ret; From fd6dc98f66ef136595e5b93d72d42f056e3801e7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 3 Feb 2021 13:37:02 +0200 Subject: [PATCH 38/66] xhci: fix bounce buffer usage for non-sg list case commit d4a610635400ccc382792f6be69427078541c678 upstream. xhci driver may in some special cases need to copy small amounts of payload data to a bounce buffer in order to meet the boundary and alignment restrictions set by the xHCI specification. In the majority of these cases the data is in a sg list, and driver incorrectly assumed data is always in urb->sg when using the bounce buffer. If data instead is contiguous, and in urb->transfer_buffer, we may still need to bounce buffer a small part if data starts very close (less than packet size) to a 64k boundary. Check if sg list is used before copying data to/from it. Fixes: f9c589e142d0 ("xhci: TD-fragment, align the unsplittable case with a bounce buffer") Cc: stable@vger.kernel.org Reported-by: Andreas Hartmann Tested-by: Andreas Hartmann Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210203113702.436762-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 52e156c0180423..900ea91fb3c6b8 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -695,11 +695,16 @@ static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len, DMA_FROM_DEVICE); /* for in tranfers we need to copy the data from bounce to sg */ - len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs, seg->bounce_buf, - seg->bounce_len, seg->bounce_offs); - if (len != seg->bounce_len) - xhci_warn(xhci, "WARN Wrong bounce buffer read length: %zu != %d\n", - len, seg->bounce_len); + if (urb->num_sgs) { + len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs, seg->bounce_buf, + seg->bounce_len, seg->bounce_offs); + if (len != seg->bounce_len) + xhci_warn(xhci, "WARN Wrong bounce buffer read length: %zu != %d\n", + len, seg->bounce_len); + } else { + memcpy(urb->transfer_buffer + seg->bounce_offs, seg->bounce_buf, + seg->bounce_len); + } seg->bounce_len = 0; seg->bounce_offs = 0; } @@ -3263,12 +3268,16 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, /* create a max max_pkt sized bounce buffer pointed to by last trb */ if (usb_urb_dir_out(urb)) { - len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs, - seg->bounce_buf, new_buff_len, enqd_len); - if (len != new_buff_len) - xhci_warn(xhci, - "WARN Wrong bounce buffer write length: %zu != %d\n", - len, new_buff_len); + if (urb->num_sgs) { + len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs, + seg->bounce_buf, new_buff_len, enqd_len); + if (len != new_buff_len) + xhci_warn(xhci, "WARN Wrong bounce buffer write length: %zu != %d\n", + len, new_buff_len); + } else { + memcpy(seg->bounce_buf, urb->transfer_buffer + enqd_len, new_buff_len); + } + seg->bounce_dma = dma_map_single(dev, seg->bounce_buf, max_pkt, DMA_TO_DEVICE); } else { From 00f581964b66386d74285164513dc38ed3b2a5a9 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 5 Feb 2021 15:42:48 +0100 Subject: [PATCH 39/66] cifs: report error instead of invalid when revalidating a dentry fails commit 21b200d091826a83aafc95d847139b2b0582f6d1 upstream. Assuming - //HOST/a is mounted on /mnt - //HOST/b is mounted on /mnt/b On a slow connection, running 'df' and killing it while it's processing /mnt/b can make cifs_get_inode_info() returns -ERESTARTSYS. This triggers the following chain of events: => the dentry revalidation fail => dentry is put and released => superblock associated with the dentry is put => /mnt/b is unmounted This patch makes cifs_d_revalidate() return the error instead of 0 (invalid) when cifs_revalidate_dentry() fails, except for ENOENT (file deleted) and ESTALE (file recreated). Signed-off-by: Aurelien Aptel Suggested-by: Shyam Prasad N Reviewed-by: Shyam Prasad N CC: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5a35850ccb1abe..9ae9a514676c3e 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -738,6 +738,7 @@ static int cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { struct inode *inode; + int rc; if (flags & LOOKUP_RCU) return -ECHILD; @@ -747,8 +748,25 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode))) CIFS_I(inode)->time = 0; /* force reval */ - if (cifs_revalidate_dentry(direntry)) - return 0; + rc = cifs_revalidate_dentry(direntry); + if (rc) { + cifs_dbg(FYI, "cifs_revalidate_dentry failed with rc=%d", rc); + switch (rc) { + case -ENOENT: + case -ESTALE: + /* + * Those errors mean the dentry is invalid + * (file was deleted or recreated) + */ + return 0; + default: + /* + * Otherwise some unexpected error happened + * report it as-is to VFS layer + */ + return rc; + } + } else { /* * If the inode wasn't known to be a dfs entry when From eaf2f835b52c201917a04f38864bcee5b76f12db Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 1 Feb 2021 20:36:54 -0600 Subject: [PATCH 40/66] smb3: Fix out-of-bounds bug in SMB2_negotiate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8d8d1dbefc423d42d626cf5b81aac214870ebaab upstream. While addressing some warnings generated by -Warray-bounds, I found this bug that was introduced back in 2017: CC [M] fs/cifs/smb2pdu.o fs/cifs/smb2pdu.c: In function ‘SMB2_negotiate’: fs/cifs/smb2pdu.c:822:16: warning: array subscript 1 is above array bounds of ‘__le16[1]’ {aka ‘short unsigned int[1]’} [-Warray-bounds] 822 | req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); | ~~~~~~~~~~~~~^~~ fs/cifs/smb2pdu.c:823:16: warning: array subscript 2 is above array bounds of ‘__le16[1]’ {aka ‘short unsigned int[1]’} [-Warray-bounds] 823 | req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); | ~~~~~~~~~~~~~^~~ fs/cifs/smb2pdu.c:824:16: warning: array subscript 3 is above array bounds of ‘__le16[1]’ {aka ‘short unsigned int[1]’} [-Warray-bounds] 824 | req->Dialects[3] = cpu_to_le16(SMB311_PROT_ID); | ~~~~~~~~~~~~~^~~ fs/cifs/smb2pdu.c:816:16: warning: array subscript 1 is above array bounds of ‘__le16[1]’ {aka ‘short unsigned int[1]’} [-Warray-bounds] 816 | req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); | ~~~~~~~~~~~~~^~~ At the time, the size of array _Dialects_ was changed from 1 to 3 in struct validate_negotiate_info_req, and then in 2019 it was changed from 3 to 4, but those changes were never made in struct smb2_negotiate_req, which has led to a 3 and a half years old out-of-bounds bug in function SMB2_negotiate() (fs/cifs/smb2pdu.c). Fix this by increasing the size of array _Dialects_ in struct smb2_negotiate_req to 4. Fixes: 9764c02fcbad ("SMB3: Add support for multidialect negotiate (SMB2.1 and later)") Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 2482978f094861..739556e385be8e 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -227,7 +227,7 @@ struct smb2_negotiate_req { __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */ __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */ __le16 Reserved2; - __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ + __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */ } __packed; /* Dialects */ From 68c825bd27266504d42f88b396ca3599265dd9b6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 2 Feb 2021 22:34:32 -0600 Subject: [PATCH 41/66] smb3: fix crediting for compounding when only one request in flight commit 91792bb8089b63b7b780251eb83939348ac58a64 upstream. Currently we try to guess if a compound request is going to succeed waiting for credits or not based on the number of requests in flight. This approach doesn't work correctly all the time because there may be only one request in flight which is going to bring multiple credits satisfying the compound request. Change the behavior to fail a request only if there are no requests in flight at all and proceed waiting for credits otherwise. Cc: # 5.1+ Signed-off-by: Pavel Shilovsky Reviewed-by: Tom Talpey Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/transport.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 4ffbf8f9658145..eab7940bfebefd 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -659,10 +659,22 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num, spin_lock(&server->req_lock); if (*credits < num) { /* - * Return immediately if not too many requests in flight since - * we will likely be stuck on waiting for credits. + * If the server is tight on resources or just gives us less + * credits for other reasons (e.g. requests are coming out of + * order and the server delays granting more credits until it + * processes a missing mid) and we exhausted most available + * credits there may be situations when we try to send + * a compound request but we don't have enough credits. At this + * point the client needs to decide if it should wait for + * additional credits or fail the request. If at least one + * request is in flight there is a high probability that the + * server will return enough credits to satisfy this compound + * request. + * + * Return immediately if no requests in flight since we will be + * stuck on waiting for credits. */ - if (server->in_flight < num - *credits) { + if (server->in_flight == 0) { spin_unlock(&server->req_lock); return -ENOTSUPP; } From 6844143e21980fdcfc8b936a79692d95a1f46668 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Sat, 23 Jan 2021 11:32:31 +0800 Subject: [PATCH 42/66] mmc: core: Limit retries when analyse of SDIO tuples fails commit f92e04f764b86e55e522988e6f4b6082d19a2721 upstream. When analysing tuples fails we may loop indefinitely to retry. Let's avoid this by using a 10s timeout and bail if not completed earlier. Signed-off-by: Fengnan Chang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210123033230.36442-1-fengnanchang@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sdio_cis.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c index 3efaa9534a777d..9a5aaac29099b1 100644 --- a/drivers/mmc/core/sdio_cis.c +++ b/drivers/mmc/core/sdio_cis.c @@ -20,6 +20,8 @@ #include "sdio_cis.h" #include "sdio_ops.h" +#define SDIO_READ_CIS_TIMEOUT_MS (10 * 1000) /* 10s */ + static int cistpl_vers_1(struct mmc_card *card, struct sdio_func *func, const unsigned char *buf, unsigned size) { @@ -266,6 +268,8 @@ static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func) do { unsigned char tpl_code, tpl_link; + unsigned long timeout = jiffies + + msecs_to_jiffies(SDIO_READ_CIS_TIMEOUT_MS); ret = mmc_io_rw_direct(card, 0, 0, ptr++, 0, &tpl_code); if (ret) @@ -318,6 +322,8 @@ static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func) prev = &this->next; if (ret == -ENOENT) { + if (time_after(jiffies, timeout)) + break; /* warn about unknown tuples */ pr_warn_ratelimited("%s: queuing unknown" " CIS tuple 0x%02x (%u bytes)\n", From f9034fcb27c0bafcab482a3dd8dd8f838ed4326c Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Tue, 5 Jan 2021 11:29:34 +0800 Subject: [PATCH 43/66] drm/amd/display: Revert "Fix EDID parsing after resume from suspend" commit 1a10e5244778169a5a53a527d7830cf0438132a1 upstream. This reverts commit b24bdc37d03a0478189e20a50286092840f414fa. It caused memory leak after S3 on 4K HDMI displays. Signed-off-by: Stylon Wang Reviewed-by: Rodrigo Siqueira Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d2dd387c95d861..de06ee7d2ad463 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1434,8 +1434,6 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) drm_connector_update_edid_property(connector, aconnector->edid); - drm_add_edid_modes(connector, aconnector->edid); - if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, aconnector->edid); From f9be9445e494463c6578dab3f72dd12aeacbeb2f Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Fri, 29 Jan 2021 06:24:42 +0100 Subject: [PATCH 44/66] nvme-pci: avoid the deepest sleep state on Kingston A2000 SSDs commit 538e4a8c571efdf131834431e0c14808bcfb1004 upstream. Some Kingston A2000 NVMe SSDs sooner or later get confused and stop working when they use the deepest APST sleep while running Linux. The system then crashes and one has to cold boot it to get the SSD working again. Kingston seems to known about this since at least mid-September 2020: https://bbs.archlinux.org/viewtopic.php?pid=1926994#p1926994 Someone working for a German company representing Kingston to the German press confirmed to me Kingston engineering is aware of the issue and investigating; the person stated that to their current knowledge only the deepest APST sleep state causes trouble. Therefore, make Linux avoid it for now by applying the NVME_QUIRK_NO_DEEPEST_PS to this SSD. I have two such SSDs, but it seems the problem doesn't occur with them. I hence couldn't verify if this patch really fixes the problem, but all the data in front of me suggests it should. This patch can easily be reverted or improved upon if a better solution surfaces. FWIW, there are many reports about the issue scattered around the web; most of the users disabled APST completely to make things work, some just made Linux avoid the deepest sleep state: https://bugzilla.kernel.org/show_bug.cgi?id=195039#c65 https://bugzilla.kernel.org/show_bug.cgi?id=195039#c73 https://bugzilla.kernel.org/show_bug.cgi?id=195039#c74 https://bugzilla.kernel.org/show_bug.cgi?id=195039#c78 https://bugzilla.kernel.org/show_bug.cgi?id=195039#c79 https://bugzilla.kernel.org/show_bug.cgi?id=195039#c80 https://askubuntu.com/questions/1222049/nvmekingston-a2000-sometimes-stops-giving-response-in-ubuntu-18-04dell-inspir https://community.acer.com/en/discussion/604326/m-2-nvme-ssd-aspire-517-51g-issue-compatibility-kingston-a2000-linux-ubuntu For the record, some data from 'nvme id-ctrl /dev/nvme0' NVME Identify Controller: vid : 0x2646 ssvid : 0x2646 mn : KINGSTON SA2000M81000G fr : S5Z42105 [...] ps 0 : mp:9.00W operational enlat:0 exlat:0 rrt:0 rrl:0 rwt:0 rwl:0 idle_power:- active_power:- ps 1 : mp:4.60W operational enlat:0 exlat:0 rrt:1 rrl:1 rwt:1 rwl:1 idle_power:- active_power:- ps 2 : mp:3.80W operational enlat:0 exlat:0 rrt:2 rrl:2 rwt:2 rwl:2 idle_power:- active_power:- ps 3 : mp:0.0450W non-operational enlat:2000 exlat:2000 rrt:3 rrl:3 rwt:3 rwl:3 idle_power:- active_power:- ps 4 : mp:0.0040W non-operational enlat:15000 exlat:15000 rrt:4 rrl:4 rwt:4 rwl:4 idle_power:- active_power:- Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Thorsten Leemhuis Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ef93bd3ed339ca..434d3f21f0e137 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3161,6 +3161,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, + { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, From 6d3201c77be55545317cc4bb6010a0040688e6a1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Feb 2021 13:20:17 -0800 Subject: [PATCH 45/66] KVM: SVM: Treat SVM as unsupported when running as an SEV guest commit ccd85d90ce092bdb047a7f6580f3955393833b22 upstream. Don't let KVM load when running as an SEV guest, regardless of what CPUID says. Memory is encrypted with a key that is not accessible to the host (L0), thus it's impossible for L0 to emulate SVM, e.g. it'll see garbage when reading the VMCB. Technically, KVM could decrypt all memory that needs to be accessible to the L0 and use shadow paging so that L0 does not need to shadow NPT, but exposing such information to L0 largely defeats the purpose of running as an SEV guest. This can always be revisited if someone comes up with a use case for running VMs inside SEV guests. Note, VMLOAD, VMRUN, etc... will also #GP on GPAs with C-bit set, i.e. KVM is doomed even if the SEV guest is debuggable and the hypervisor is willing to decrypt the VMCB. This may or may not be fixed on CPUs that have the SVME_ADDR_CHK fix. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210202212017.2486595-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 5 +++++ arch/x86/mm/mem_encrypt.c | 1 + 2 files changed, 6 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2b506904be0243..4906e480b5bb65 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -889,6 +889,11 @@ static int has_svm(void) return 0; } + if (sev_active()) { + pr_info("KVM is unsupported when running as an SEV guest\n"); + return 0; + } + return 1; } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 9268c12458c848..dfa01bcdc3694c 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -375,6 +375,7 @@ bool force_dma_unencrypted(struct device *dev) return false; } +EXPORT_SYMBOL_GPL(sev_active); /* Architecture __weak replacement functions */ void __init mem_encrypt_free_decrypted_mem(void) From b2640b08c43c689d4351e9e51d7a3e9ad532cb3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Feb 2021 08:55:46 -0800 Subject: [PATCH 46/66] KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode commit 943dea8af21bd896e0d6c30ea221203fb3cd3265 upstream. Set the emulator context to PROT64 if SYSENTER transitions from 32-bit userspace (compat mode) to a 64-bit kernel, otherwise the RIP update at the end of x86_emulate_insn() will incorrectly truncate the new RIP. Note, this bug is mostly limited to running an Intel virtual CPU model on an AMD physical CPU, as other combinations of virtual and physical CPUs do not trigger full emulation. On Intel CPUs, SYSENTER in compatibility mode is legal, and unconditionally transitions to 64-bit mode. On AMD CPUs, SYSENTER is illegal in compatibility mode and #UDs. If the vCPU is AMD, KVM injects a #UD on SYSENTER in compat mode. If the pCPU is Intel, SYSENTER will execute natively and not trigger #UD->VM-Exit (ignoring guest TLB shenanigans). Fixes: fede8076aab4 ("KVM: x86: handle wrap around 32-bit address space") Cc: stable@vger.kernel.org Signed-off-by: Jonny Barker [sean: wrote changelog] Signed-off-by: Sean Christopherson Message-Id: <20210202165546.2390296-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 39265b55929d2c..60c8dcb907a500 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2890,6 +2890,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : (u32)msr_data; + if (efer & EFER_LMA) + ctxt->mode = X86EMUL_MODE_PROT64; return X86EMUL_CONTINUE; } From 75be4852490f0c465796be423a9deb41aeffee51 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 18 Oct 2020 09:39:21 +0100 Subject: [PATCH 47/66] ARM: footbridge: fix dc21285 PCI configuration accessors commit 39d3454c3513840eb123b3913fda6903e45ce671 upstream. Building with gcc 4.9.2 reveals a latent bug in the PCI accessors for Footbridge platforms, which causes a fatal alignment fault while accessing IO memory. Fix this by making the assembly volatile. Cc: stable@vger.kernel.org Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-footbridge/dc21285.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index 8b81a17f675d91..e17ec92b90dd8b 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -66,15 +66,15 @@ dc21285_read_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("ldrb %0, [%1, %2]" + asm volatile("ldrb %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 2: - asm("ldrh %0, [%1, %2]" + asm volatile("ldrh %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 4: - asm("ldr %0, [%1, %2]" + asm volatile("ldr %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; } @@ -100,17 +100,17 @@ dc21285_write_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("strb %0, [%1, %2]" + asm volatile("strb %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 2: - asm("strh %0, [%1, %2]" + asm volatile("strh %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 4: - asm("str %0, [%1, %2]" + asm volatile("str %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; From 108f56ed354f2955a35ac6963e0bfa37677b0bf0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 4 Feb 2021 18:32:03 -0800 Subject: [PATCH 48/66] mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page commit 585fc0d2871c9318c949fbf45b1f081edd489e96 upstream. If a new hugetlb page is allocated during fallocate it will not be marked as active (set_page_huge_active) which will result in a later isolate_huge_page failure when the page migration code would like to move that page. Such a failure would be unexpected and wrong. Only export set_page_huge_active, just leave clear_page_huge_active as static. Because there are no external users. Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate()) Signed-off-by: Muchun Song Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 3 ++- include/linux/hugetlb.h | 2 ++ mm/hugetlb.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 5fff7cb3582f04..cf3af2140c3d8e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -675,9 +675,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_unlock(&hugetlb_fault_mutex_table[hash]); + set_page_huge_active(page); /* * unlock_page because locked by add_to_page_cache() - * page_put due to reference from alloc_huge_page() + * put_page() due to reference from alloc_huge_page() */ unlock_page(page); put_page(page); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8a03f392f3680e..0e080ba5efbcc0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -590,6 +590,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif +void set_page_huge_active(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3bc33fa8381775..1b01623df94d89 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1222,7 +1222,7 @@ bool page_huge_active(struct page *page) } /* never called for tail page */ -static void set_page_huge_active(struct page *page) +void set_page_huge_active(struct page *page) { VM_BUG_ON_PAGE(!PageHeadHuge(page), page); SetPagePrivate(&page[1]); From 3264a763174f860d48be329d252a34b6392dd8c3 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 4 Feb 2021 18:32:06 -0800 Subject: [PATCH 49/66] mm: hugetlb: fix a race between freeing and dissolving the page commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream. There is a race condition between __free_huge_page() and dissolve_free_huge_page(). CPU0: CPU1: // page_count(page) == 1 put_page(page) __free_huge_page(page) dissolve_free_huge_page(page) spin_lock(&hugetlb_lock) // PageHuge(page) && !page_count(page) update_and_free_page(page) // page is freed to the buddy spin_unlock(&hugetlb_lock) spin_lock(&hugetlb_lock) clear_page_huge_active(page) enqueue_huge_page(page) // It is wrong, the page is already freed spin_unlock(&hugetlb_lock) The race window is between put_page() and dissolve_free_huge_page(). We should make sure that the page is already on the free list when it is dissolved. As a result __free_huge_page would corrupt page(s) already in the buddy allocator. Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1b01623df94d89..4cbc78ab674c3f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -71,6 +71,21 @@ DEFINE_SPINLOCK(hugetlb_lock); static int num_fault_mutexes; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; +static inline bool PageHugeFreed(struct page *head) +{ + return page_private(head + 4) == -1UL; +} + +static inline void SetPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, -1UL); +} + +static inline void ClearPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, 0); +} + /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); @@ -869,6 +884,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) list_move(&page->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; + SetPageHugeFreed(page); } static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) @@ -886,6 +902,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) return NULL; list_move(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); + ClearPageHugeFreed(page); h->free_huge_pages--; h->free_huge_pages_node[nid]--; return page; @@ -1375,6 +1392,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) set_hugetlb_cgroup(page, NULL); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; + ClearPageHugeFreed(page); spin_unlock(&hugetlb_lock); } @@ -1602,6 +1620,7 @@ int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; +retry: /* Not to disrupt normal path by vainly holding hugetlb_lock */ if (!PageHuge(page)) return 0; @@ -1618,6 +1637,26 @@ int dissolve_free_huge_page(struct page *page) int nid = page_to_nid(head); if (h->free_huge_pages - h->resv_huge_pages == 0) goto out; + + /* + * We should make sure that the page is already on the free list + * when it is dissolved. + */ + if (unlikely(!PageHugeFreed(head))) { + spin_unlock(&hugetlb_lock); + cond_resched(); + + /* + * Theoretically, we should return -EBUSY when we + * encounter this race. In fact, we have a chance + * to successfully dissolve the page if we do a + * retry. Because the race window is quite small. + * If we seize this opportunity, it is an optimization + * for increasing the success rate of dissolving page. + */ + goto retry; + } + /* * Move PageHWPoison flag from head page to the raw error page, * which makes any subpages rather than the error page reusable. From af5508b1e862f803484a182f6b3606fd2096a2f4 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 4 Feb 2021 18:32:10 -0800 Subject: [PATCH 50/66] mm: hugetlb: fix a race between isolating and freeing page commit 0eb2df2b5629794020f75e94655e1994af63f0d4 upstream. There is a race between isolate_huge_page() and __free_huge_page(). CPU0: CPU1: if (PageHuge(page)) put_page(page) __free_huge_page(page) spin_lock(&hugetlb_lock) update_and_free_page(page) set_compound_page_dtor(page, NULL_COMPOUND_DTOR) spin_unlock(&hugetlb_lock) isolate_huge_page(page) // trigger BUG_ON VM_BUG_ON_PAGE(!PageHead(page), page) spin_lock(&hugetlb_lock) page_huge_active(page) // trigger BUG_ON VM_BUG_ON_PAGE(!PageHuge(page), page) spin_unlock(&hugetlb_lock) When we isolate a HugeTLB page on CPU0. Meanwhile, we free it to the buddy allocator on CPU1. Then, we can trigger a BUG_ON on CPU0, because it is already freed to the buddy allocator. Link: https://lkml.kernel.org/r/20210115124942.46403-5-songmuchun@bytedance.com Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4cbc78ab674c3f..bbdff3dec390ed 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5175,9 +5175,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list) { bool ret = true; - VM_BUG_ON_PAGE(!PageHead(page), page); spin_lock(&hugetlb_lock); - if (!page_huge_active(page) || !get_page_unless_zero(page)) { + if (!PageHeadHuge(page) || !page_huge_active(page) || + !get_page_unless_zero(page)) { ret = false; goto unlock; } From 90ef21e5806f992daf1be603f2b9643eabe14866 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 4 Feb 2021 18:32:13 -0800 Subject: [PATCH 51/66] mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active commit ecbf4724e6061b4b01be20f6d797d64d462b2bc8 upstream. The page_huge_active() can be called from scan_movable_pages() which do not hold a reference count to the HugeTLB page. So when we call page_huge_active() from scan_movable_pages(), the HugeTLB page can be freed parallel. Then we will trigger a BUG_ON which is in the page_huge_active() when CONFIG_DEBUG_VM is enabled. Just remove the VM_BUG_ON_PAGE. Link: https://lkml.kernel.org/r/20210115124942.46403-6-songmuchun@bytedance.com Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()") Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bbdff3dec390ed..d5b03b9262d4fc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1234,8 +1234,7 @@ struct hstate *size_to_hstate(unsigned long size) */ bool page_huge_active(struct page *page) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - return PageHead(page) && PagePrivate(&page[1]); + return PageHeadHuge(page) && PagePrivate(&page[1]); } /* never called for tail page */ From 56d61cd652dd75bf35c61c05303c40cd59356879 Mon Sep 17 00:00:00 2001 From: Rokudo Yan Date: Thu, 4 Feb 2021 18:32:20 -0800 Subject: [PATCH 52/66] mm, compaction: move high_pfn to the for loop scope commit 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 upstream. In fast_isolate_freepages, high_pfn will be used if a prefered one (ie PFN >= low_fn) not found. But the high_pfn is not reset before searching an free area, so when it was used as freepage, it may from another free area searched before. As a result move_freelist_head(freelist, freepage) will have unexpected behavior (eg corrupt the MOVABLE freelist) Unable to handle kernel paging request at virtual address dead000000000200 Mem abort info: ESR = 0x96000044 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000044 CM = 0, WnR = 1 [dead000000000200] address between user and kernel address ranges -000|list_cut_before(inline) -000|move_freelist_head(inline) -000|fast_isolate_freepages(inline) -000|isolate_freepages(inline) -000|compaction_alloc(?, ?) -001|unmap_and_move(inline) -001|migrate_pages([NSD:0xFFFFFF80088CBBD0] from = 0xFFFFFF80088CBD88, [NSD:0xFFFFFF80088CBBC8] get_new_p -002|__read_once_size(inline) -002|static_key_count(inline) -002|static_key_false(inline) -002|trace_mm_compaction_migratepages(inline) -002|compact_zone(?, [NSD:0xFFFFFF80088CBCB0] capc = 0x0) -003|kcompactd_do_work(inline) -003|kcompactd([X19] p = 0xFFFFFF93227FBC40) -004|kthread([X20] _create = 0xFFFFFFE1AFB26380) -005|ret_from_fork(asm) The issue was reported on an smart phone product with 6GB ram and 3GB zram as swap device. This patch fixes the issue by reset high_pfn before searching each free area, which ensure freepage and freelist match when call move_freelist_head in fast_isolate_freepages(). Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net Link: https://lkml.kernel.org/r/20210112094720.1238444-1-wu-yan@tcl.com Fixes: 5a811889de10f1eb ("mm, compaction: use free lists to quickly locate a migration target") Signed-off-by: Rokudo Yan Acked-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/compaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index 92470625f0b1e6..88c3f6bad1aba0 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1276,7 +1276,7 @@ fast_isolate_freepages(struct compact_control *cc) { unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1); unsigned int nr_scanned = 0; - unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0; + unsigned long low_pfn, min_pfn, highest = 0; unsigned long nr_isolated = 0; unsigned long distance; struct page *page = NULL; @@ -1321,6 +1321,7 @@ fast_isolate_freepages(struct compact_control *cc) struct page *freepage; unsigned long flags; unsigned int order_scanned = 0; + unsigned long high_pfn = 0; if (!area->nr_free) continue; From 40d0fff2976185efda4215f4d92c33984926cd75 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 4 Feb 2021 18:32:31 -0800 Subject: [PATCH 53/66] mm: thp: fix MADV_REMOVE deadlock on shmem THP commit 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 upstream. Sergey reported deadlock between kswapd correctly doing its usual lock_page(page) followed by down_read(page->mapping->i_mmap_rwsem), and madvise(MADV_REMOVE) on an madvise(MADV_HUGEPAGE) area doing down_write(page->mapping->i_mmap_rwsem) followed by lock_page(page). This happened when shmem_fallocate(punch hole)'s unmap_mapping_range() reaches zap_pmd_range()'s call to __split_huge_pmd(). The same deadlock could occur when partially truncating a mapped huge tmpfs file, or using fallocate(FALLOC_FL_PUNCH_HOLE) on it. __split_huge_pmd()'s page lock was added in 5.8, to make sure that any concurrent use of reuse_swap_page() (holding page lock) could not catch the anon THP's mapcounts and swapcounts while they were being split. Fortunately, reuse_swap_page() is never applied to a shmem or file THP (not even by khugepaged, which checks PageSwapCache before calling), and anonymous THPs are never created in shmem or file areas: so that __split_huge_pmd()'s page lock can only be necessary for anonymous THPs, on which there is no risk of deadlock with i_mmap_rwsem. Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101161409470.2022@eggly.anvils Fixes: c444eb564fb1 ("mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()") Signed-off-by: Hugh Dickins Reported-by: Sergey Senozhatsky Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 11aa763a314400..7bbf419bb86d6a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2306,7 +2306,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { spinlock_t *ptl; struct mmu_notifier_range range; - bool was_locked = false; + bool do_unlock_page = false; pmd_t _pmd; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, @@ -2322,7 +2322,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, VM_BUG_ON(freeze && !page); if (page) { VM_WARN_ON_ONCE(!PageLocked(page)); - was_locked = true; if (page != pmd_page(*pmd)) goto out; } @@ -2331,19 +2330,29 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (pmd_trans_huge(*pmd)) { if (!page) { page = pmd_page(*pmd); - if (unlikely(!trylock_page(page))) { - get_page(page); - _pmd = *pmd; - spin_unlock(ptl); - lock_page(page); - spin_lock(ptl); - if (unlikely(!pmd_same(*pmd, _pmd))) { - unlock_page(page); + /* + * An anonymous page must be locked, to ensure that a + * concurrent reuse_swap_page() sees stable mapcount; + * but reuse_swap_page() is not used on shmem or file, + * and page lock must not be taken when zap_pmd_range() + * calls __split_huge_pmd() while i_mmap_lock is held. + */ + if (PageAnon(page)) { + if (unlikely(!trylock_page(page))) { + get_page(page); + _pmd = *pmd; + spin_unlock(ptl); + lock_page(page); + spin_lock(ptl); + if (unlikely(!pmd_same(*pmd, _pmd))) { + unlock_page(page); + put_page(page); + page = NULL; + goto repeat; + } put_page(page); - page = NULL; - goto repeat; } - put_page(page); + do_unlock_page = true; } } if (PageMlocked(page)) @@ -2353,7 +2362,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd_locked(vma, pmd, range.start, freeze); out: spin_unlock(ptl); - if (!was_locked && page) + if (do_unlock_page) unlock_page(page); /* * No need to double call mmu_notifier->invalidate_range() callback. From bc1a3aeeff0ff3719a9758d27a03283ff413b08a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 28 Jan 2021 15:52:19 -0600 Subject: [PATCH 54/66] x86/build: Disable CET instrumentation in the kernel commit 20bf2b378729c4a0366a53e2018a0b70ace94bcd upstream. With retpolines disabled, some configurations of GCC, and specifically the GCC versions 9 and 10 in Ubuntu will add Intel CET instrumentation to the kernel by default. That breaks certain tracing scenarios by adding a superfluous ENDBR64 instruction before the fentry call, for functions which can be called indirectly. CET instrumentation isn't currently necessary in the kernel, as CET is only supported in user space. Disable it unconditionally and move it into the x86's Makefile as CET/CFI... enablement should be a per-arch decision anyway. [ bp: Massage and extend commit message. ] Fixes: 29be86d7f9cb ("kbuild: add -fcf-protection=none when using retpoline flags") Reported-by: Nikolay Borisov Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Nikolay Borisov Tested-by: Nikolay Borisov Cc: Cc: Seth Forshee Cc: Masahiro Yamada Link: https://lkml.kernel.org/r/20210128215219.6kct3h2eiustncws@treble Signed-off-by: Greg Kroah-Hartman --- Makefile | 6 ------ arch/x86/Makefile | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 7a47a2594f9573..ef4295fea9b7b5 100644 --- a/Makefile +++ b/Makefile @@ -920,12 +920,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) -# ensure -fcf-protection is disabled when using retpoline as it is -# incompatible with -mindirect-branch=thunk-extern -ifdef CONFIG_RETPOLINE -KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) -endif - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 94df0868804bcb..b5e3bfd4faceaa 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -131,6 +131,9 @@ else KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel + + # Intel CET isn't enabled in the kernel + KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif ifdef CONFIG_X86_X32 From 2d5705150707a4f3ec16054ab180fbd48d2ae049 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 5 Mar 2020 09:47:08 -0800 Subject: [PATCH 55/66] x86/apic: Add extra serialization for non-serializing MSRs commit 25a068b8e9a4eb193d755d58efcb3c98928636e0 upstream. Jan Kiszka reported that the x2apic_wrmsr_fence() function uses a plain MFENCE while the Intel SDM (10.12.3 MSR Access in x2APIC Mode) calls for MFENCE; LFENCE. Short summary: we have special MSRs that have weaker ordering than all the rest. Add fencing consistent with current SDM recommendations. This is not known to cause any issues in practice, only in theory. Longer story below: The reason the kernel uses a different semantic is that the SDM changed (roughly in late 2017). The SDM changed because folks at Intel were auditing all of the recommended fences in the SDM and realized that the x2apic fences were insufficient. Why was the pain MFENCE judged insufficient? WRMSR itself is normally a serializing instruction. No fences are needed because the instruction itself serializes everything. But, there are explicit exceptions for this serializing behavior written into the WRMSR instruction documentation for two classes of MSRs: IA32_TSC_DEADLINE and the X2APIC MSRs. Back to x2apic: WRMSR is *not* serializing in this specific case. But why is MFENCE insufficient? MFENCE makes writes visible, but only affects load/store instructions. WRMSR is unfortunately not a load/store instruction and is unaffected by MFENCE. This means that a non-serializing WRMSR could be reordered by the CPU to execute before the writes made visible by the MFENCE have even occurred in the first place. This means that an x2apic IPI could theoretically be triggered before there is any (visible) data to process. Does this affect anything in practice? I honestly don't know. It seems quite possible that by the time an interrupt gets to consume the (not yet) MFENCE'd data, it has become visible, mostly by accident. To be safe, add the SDM-recommended fences for all x2apic WRMSRs. This also leaves open the question of the _other_ weakly-ordered WRMSR: MSR_IA32_TSC_DEADLINE. While it has the same ordering architecture as the x2APIC MSRs, it seems substantially less likely to be a problem in practice. While writes to the in-memory Local Vector Table (LVT) might theoretically be reordered with respect to a weakly-ordered WRMSR like TSC_DEADLINE, the SDM has this to say: In x2APIC mode, the WRMSR instruction is used to write to the LVT entry. The processor ensures the ordering of this write and any subsequent WRMSR to the deadline; no fencing is required. But, that might still leave xAPIC exposed. The safest thing to do for now is to add the extra, recommended LFENCE. [ bp: Massage commit message, fix typos, drop accidentally added newline to tools/arch/x86/include/asm/barrier.h. ] Reported-by: Jan Kiszka Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20200305174708.F77040DD@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 10 ---------- arch/x86/include/asm/barrier.h | 18 ++++++++++++++++++ arch/x86/kernel/apic/apic.c | 4 ++++ arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++-- arch/x86/kernel/apic/x2apic_phys.c | 9 ++++++--- 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6016559ed17130..5bef1575708dc4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -197,16 +197,6 @@ static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. We need a serializing instruction or - * mfence for this. - */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7f828fe497978e..4819d5e5a3353d 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -84,4 +84,22 @@ do { \ #include +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + asm volatile("mfence; lfence" : : : "memory"); +} + #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 06fa808d720320..3dca7b8642e9cf 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -472,6 +473,9 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; + /* This MSR is special and need a special fence: */ + weak_wrmsr_fence(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index b0889c48a2ac5e..7eec3c154fa247 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); } @@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long flags; u32 dest; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e14eae6d6ea71a..032a00e5d9fa66 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } @@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); @@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which) { unsigned long cfg = __prepare_ICR(which, vector, 0); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); native_x2apic_icr_write(cfg, 0); } From 03d56dab56ae2a258c2295d20b5a27ecb69cbbb4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 8 Oct 2020 18:09:43 +0300 Subject: [PATCH 56/66] iwlwifi: mvm: don't send RFH_QUEUE_CONFIG_CMD with no queues commit 64f55156f7adedb1ac5bb9cdbcbc9ac05ff5a724 upstream. If we have only a single RX queue, such as when MSI-X is not available, we should not send the RFH_QUEUEU_CONFIG_CMD, because our only queue is the same as the command queue and will be configured as part of the context info. Our code was actually trying to send the command with 0 queues, which caused UMAC assert 0x1D04. Fix that by not sending the command when we have a single queue. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201008180656.c35eeb3299f8.I08f79a6ebe150a7d180b7005b24504bfdba6d8b5@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index c54fe6650018e3..7272d8522a9e9f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -134,7 +134,14 @@ static int iwl_configure_rxq(struct iwl_mvm *mvm) .dataflags[0] = IWL_HCMD_DFL_NOCOPY, }; - /* Do not configure default queue, it is configured via context info */ + /* + * The default queue is configured via context info, so if we + * have a single queue, there's nothing to do here. + */ + if (mvm->trans->num_rx_queues == 1) + return 0; + + /* skip the default queue */ num_queues = mvm->trans->num_rx_queues - 1; size = struct_size(cmd, data, num_queues); From 5fdf672759e9f94c8cfacd7f3ef9ffc426f01ab9 Mon Sep 17 00:00:00 2001 From: Benjamin Valentin Date: Thu, 21 Jan 2021 19:24:17 -0800 Subject: [PATCH 57/66] Input: xpad - sync supported devices with fork on GitHub commit 9bbd77d5bbc9aff8cb74d805c31751f5f0691ba8 upstream. There is a fork of this driver on GitHub [0] that has been updated with new device IDs. Merge those into the mainline driver, so the out-of-tree fork is not needed for users of those devices anymore. [0] https://github.com/paroj/xpad Signed-off-by: Benjamin Valentin Link: https://lore.kernel.org/r/20210121142523.1b6b050f@rechenknecht2k11 Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8c73377ac82ca4..3d004ca76b6ed5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -215,9 +215,17 @@ static const struct xpad_device { { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, - { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -296,6 +304,9 @@ static const struct xpad_device { { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, + { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, + { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, + { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, @@ -429,8 +440,12 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ + XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ + XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA Controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA Controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA Controllers */ + XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke X-Box One pad */ + XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ { } }; From e857e21eb2008356cb6f3fb0d7531a8995dca93d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 27 Jan 2021 09:53:17 -0800 Subject: [PATCH 58/66] iommu/vt-d: Do not use flush-queue when caching-mode is on commit 29b32839725f8c89a41cb6ee054c85f3116ea8b5 upstream. When an Intel IOMMU is virtualized, and a physical device is passed-through to the VM, changes of the virtual IOMMU need to be propagated to the physical IOMMU. The hypervisor therefore needs to monitor PTE mappings in the IOMMU page-tables. Intel specifications provide "caching-mode" capability that a virtual IOMMU uses to report that the IOMMU is virtualized and a TLB flush is needed after mapping to allow the hypervisor to propagate virtual IOMMU mappings to the physical IOMMU. To the best of my knowledge no real physical IOMMU reports "caching-mode" as turned on. Synchronizing the virtual and the physical IOMMU tables is expensive if the hypervisor is unaware which PTEs have changed, as the hypervisor is required to walk all the virtualized tables and look for changes. Consequently, domain flushes are much more expensive than page-specific flushes on virtualized IOMMUs with passthrough devices. The kernel therefore exploited the "caching-mode" indication to avoid domain flushing and use page-specific flushing in virtualized environments. See commit 78d5f0f500e6 ("intel-iommu: Avoid global flushes with caching mode.") This behavior changed after commit 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing"). Now, when batched TLB flushing is used (the default), full TLB domain flushes are performed frequently, requiring the hypervisor to perform expensive synchronization between the virtual TLB and the physical one. Getting batched TLB flushes to use page-specific invalidations again in such circumstances is not easy, since the TLB invalidation scheme assumes that "full" domain TLB flushes are performed for scalability. Disable batched TLB flushes when caching-mode is on, as the performance benefit from using batched TLB invalidations is likely to be much smaller than the overhead of the virtual-to-physical IOMMU page-tables synchronization. Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Nadav Amit Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210127175317.1600473-1-namit@vmware.com Signed-off-by: Joerg Roedel Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 984c7a6ea4fe8e..953d86ca6d2b25 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3285,6 +3285,12 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; + + if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) { + pr_info("Disable batched IOTLB flush due to virtualization"); + intel_iommu_strict = 1; + } + #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu)) intel_svm_init(iommu); From 7018edb19a926d124e07ba9825474bf68a437d10 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Thu, 10 Dec 2020 14:33:32 +0800 Subject: [PATCH 59/66] md: Set prev_flush_start and flush_bio in an atomic way commit dc5d17a3c39b06aef866afca19245a9cfb533a79 upstream. One customer reports a crash problem which causes by flush request. It triggers a warning before crash. /* new request after previous flush is completed */ if (ktime_after(req_start, mddev->prev_flush_start)) { WARN_ON(mddev->flush_bio); mddev->flush_bio = bio; bio = NULL; } The WARN_ON is triggered. We use spin lock to protect prev_flush_start and flush_bio in md_flush_request. But there is no lock protection in md_submit_flush_data. It can set flush_bio to NULL first because of compiler reordering write instructions. For example, flush bio1 sets flush bio to NULL first in md_submit_flush_data. An interrupt or vmware causing an extended stall happen between updating flush_bio and prev_flush_start. Because flush_bio is NULL, flush bio2 can get the lock and submit to underlayer disks. Then flush bio1 updates prev_flush_start after the interrupt or extended stall. Then flush bio3 enters in md_flush_request. The start time req_start is behind prev_flush_start. The flush_bio is not NULL(flush bio2 hasn't finished). So it can trigger the WARN_ON now. Then it calls INIT_WORK again. INIT_WORK() will re-initialize the list pointers in the work_struct, which then can result in a corrupted work list and the work_struct queued a second time. With the work list corrupted, it can lead in invalid work items being used and cause a crash in process_one_work. We need to make sure only one flush bio can be handled at one same time. So add spin lock in md_submit_flush_data to protect prev_flush_start and flush_bio in an atomic way. Reviewed-by: David Jeffery Signed-off-by: Xiao Ni Signed-off-by: Song Liu Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index ec5dfb7ae4e16e..cc38530804c906 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -538,8 +538,10 @@ static void md_submit_flush_data(struct work_struct *ws) * could wait for this and below md_handle_request could wait for those * bios because of suspend check */ + spin_lock_irq(&mddev->lock); mddev->last_flush = mddev->start_flush; mddev->flush_bio = NULL; + spin_unlock_irq(&mddev->lock); wake_up(&mddev->sb_wait); if (bio->bi_iter.bi_size == 0) { From 271ea7072901b157e38a4e64b8d32abdc287fa9c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 2 Dec 2020 15:50:17 +0800 Subject: [PATCH 60/66] igc: Report speed and duplex as unknown when device is runtime suspended commit 2e99dedc73f004f650b197c9b269c15c7e01ad15 upstream. Similar to commit 165ae7a8feb5 ("igb: Report speed and duplex as unknown when device is runtime suspended"), if we try to read speed and duplex sysfs while the device is runtime suspended, igc will complain and stops working: [ 123.449883] igc 0000:03:00.0 enp3s0: PCIe link lost, device now detached [ 123.450052] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 123.450056] #PF: supervisor read access in kernel mode [ 123.450058] #PF: error_code(0x0000) - not-present page [ 123.450059] PGD 0 P4D 0 [ 123.450064] Oops: 0000 [#1] SMP NOPTI [ 123.450068] CPU: 0 PID: 2525 Comm: udevadm Tainted: G U W OE 5.10.0-1002-oem #2+rkl2-Ubuntu [ 123.450078] RIP: 0010:igc_rd32+0x1c/0x90 [igc] [ 123.450080] Code: c0 5d c3 b8 fd ff ff ff c3 0f 1f 44 00 00 0f 1f 44 00 00 55 89 f0 48 89 e5 41 56 41 55 41 54 49 89 c4 53 48 8b 57 08 48 01 d0 <44> 8b 28 41 83 fd ff 74 0c 5b 44 89 e8 41 5c 41 5d 4 [ 123.450083] RSP: 0018:ffffb0d100d6fcc0 EFLAGS: 00010202 [ 123.450085] RAX: 0000000000000008 RBX: ffffb0d100d6fd30 RCX: 0000000000000000 [ 123.450087] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff945a12716c10 [ 123.450089] RBP: ffffb0d100d6fce0 R08: ffff945a12716550 R09: ffff945a09874000 [ 123.450090] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000008 [ 123.450092] R13: ffff945a12716000 R14: ffff945a037da280 R15: ffff945a037da290 [ 123.450094] FS: 00007f3b34c868c0(0000) GS:ffff945b89200000(0000) knlGS:0000000000000000 [ 123.450096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 123.450098] CR2: 0000000000000008 CR3: 00000001144de006 CR4: 0000000000770ef0 [ 123.450100] PKRU: 55555554 [ 123.450101] Call Trace: [ 123.450111] igc_ethtool_get_link_ksettings+0xd6/0x1b0 [igc] [ 123.450118] __ethtool_get_link_ksettings+0x71/0xb0 [ 123.450123] duplex_show+0x74/0xc0 [ 123.450129] dev_attr_show+0x1d/0x40 [ 123.450134] sysfs_kf_seq_show+0xa1/0x100 [ 123.450137] kernfs_seq_show+0x27/0x30 [ 123.450142] seq_read+0xb7/0x400 [ 123.450148] ? common_file_perm+0x72/0x170 [ 123.450151] kernfs_fop_read+0x35/0x1b0 [ 123.450155] vfs_read+0xb5/0x1b0 [ 123.450157] ksys_read+0x67/0xe0 [ 123.450160] __x64_sys_read+0x1a/0x20 [ 123.450164] do_syscall_64+0x38/0x90 [ 123.450168] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 123.450170] RIP: 0033:0x7f3b351fe142 [ 123.450173] Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 123.450174] RSP: 002b:00007fffef2ec138 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 123.450177] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3b351fe142 [ 123.450179] RDX: 0000000000001001 RSI: 00005644c047f070 RDI: 0000000000000003 [ 123.450180] RBP: 00007fffef2ec340 R08: 00005644c047f070 R09: 00007f3b352d9320 [ 123.450182] R10: 00005644c047c010 R11: 0000000000000246 R12: 00005644c047cbf0 [ 123.450184] R13: 00005644c047e6d0 R14: 0000000000000003 R15: 00007fffef2ec140 [ 123.450189] Modules linked in: rfcomm ccm cmac algif_hash algif_skcipher af_alg bnep toshiba_acpi industrialio toshiba_haps hp_accel lis3lv02d btusb btrtl btbcm btintel bluetooth ecdh_generic ecc joydev input_leds nls_iso8859_1 snd_sof_pci snd_sof_intel_byt snd_sof_intel_ipc snd_sof_intel_hda_common snd_soc_hdac_hda snd_hda_codec_hdmi snd_sof_xtensa_dsp snd_sof_intel_hda snd_sof snd_hda_ext_core snd_soc_acpi_intel_match snd_soc_acpi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg soundwire_intel soundwire_generic_allocation soundwire_cadence snd_hda_codec snd_hda_core ath10k_pci snd_hwdep intel_rapl_msr intel_rapl_common ath10k_core soundwire_bus snd_soc_core x86_pkg_temp_thermal ath intel_powerclamp snd_compress ac97_bus snd_pcm_dmaengine mac80211 snd_pcm coretemp snd_seq_midi snd_seq_midi_event snd_rawmidi kvm_intel cfg80211 snd_seq snd_seq_device snd_timer mei_hdcp kvm libarc4 snd crct10dif_pclmul ghash_clmulni_intel aesni_intel mei_me dell_wmi [ 123.450266] dell_smbios soundcore sparse_keymap dcdbas crypto_simd cryptd mei dell_uart_backlight glue_helper ee1004 wmi_bmof intel_wmi_thunderbolt dell_wmi_descriptor mac_hid efi_pstore acpi_pad acpi_tad intel_cstate sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear dm_mirror dm_region_hash dm_log hid_generic usbhid hid i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec crc32_pclmul rc_core drm intel_lpss_pci i2c_i801 ahci igc intel_lpss i2c_smbus idma64 xhci_pci libahci virt_dma xhci_pci_renesas wmi video pinctrl_tigerlake [ 123.450335] CR2: 0000000000000008 [ 123.450338] ---[ end trace 9f731e38b53c35cc ]--- The more generic approach will be wrap get_link_ksettings() with begin() and complete() callbacks, and calls runtime resume and runtime suspend routine respectively. However, igc is like igb, runtime resume routine uses rtnl_lock() which upper ethtool layer also uses. So to prevent a deadlock on rtnl, take a different approach, use pm_runtime_suspended() to avoid reading register while device is runtime suspended. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Kai-Heng Feng Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0303eeb7605056..0365bf2b480e32 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1709,7 +1709,8 @@ static int igc_get_link_ksettings(struct net_device *netdev, Asym_Pause); } - status = rd32(IGC_STATUS); + status = pm_runtime_suspended(&adapter->pdev->dev) ? + 0 : rd32(IGC_STATUS); if (status & IGC_STATUS_LU) { if (status & IGC_STATUS_SPEED_1000) { From 90d7459d24b88b84595c6cf9b3f40186625aae86 Mon Sep 17 00:00:00 2001 From: Chinmay Agarwal Date: Wed, 27 Jan 2021 22:24:54 +0530 Subject: [PATCH 61/66] neighbour: Prevent a dead entry from updating gc_list commit eb4e8fac00d1e01ada5e57c05d24739156086677 upstream. Following race condition was detected: - neigh_flush_dev() is under execution and calls neigh_mark_dead(n) marking the neighbour entry 'n' as dead. - Executing: __netif_receive_skb() -> __netif_receive_skb_core() -> arp_rcv() -> arp_process().arp_process() calls __neigh_lookup() which takes a reference on neighbour entry 'n'. - Moves further along neigh_flush_dev() and calls neigh_cleanup_and_release(n), but since reference count increased in t2, 'n' couldn't be destroyed. - Moves further along, arp_process() and calls neigh_update()-> __neigh_update() -> neigh_update_gc_list(), which adds the neighbour entry back in gc_list(neigh_mark_dead(), removed it earlier in t0 from gc_list) - arp_process() finally calls neigh_release(n), destroying the neighbour entry. This leads to 'n' still being part of gc_list, but the actual neighbour structure has been freed. The situation can be prevented from happening if we disallow a dead entry to have any possibility of updating gc_list. This is what the patch intends to achieve. Fixes: 9c29a2f55ec0 ("neighbor: Fix locking order for gc_list changes") Signed-off-by: Chinmay Agarwal Reviewed-by: Cong Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210127165453.GA20514@chinagar-linux.qualcomm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6c270fce200f4f..7080d708b7d08d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1244,13 +1244,14 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, old = neigh->nud_state; err = -EPERM; - if (!(flags & NEIGH_UPDATE_F_ADMIN) && - (old & (NUD_NOARP | NUD_PERMANENT))) - goto out; if (neigh->dead) { NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); + new = old; goto out; } + if (!(flags & NEIGH_UPDATE_F_ADMIN) && + (old & (NUD_NOARP | NUD_PERMANENT))) + goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); From 5d3007b6cc7b4c242f2965f08824f9c73947749c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 30 Jan 2021 01:27:47 +0300 Subject: [PATCH 62/66] net: ip_tunnel: fix mtu calculation commit 28e104d00281ade30250b24e098bf50887671ea4 upstream. dev->hard_header_len for tunnel interface is set only when header_ops are set too and already contains full overhead of any tunnel encapsulation. That's why there is not need to use this overhead twice in mtu calc. Fixes: fdafed459998 ("ip_gre: set dev->hard_header_len and dev->needed_headroom properly") Reported-by: Slava Bacherikov Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1611959267-20536-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ca525cf681a4e4..f64d1743b86d6a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } dev->needed_headroom = t_hlen + hlen; - mtu -= (dev->hard_header_len + t_hlen); + mtu -= t_hlen; if (mtu < IPV4_MIN_MTU) mtu = IPV4_MIN_MTU; @@ -347,7 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, nt = netdev_priv(dev); t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + dev->max_mtu = IP_MAX_MTU - t_hlen; ip_tunnel_add(itn, nt); return nt; @@ -494,11 +494,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, int mtu; tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; - pkt_size = skb->len - tunnel_hlen - dev->hard_header_len; + pkt_size = skb->len - tunnel_hlen; if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr) - tunnel_hlen; + mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); else mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -964,7 +963,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); - int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + int max_mtu = IP_MAX_MTU - t_hlen; if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1141,10 +1140,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], mtu = ip_tunnel_bind_dev(dev); if (tb[IFLA_MTU]) { - unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; + unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); - mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, - (unsigned int)(max - sizeof(struct iphdr))); + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); } err = dev_set_mtu(dev, mtu); From e65d331755deb98a39267ff10f7746e4fd237cea Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Sat, 30 Jan 2021 21:43:34 +0800 Subject: [PATCH 63/66] net: dsa: mv88e6xxx: override existent unicast portvec in port_fdb_add commit f72f2fb8fb6be095b98af5d740ac50cffd0b0cae upstream. Having multiple destination ports for a unicast address does not make sense. Make port_db_load_purge override existent unicast portvec instead of adding a new port bit. Fixes: 884729399260 ("net: dsa: mv88e6xxx: handle multiple ports in ATU") Signed-off-by: DENG Qingfang Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210130134334.10243-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 469b155df48859..1af09fd3fed1c1 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1517,7 +1517,11 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!entry.portvec) entry.state = 0; } else { - entry.portvec |= BIT(port); + if (state == MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC) + entry.portvec = BIT(port); + else + entry.portvec |= BIT(port); + entry.state = state; } From 76ab33055fbc6ac0b05e4b0c9dc6a56d76663482 Mon Sep 17 00:00:00 2001 From: Alexander Ovechkin Date: Mon, 1 Feb 2021 23:00:49 +0300 Subject: [PATCH 64/66] net: sched: replaced invalid qdisc tree flush helper in qdisc_replace commit 938e0fcd3253efdef8924714158911286d08cfe1 upstream. Commit e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") introduced qdisc tree flush/purge helpers, but erroneously used flush helper instead of purge helper in qdisc_replace function. This issue was found in our CI, that tests various qdisc setups by configuring qdisc and sending data through it. Call of invalid helper sporadically leads to corruption of vt_tree/cf_tree of hfsc_class that causes kernel oops: Oops: 0000 [#1] SMP PTI CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-8f6859df #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:rb_insert_color+0x18/0x190 Code: c3 31 c0 c3 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 48 8b 07 48 85 c0 0f 84 05 01 00 00 48 8b 10 f6 c2 01 0f 85 34 01 00 00 <48> 8b 4a 08 49 89 d0 48 39 c1 74 7d 48 85 c9 74 32 f6 01 01 75 2d RSP: 0018:ffffc900000b8bb0 EFLAGS: 00010246 RAX: ffff8881ef4c38b0 RBX: ffff8881d956e400 RCX: ffff8881ef4c38b0 RDX: 0000000000000000 RSI: ffff8881d956f0a8 RDI: ffff8881d956e4b0 RBP: 0000000000000000 R08: 000000d5c4e249da R09: 1600000000000000 R10: ffffc900000b8be0 R11: ffffc900000b8b28 R12: 0000000000000001 R13: 000000000000005a R14: ffff8881f0905000 R15: ffff8881f0387d00 FS: 0000000000000000(0000) GS:ffff8881f8b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 00000001f4796004 CR4: 0000000000060ee0 Call Trace: init_vf.isra.19+0xec/0x250 [sch_hfsc] hfsc_enqueue+0x245/0x300 [sch_hfsc] ? fib_rules_lookup+0x12a/0x1d0 ? __dev_queue_xmit+0x4b6/0x930 ? hfsc_delete_class+0x250/0x250 [sch_hfsc] __dev_queue_xmit+0x4b6/0x930 ? ip6_finish_output2+0x24d/0x590 ip6_finish_output2+0x24d/0x590 ? ip6_output+0x6c/0x130 ip6_output+0x6c/0x130 ? __ip6_finish_output+0x110/0x110 mld_sendpack+0x224/0x230 mld_ifc_timer_expire+0x186/0x2c0 ? igmp6_group_dropped+0x200/0x200 call_timer_fn+0x2d/0x150 run_timer_softirq+0x20c/0x480 ? tick_sched_do_timer+0x60/0x60 ? tick_sched_timer+0x37/0x70 __do_softirq+0xf7/0x2cb irq_exit+0xa0/0xb0 smp_apic_timer_interrupt+0x74/0x150 apic_timer_interrupt+0xf/0x20 Fixes: e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") Signed-off-by: Alexander Ovechkin Reported-by: Alexander Kuznetsov Acked-by: Dmitry Monakhov Acked-by: Dmitry Yakunin Acked-by: Cong Wang Link: https://lore.kernel.org/r/20210201200049.299153-1-ovov@yandex-team.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3d03756e106990..b2ceec7b280d47 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1158,7 +1158,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) - qdisc_tree_flush_backlog(old); + qdisc_purge_queue(old); sch_tree_unlock(sch); return old; From 40af962eb1d4ce963cf8ada21924aa4179b23b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 1 Feb 2021 16:08:03 +0100 Subject: [PATCH 65/66] usb: host: xhci: mvebu: make USB 3.0 PHY optional for Armada 3720 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3241929b67d28c83945d3191c6816a3271fd6b85 upstream. Older ATF does not provide SMC call for USB 3.0 phy power on functionality and therefore initialization of xhci-hcd is failing when older version of ATF is used. In this case phy_power_on() function returns -EOPNOTSUPP. [ 3.108467] mvebu-a3700-comphy d0018300.phy: unsupported SMC call, try updating your firmware [ 3.117250] phy phy-d0018300.phy.0: phy poweron failed --> -95 [ 3.123465] xhci-hcd: probe of d0058000.usb failed with error -95 This patch introduces a new plat_setup callback for xhci platform drivers which is called prior calling usb_add_hcd() function. This function at its beginning skips PHY init if hcd->skip_phy_initialization is set. Current init_quirk callback for xhci platform drivers is called from xhci_plat_setup() function which is called after chip reset completes. It happens in the middle of the usb_add_hcd() function and therefore this callback cannot be used for setting if PHY init should be skipped or not. For Armada 3720 this patch introduce a new xhci_mvebu_a3700_plat_setup() function configured as a xhci platform plat_setup callback. This new function calls phy_power_on() and in case it returns -EOPNOTSUPP then XHCI_SKIP_PHY_INIT quirk is set to instruct xhci-plat to skip PHY initialization. This patch fixes above failure by ignoring 'not supported' error in xhci-hcd driver. In this case it is expected that phy is already power on. It fixes initialization of xhci-hcd on Espressobin boards where is older Marvell's Arm Trusted Firmware without SMC call for USB 3.0 phy power. This is regression introduced in commit bd3d25b07342 ("arm64: dts: marvell: armada-37xx: link USB hosts with their PHYs") where USB 3.0 phy was defined and therefore xhci-hcd on Espressobin with older ATF started failing. Fixes: bd3d25b07342 ("arm64: dts: marvell: armada-37xx: link USB hosts with their PHYs") Cc: # 5.1+: ea17a0f153af: phy: marvell: comphy: Convert internal SMCC firmware return codes to errno Cc: # 5.1+: f768e718911e: usb: host: xhci-plat: add priv quirk for skip PHY initialization Tested-by: Tomasz Maciej Nowak Tested-by: Yoshihiro Shimoda # On R-Car Reviewed-by: Yoshihiro Shimoda # xhci-plat Acked-by: Mathias Nyman Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20210201150803.7305-1-pali@kernel.org [pali: Backported to 5.4 by replacing of_phy_put() with phy_put()] Signed-off-by: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mvebu.c | 42 +++++++++++++++++++++++++++++++++++ drivers/usb/host/xhci-mvebu.h | 6 +++++ drivers/usb/host/xhci-plat.c | 20 ++++++++++++++++- drivers/usb/host/xhci-plat.h | 1 + 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mvebu.c b/drivers/usb/host/xhci-mvebu.c index 60651a50770f93..f27d5c2c42f313 100644 --- a/drivers/usb/host/xhci-mvebu.c +++ b/drivers/usb/host/xhci-mvebu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,47 @@ int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) return 0; } +int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct device *dev = hcd->self.controller; + struct phy *phy; + int ret; + + /* Old bindings miss the PHY handle */ + phy = of_phy_get(dev->of_node, "usb3-phy"); + if (IS_ERR(phy) && PTR_ERR(phy) == -EPROBE_DEFER) + return -EPROBE_DEFER; + else if (IS_ERR(phy)) + goto phy_out; + + ret = phy_init(phy); + if (ret) + goto phy_put; + + ret = phy_set_mode(phy, PHY_MODE_USB_HOST_SS); + if (ret) + goto phy_exit; + + ret = phy_power_on(phy); + if (ret == -EOPNOTSUPP) { + /* Skip initializatin of XHCI PHY when it is unsupported by firmware */ + dev_warn(dev, "PHY unsupported by firmware\n"); + xhci->quirks |= XHCI_SKIP_PHY_INIT; + } + if (ret) + goto phy_exit; + + phy_power_off(phy); +phy_exit: + phy_exit(phy); +phy_put: + phy_put(phy); +phy_out: + + return 0; +} + int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); diff --git a/drivers/usb/host/xhci-mvebu.h b/drivers/usb/host/xhci-mvebu.h index ca0a3a5721dd72..74b4d21a498a09 100644 --- a/drivers/usb/host/xhci-mvebu.h +++ b/drivers/usb/host/xhci-mvebu.h @@ -12,6 +12,7 @@ struct usb_hcd; #if IS_ENABLED(CONFIG_USB_XHCI_MVEBU) int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd); +int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd); int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd); #else static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) @@ -19,6 +20,11 @@ static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) return 0; } +static inline int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd) +{ + return 0; +} + static inline int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) { return 0; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 7f78818ebbb34d..84cfa854428524 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -44,6 +44,16 @@ static void xhci_priv_plat_start(struct usb_hcd *hcd) priv->plat_start(hcd); } +static int xhci_priv_plat_setup(struct usb_hcd *hcd) +{ + struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); + + if (!priv->plat_setup) + return 0; + + return priv->plat_setup(hcd); +} + static int xhci_priv_init_quirk(struct usb_hcd *hcd) { struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); @@ -101,6 +111,7 @@ static const struct xhci_plat_priv xhci_plat_marvell_armada = { }; static const struct xhci_plat_priv xhci_plat_marvell_armada3700 = { + .plat_setup = xhci_mvebu_a3700_plat_setup, .init_quirk = xhci_mvebu_a3700_init_quirk, }; @@ -308,7 +319,14 @@ static int xhci_plat_probe(struct platform_device *pdev) hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node); xhci->shared_hcd->tpl_support = hcd->tpl_support; - if (priv && (priv->quirks & XHCI_SKIP_PHY_INIT)) + + if (priv) { + ret = xhci_priv_plat_setup(hcd); + if (ret) + goto disable_usb_phy; + } + + if ((xhci->quirks & XHCI_SKIP_PHY_INIT) || (priv && (priv->quirks & XHCI_SKIP_PHY_INIT))) hcd->skip_phy_initialization = 1; ret = usb_add_hcd(hcd, irq, IRQF_SHARED); diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 5681723fc9cd73..b7749151bdfb84 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -13,6 +13,7 @@ struct xhci_plat_priv { const char *firmware_name; unsigned long long quirks; + int (*plat_setup)(struct usb_hcd *); void (*plat_start)(struct usb_hcd *); int (*init_quirk)(struct usb_hcd *); int (*resume_quirk)(struct usb_hcd *); From 5e1942063dc3633f7a127aa2b159c13507580d21 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Feb 2021 09:25:33 +0100 Subject: [PATCH 66/66] Linux 5.4.97 Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Igor Matheus Andrade Torrente Tested-by: Jason Self Tested-by: Guenter Roeck Tested-by: Ross Schmidt Link: https://lore.kernel.org/r/20210208145810.230485165@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef4295fea9b7b5..032751f6be0c11 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 96 +SUBLEVEL = 97 EXTRAVERSION = NAME = Kleptomaniac Octopus