From 8c4b361672a6d50ce547650dc6ec7e2d12799489 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 16 May 2018 00:03:51 +0300 Subject: [PATCH] Changes representative of linux-3.10.0-862.3.2.el7.tar.xz --- .41234.tmp | Bin 0 -> 664 bytes .47691.tmp | Bin 0 -> 936 bytes .49442.tmp | Bin 0 -> 664 bytes .51016.tmp | Bin 0 -> 1024 bytes .../ABI/testing/sysfs-devices-system-cpu | 3 +- Documentation/kernel-parameters.txt | 37 +++ Documentation/userspace-api/spec_ctrl.txt | 85 +++++++ Makefile | 2 +- arch/x86/include/asm/cpufeature.h | 2 + arch/x86/include/asm/cpufeatures.h | 16 +- arch/x86/include/asm/msr-index.h | 3 + arch/x86/include/asm/nospec-branch.h | 21 ++ arch/x86/include/asm/processor.h | 4 +- arch/x86/include/asm/spec_ctrl.h | 226 ++++++++++++----- arch/x86/include/asm/thread_info.h | 7 +- arch/x86/kernel/asm-offsets.c | 7 + arch/x86/kernel/cpu/amd.c | 21 ++ arch/x86/kernel/cpu/bugs.c | 228 +++++++++++++++++ arch/x86/kernel/cpu/common.c | 53 +++- arch/x86/kernel/machine_kexec_64.c | 2 +- arch/x86/kernel/process.c | 23 ++ arch/x86/kernel/spec_ctrl.c | 239 +++++++++++++++--- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 8 +- arch/x86/lib/msr-smp.c | 11 +- drivers/base/cpu.c | 8 + fs/proc/array.c | 24 ++ include/linux/nospec.h | 16 ++ include/uapi/linux/prctl.h | 11 + kernel/seccomp.c | 17 ++ kernel/sys.c | 23 ++ 31 files changed, 981 insertions(+), 118 deletions(-) create mode 100644 .41234.tmp create mode 100644 .47691.tmp create mode 100644 .49442.tmp create mode 100755 .51016.tmp create mode 100644 Documentation/userspace-api/spec_ctrl.txt create mode 100644 include/linux/nospec.h diff --git a/.41234.tmp b/.41234.tmp new file mode 100644 index 0000000000000000000000000000000000000000..299d698d200ab945c45c7c6b2e900362ea604d31 GIT binary patch literal 664 zcmb<-^>JfjWMqH=Mg}_u1P><4z;FY>WN-kp9T?ad*cgsHs;xMIB!`X3jMYR224)5( zY|4S+%vi+riYs$V5|bG8ic5+hbOwx7l3G#1pqG+ZlE|Q!R9uWjGs!~IKpmiPU_;aC z0OhlyiKB;^B2XSAhVBzrG<`5Gx_)(_1P2BBLFQo#T@R>*2~Zm34;CN>*#!a;5Q>2l eNTa(QB((x6kM3S{z90_$E1~*zpfrp^*AD=4zZiD_ literal 0 HcmV?d00001 diff --git a/.47691.tmp b/.47691.tmp new file mode 100644 index 0000000000000000000000000000000000000000..f48862ebdb727d8f6d185db50ff00a853feb2c72 GIT binary patch literal 936 zcmb<-^>JfjWMqH=Mg}_u1P><4z>t9?=l~XWVBlonU|?`}cD7Q`aQ6$Yw0@5W#5IO_KDoL#=0W6D2pzBu$Dgnke z+4^;WdO>Cy05LLffO2$^1%Ygs{V>`TNeo$OE;0v+4buk`0`ak>mIA1P6;PTTNP{rQ kuOJ`+p@3-})9+w;s72`Uf^NSc4*haag|1M0U=+H30Bk!e4*&oF literal 0 HcmV?d00001 diff --git a/.49442.tmp b/.49442.tmp new file mode 100644 index 0000000000000000000000000000000000000000..7e8735387c4b5c4153c9366de9f1ee81e8632add GIT binary patch literal 664 zcmb<-^>JfjWMqH=Mg}_u1P><4z;FY>WN-kp9T?ad*ckXN&YVD!!pdaEY9a#zGXoP2 z^~_kr^@=NVOA?b9^omQ0Aan+dRgzj!!l0LuSdz$~msDJgMKj4l(m)-ca9~5z=>X-k zpoyc0nIcdgB!=!27c_k^F1mhopacg6`a$Ml3tbPWg$Yoa6-a~p0E=zKvO`d32r>p*E3g{~g}+K(3O literal 0 HcmV?d00001 diff --git a/.51016.tmp b/.51016.tmp new file mode 100755 index 0000000000000000000000000000000000000000..41ea2e7dc7e3f85e78fcb7c1e2bf3b781b001994 GIT binary patch literal 1024 zcmbtSF-yZx5WY0EDs+e-BIuC8p#>XTtF$Oe+S=A3ShRI;im3@&NKe*jG|G{H}i#ZzL+oSnG?^|4;>cqx$P`g zoenDR3Vp|FtkJF3>ny->oA8y-~t%oO`kbkj%*m=6rKf3 literal 0 HcmV?d00001 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 7b467f3288837a..55338e6f629714 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -274,7 +274,8 @@ What: /sys/devices/system/cpu/vulnerabilities 
/sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 -Date: Januar 2018 + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass +Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0f1f1428af0f7..1806170108be89 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2208,6 +2208,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. allow data leaks with this option, which is equivalent to spectre_v2=off. + nospec_store_bypass_disable + [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3222,6 +3225,40 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Not specifying this option is equivalent to spectre_v2=auto. + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) + + Certain CPUs are vulnerable to an exploit against + a common industry wide performance optimization known + as "Speculative Store Bypass" in which recent stores + to the same memory location may not be observed by + later loads during speculative execution. The idea + is that such stores are unlikely and that they can + be detected prior to instruction retirement at the + end of a particular speculation execution window. + + In vulnerable processors, the speculatively forwarded + store can be used in a cache side channel attack, for + example to read memory to which the attacker does not + directly have access (e.g. inside sandboxed code). + + This parameter controls whether the Speculative Store + Bypass optimization is used. + + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation. + prctl - Control Speculative Store Bypass for a thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/Documentation/userspace-api/spec_ctrl.txt b/Documentation/userspace-api/spec_ctrl.txt new file mode 100644 index 00000000000000..a8edc31690f994 --- /dev/null +++ b/Documentation/userspace-api/spec_ctrl.txt @@ -0,0 +1,85 @@ +=================== +Speculation Control +=================== + +Quite a few CPUs have speculation-related misfeatures which are in fact +vulnerabilities causing data leaks in various forms even across privilege +domains. + +The kernel provides mitigation for such vulnerabilities in various +forms. Some of these mitigations are compile-time configurable and some are +selectable on the kernel command line. + +There is also a class of mitigations which are very expensive, but they can +be restricted to a certain set of processes or tasks in controlled +environments. The mechanism to control these mitigations is via prctl(2).
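As a quick illustration (a minimal userspace sketch, not taken from the kernel tree; the PR_* values below mirror the ones this patch adds to include/uapi/linux/prctl.h and may need to be defined locally when building against older userspace headers), a task could query and flip the Speculative Store Bypass control like this::

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    # define PR_GET_SPECULATION_CTRL  52
    # define PR_SET_SPECULATION_CTRL  53
    # define PR_SPEC_STORE_BYPASS     0
    # define PR_SPEC_PRCTL            (1UL << 0)
    # define PR_SPEC_ENABLE           (1UL << 1)
    # define PR_SPEC_DISABLE          (1UL << 2)
    #endif

    int main(void)
    {
            /* Query the current state of the Speculative Store Bypass control. */
            int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

            if (state < 0) {
                    perror("PR_GET_SPECULATION_CTRL");
                    return 1;
            }

            if (!(state & PR_SPEC_PRCTL)) {
                    /* No per-task control; either not affected or handled globally. */
                    printf("per-task control unavailable, state 0x%x\n", state);
                    return 0;
            }

            /* Opt this task into the mitigation (disable the speculation feature). */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_DISABLE, 0, 0) < 0) {
                    perror("PR_SET_SPECULATION_CTRL");
                    return 1;
            }
            printf("Speculative Store Bypass mitigation enabled for this task\n");
            return 0;
    }

The meaning of the individual bits and the possible error codes are described below.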
+ +There are two prctl options which are related to this: + + * PR_GET_SPECULATION_CTRL + + * PR_SET_SPECULATION_CTRL + +PR_GET_SPECULATION_CTRL +----------------------- + +PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +the following meaning: + +==== ================ =================================================== +Bit Define Description +==== ================ =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +==== ================ =================================================== + +If all bits are 0 the CPU is not affected by the speculation misfeature. + +If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation +misfeature will fail. + +PR_SET_SPECULATION_CTRL +----------------------- +PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which +is selected by arg2 of prctl(2) per task. arg3 is used to hand in the control +value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. + +Common error codes +------------------ +======= ================================================================= +Value Meaning +======= ================================================================= +EINVAL The prctl is not implemented by the architecture or unused + prctl(2) arguments are not 0 + +ENODEV arg2 is selecting a not supported speculation misfeature +======= ================================================================= + +PR_SET_SPECULATION_CTRL error codes +----------------------------------- +======= ================================================================= +Value Meaning +======= ================================================================= +0 Success + +ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor + PR_SPEC_DISABLE + +ENXIO Control of the selected speculation misfeature is not possible. + See PR_GET_SPECULATION_CTRL. 
+======= ================================================================= + +Speculation misfeature controls +------------------------------- +- PR_SPEC_STORE_BYPASS: Speculative Store Bypass + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); diff --git a/Makefile b/Makefile index 9c104e06a82858..761479f7e93d21 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ EXTRAVERSION = NAME = Unicycling Gorilla RHEL_MAJOR = 7 RHEL_MINOR = 5 -RHEL_RELEASE = 862.2.3 +RHEL_RELEASE = 862.3.2 # # DRM backport version diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d91eff63b98ee5..73e378232035a5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -115,6 +115,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9284f1bf3f7a7e..9603c7252a6b55 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -207,6 +207,8 @@ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_IBP_DISABLE ( 7*32+21) /* Old AMD Indirect Branch Predictor Disable */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE (7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ @@ -311,16 +313,18 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* * BUG word(s) */ -#define X86_BUG(x) (NCAPINTS*32 + (x)) +#define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */ +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e2029f6c93866b..facd49ff8432d6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -298,6 +298,7 @@ #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define 
MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 @@ -434,6 +435,8 @@ #define FEATURE_ENABLE_IBRS (1<<0) #define FEATURE_ENABLE_STIBP (1<<1) #define FEATURE_SET_IBPB (1<<0) +#define FEATURE_ENABLE_SSBD_SHIFT 2 +#define FEATURE_ENABLE_SSBD (1< .macro __IBRS_ENTRY - movl $0, %edx + movl IBRS_HI32_PCP, %edx + movl IBRS_ENTRY_PCP, %eax + GET_THREAD_INFO(%rcx) + bt $TIF_SSBD, TI_flags(%rcx) + jnc .Lno_ssbd_\@ + orl $FEATURE_ENABLE_SSBD, %eax +.Lno_ssbd_\@: movl $MSR_IA32_SPEC_CTRL, %ecx - movl PER_CPU_VAR(spec_ctrl_pcp), %eax - andl $1, %eax wrmsr .endm .macro IBRS_ENTRY - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + testl $SPEC_CTRL_PCP_IBRS_ENTRY, IBRS_ENABLED_PCP jz .Lskip_\@ pushq %rax @@ -37,7 +50,7 @@ .endm .macro IBRS_ENTRY_CLOBBER - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + testl $SPEC_CTRL_PCP_IBRS_ENTRY, IBRS_ENABLED_PCP jz .Lskip_\@ __IBRS_ENTRY @@ -48,41 +61,64 @@ .Lend_\@: .endm +#define NO_IBRS_RESTORE (-1) /* No restore on exit */ + +/* + * The save_reg is initialize to NO_IBRS_RESTORE just in case IBRS is + * enabled in the middle of an exception, this avoids the very remote risk + * of writing random save_reg content into the SPEC_CTRL MSR in such case. + */ .macro IBRS_ENTRY_SAVE_AND_CLOBBER save_reg:req - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + movl $NO_IBRS_RESTORE, \save_reg + testl $SPEC_CTRL_PCP_IBRS_ENTRY, IBRS_ENABLED_PCP jz .Lskip_\@ movl $MSR_IA32_SPEC_CTRL, %ecx rdmsr - movl %eax, \save_reg + /* + * If the content of the MSR matches the kernel entry value, + * we should still rewrite the MSR anyway to enforce the + * barrier-like semantics in some IBRS implementations. + * Nowever, we can leave the save_reg as NO_IBRS_RESTORE + * so that we won't do a rewrite on exit, + * + * When the values don't match, the state of the SSBD bit in the + * MSR is transferred to new value. + * + * %edx is initialized by rdmsr above, and so it doesn't need + * to be touched. + */ + movl IBRS_ENTRY_PCP, %ecx + cmpl %eax, %ecx + je .Lwrmsr_\@ - __IBRS_ENTRY + movl %eax, \save_reg + andl $FEATURE_ENABLE_SSBD, %eax + orl %ecx, %eax +.Lwrmsr_\@: + movl $MSR_IA32_SPEC_CTRL, %ecx + wrmsr jmp .Lend_\@ .Lskip_\@: - /* - * Simulate no IBRS just in case IBRS is enabled in the middle - * of an exception, this avoids the very remote risk of - * writing random save_reg content into the SPEC_CTRL MSR in - * such case. 
- */ - movl $FEATURE_ENABLE_IBRS, \save_reg - lfence .Lend_\@: .endm .macro __IBRS_EXIT - movl $0, %edx + movl IBRS_HI32_PCP, %edx + movl IBRS_EXIT_PCP, %eax + GET_THREAD_INFO(%rcx) + bt $TIF_SSBD, TI_flags(%rcx) + jnc .Lno_ssbd_\@ + orl $FEATURE_ENABLE_SSBD, %eax +.Lno_ssbd_\@: movl $MSR_IA32_SPEC_CTRL, %ecx - movl PER_CPU_VAR(spec_ctrl_pcp), %eax - shrl $1, %eax - andl $1, %eax wrmsr .endm .macro IBRS_EXIT - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + testl $SPEC_CTRL_PCP_IBRS_EXIT, IBRS_ENABLED_PCP jz .Lskip_\@ pushq %rax @@ -97,14 +133,14 @@ .endm .macro IBRS_EXIT_RESTORE_CLOBBER save_reg:req - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + testl $SPEC_CTRL_PCP_IBRS, IBRS_ENABLED_PCP jz .Lskip_\@ - cmpl $FEATURE_ENABLE_IBRS, \save_reg + cmpl $NO_IBRS_RESTORE, \save_reg je .Lskip_\@ movl $MSR_IA32_SPEC_CTRL, %ecx - movl $0, %edx + movl IBRS_HI32_PCP, %edx movl \save_reg, %eax wrmsr @@ -112,7 +148,7 @@ .endm .macro IBRS_EXIT_CLOBBER - testl $SPEC_CTRL_PCP_IBRS, PER_CPU_VAR(spec_ctrl_pcp) + testl $SPEC_CTRL_PCP_IBRS_EXIT, IBRS_ENABLED_PCP jz .Lskip_\@ __IBRS_EXIT @@ -161,6 +197,7 @@ extern struct static_key ibrs_present_key; extern void spec_ctrl_rescan_cpuid(void); extern void spec_ctrl_init(void); extern void spec_ctrl_cpu_init(void); +extern void ssb_select_mitigation(void); bool spec_ctrl_force_enable_ibrs(void); bool spec_ctrl_cond_enable_ibrs(bool full_retpoline); @@ -175,6 +212,36 @@ enum spectre_v2_mitigation spec_ctrl_get_mitigation(void); bool unprotected_firmware_begin(void); void unprotected_firmware_end(bool ibrs_on); +/* + * Percpu IBRS kernel entry/exit control structure + */ +struct kernel_ibrs_spec_ctrl { + unsigned int enabled; /* Entry and exit enabled control bits */ + unsigned int entry; /* Lower 32-bit of SPEC_CTRL MSR for entry */ + unsigned int exit; /* Lower 32-bit of SPEC_CTRL MSR for exit */ + unsigned int hi32; /* Upper 32-bit of SPEC_CTRL MSR */ +}; + +DECLARE_PER_CPU_USER_MAPPED(struct kernel_ibrs_spec_ctrl, spec_ctrl_pcp); + +extern void x86_amd_rds_enable(void); + +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + +static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < FEATURE_ENABLE_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - FEATURE_ENABLE_SSBD_SHIFT); +} + +static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? 
x86_amd_ls_cfg_rds_mask : 0ULL; +} + +extern void speculative_store_bypass_update(void); + enum { IBRS_DISABLED, @@ -195,33 +262,12 @@ static __always_inline int cpu_has_spec_ctrl(void) return static_key_false(&ibrs_present_key); } -static __always_inline unsigned int ibrs_enabled(void) -{ - if (cpu_has_spec_ctrl()) { - unsigned int ibrs = __this_cpu_read(spec_ctrl_pcp); - - if ((ibrs & SPEC_CTRL_PCP_IBRS_ENTRY) && - !(ibrs & SPEC_CTRL_PCP_IBRS_EXIT)) - return IBRS_ENABLED; - - if ((ibrs & SPEC_CTRL_PCP_IBRS_ENTRY) && - (ibrs & SPEC_CTRL_PCP_IBRS_EXIT)) - return IBRS_ENABLED_ALWAYS; - - if (!(ibrs & SPEC_CTRL_PCP_IBRS_ENTRY) && - (ibrs & SPEC_CTRL_PCP_IBRS_EXIT)) - return IBRS_ENABLED_USER; - } - - return IBRS_DISABLED; -} - static __always_inline bool ibrs_enabled_kernel(void) { if (cpu_has_spec_ctrl()) { - unsigned int ibrs = __this_cpu_read(spec_ctrl_pcp); + unsigned int ibrs = __this_cpu_read(spec_ctrl_pcp.entry); - return (ibrs & SPEC_CTRL_PCP_IBRS_ENTRY); + return ibrs & FEATURE_ENABLE_IBRS; } return false; @@ -243,16 +289,44 @@ static inline bool ibpb_enabled(void) (ibrs_enabled_kernel() || retp_enabled())); } +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the kernel view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ + +/* + * RHEL note: Upstream implements two new functions to handle this: + * + * - extern void x86_spec_ctrl_set_guest(u64); + * - extern void x86_spec_ctrl_restore_host(u64); + * + * We already have the following two functions in RHEL so the + * above are not included in the RHEL version of the backport. + */ + static __always_inline u64 spec_ctrl_vmenter_ibrs(u64 vcpu_ibrs) { + + /* + * RHEL TODO: rename this function to just spec_ctrl_enter since + * we actually are updating the whole SPEC_CTRL MSR + */ + /* * If IBRS is enabled for host kernel mode or host always mode * we must set FEATURE_ENABLE_IBRS at vmexit. This is performance * critical code so we pass host_ibrs back to KVM. Preemption is * disabled, so we cannot race with sysfs writes. */ + u64 host_ibrs = ibrs_enabled_kernel() ? FEATURE_ENABLE_IBRS : 0; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host_ibrs |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + if (unlikely(vcpu_ibrs != host_ibrs)) native_wrmsrl(MSR_IA32_SPEC_CTRL, vcpu_ibrs); @@ -262,9 +336,19 @@ static __always_inline u64 spec_ctrl_vmenter_ibrs(u64 vcpu_ibrs) static __always_inline void __spec_ctrl_vmexit_ibrs(u64 host_ibrs, u64 vcpu_ibrs) { + + /* + * RHEL TODO: rename this function to just spec_ctrl_vmexit since + * we actually are updating the whole SPEC_CTRL MSR + */ + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host_ibrs |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + /* IBRS may have barrier semantics so it must be set during vmexit. */ if (unlikely(host_ibrs || vcpu_ibrs != host_ibrs)) { - native_wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs); + native_wrmsrl(MSR_IA32_SPEC_CTRL, + x86_spec_ctrl_base|host_ibrs); return; } @@ -283,7 +367,13 @@ static __always_inline void spec_ctrl_ibrs_on(void) * mode. 
*/ if (ibrs_enabled_kernel()) { - native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); + u64 spec_ctrl = x86_spec_ctrl_base|FEATURE_ENABLE_IBRS; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + spec_ctrl |= rds_tif_to_spec_ctrl( + current_thread_info()->flags); + + native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); return; } @@ -297,8 +387,15 @@ static __always_inline void spec_ctrl_ibrs_on(void) static __always_inline void spec_ctrl_ibrs_off(void) { - if (ibrs_enabled_kernel()) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + if (ibrs_enabled_kernel()) { + u64 spec_ctrl = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + spec_ctrl |= rds_tif_to_spec_ctrl( + current_thread_info()->flags); + + native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + } /* rmb not needed when disabling IBRS */ } @@ -319,7 +416,13 @@ static inline bool spec_ctrl_ibrs_on_firmware(void) bool ibrs_on = false; if (cpu_has_spec_ctrl() && retp_enabled() && !ibrs_enabled_kernel()) { - native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); + u64 spec_ctrl = x86_spec_ctrl_base|FEATURE_ENABLE_IBRS; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + spec_ctrl |= rds_tif_to_spec_ctrl( + current_thread_info()->flags); + + native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); ibrs_on = true; } else { /* rmb to prevent wrong speculation for security */ @@ -331,11 +434,18 @@ static inline bool spec_ctrl_ibrs_on_firmware(void) static inline void spec_ctrl_ibrs_off_firmware(bool ibrs_on) { - if (ibrs_on) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - else + if (ibrs_on) { + u64 spec_ctrl = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + spec_ctrl |= rds_tif_to_spec_ctrl( + current_thread_info()->flags); + + native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + } else { /* rmb to prevent wrong speculation for security */ rmb(); + } } static inline void __spec_ctrl_ibpb(void) @@ -360,5 +470,7 @@ static inline void spec_ctrl_ibpb_if_different_creds(struct task_struct *next) } } +extern enum ssb_mitigation ssb_mode; + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SPEC_CTRL_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2fcd0a3d04a212..60736f14fea3b9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -75,6 +75,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Speculative Store Bypass Disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -101,6 +102,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -136,7 +138,7 @@ struct thread_info { #define _TIF_WORK_MASK \ (0x0000FFFF & \ ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ - _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) + _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU|_TIF_SSBD)) /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ @@ -152,7 +154,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + 
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) @@ -206,7 +208,6 @@ static inline struct thread_info *current_thread_info(void) DECLARE_PER_CPU(unsigned long, kernel_stack); DECLARE_PER_CPU(unsigned long, __kernel_stack_70__); DECLARE_PER_CPU_USER_MAPPED(unsigned int, kaiser_enabled_pcp); -DECLARE_PER_CPU_USER_MAPPED(unsigned int, spec_ctrl_pcp); static inline struct thread_info *current_thread_info(void) { diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 604d012f0e9e95..27118eb7e462a6 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -73,4 +74,10 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + + /* Kernel IBRS speculation control structure */ + OFFSET(KERNEL_IBRS_SPEC_CTRL_enabled, kernel_ibrs_spec_ctrl, enabled); + OFFSET(KERNEL_IBRS_SPEC_CTRL_entry, kernel_ibrs_spec_ctrl, entry); + OFFSET(KERNEL_IBRS_SPEC_CTRL_exit, kernel_ibrs_spec_ctrl, exit); + OFFSET(KERNEL_IBRS_SPEC_CTRL_hi32, kernel_ibrs_spec_ctrl, hi32); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8e108518b7dc86..f2a81084c8bbb7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_64 @@ -586,6 +587,26 @@ static void early_init_amd(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_SME); } } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_SSBD); + setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + x86_amd_ls_cfg_rds_mask = (1ULL << bit); + } + } } static const int amd_erratum_383[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29b8876b1f953e..d0912dd501a3de 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,20 +23,36 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); +static void __init ssb_parse_cmdline(void); +void ssb_select_mitigation(void); +extern void spec_ctrl_save_msr(void); void __init check_bugs(void) { identify_boot_cpu(); + spec_ctrl_save_msr(); + if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); print_cpu_info(&boot_cpu_data); } + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability (exposed as a bug in "Memory Disambiguation") + * This has to be done before spec_ctrl_init() to make sure that its + * SPEC_CTRL MSR value is properly set up. 
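+ * ssb_select_mitigation() may end up setting the SSBD bit in + * x86_spec_ctrl_base, which spec_ctrl_init() then propagates into the + * per-cpu spec_ctrl_pcp copies via spec_ctrl_save_msr().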
+ */ + ssb_parse_cmdline(); + ssb_select_mitigation(); + spec_ctrl_init(); spectre_v2_select_mitigation(); + spec_ctrl_cpu_init(); #ifdef CONFIG_X86_32 @@ -75,6 +91,14 @@ void __init check_bugs(void) #endif } +void x86_amd_rds_enable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} + /* The kernel command line selection */ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_NONE, @@ -200,6 +224,202 @@ static void __init spectre_v2_select_mitigation(void) #undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, +}; + +static enum ssb_mitigation_cmd ssb_cmd = SPEC_STORE_BYPASS_CMD_NONE; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ +}; + +static enum ssb_mitigation_cmd __init __ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) + return SPEC_STORE_BYPASS_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +/* + * The SSB command line parsing is now separated from SSB mitigation + * selection as the boot command line buffer will not be available after + * init and so could not be used with late microcode update. 
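+ * The parsed command is therefore cached in ssb_cmd at boot time and + * reused when ssb_select_mitigation() is invoked again, e.g. from + * spec_ctrl_rescan_cpuid() after a CPUID rescan.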
+ */ +static void __init ssb_parse_cmdline(void) +{ + ssb_cmd = __ssb_parse_cmdline(); +} + +static enum ssb_mitigation __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd = ssb_cmd; + + if (!boot_cpu_has(X86_FEATURE_SSBD)) + return mode; + + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + /* Choose prctl as the default mode */ + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= FEATURE_ENABLE_SSBD; + break; + case X86_VENDOR_AMD: + x86_amd_rds_enable(); + break; + } + } + + return mode; +} + +void ssb_select_mitigation() +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + +#undef pr_fmt + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + bool ssbd = !!test_tsk_thread_flag(task, TIF_SSBD); + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + + if (ctrl == PR_SPEC_ENABLE) + clear_tsk_thread_flag(task, TIF_SSBD); + else + set_tsk_thread_flag(task, TIF_SSBD); + + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. 
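+ * For the current task the update is applied immediately via + * speculative_store_bypass_update(), and only when the TIF_SSBD bit + * actually changed, to avoid a redundant MSR write.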
+ */ + if (task == current && ssbd != !!test_tsk_thread_flag(task, TIF_SSBD)) + speculative_store_bypass_update(); + + return 0; +} + +static int ssb_prctl_get(struct task_struct *task) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: + if (test_tsk_thread_flag(task, TIF_SSBD)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) + return -ERANGE; + + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + #ifdef CONFIG_SYSFS ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -222,4 +442,12 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { return sprintf(buf, "%s\n", spectre_v2_strings[spec_ctrl_get_mitigation()]); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 49cb90f121df06..7d34918e22b971 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -472,8 +474,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -853,6 +855,30 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, + { X86_VENDOR_ANY, 4, }, + {} +}; + /* * Do minimum CPU detection early. 
* Fields really needed: vendor, cpuid_level, family, model, mask, @@ -901,6 +927,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); + + if (!x86_match_cpu(cpu_no_spec_store_bypass)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); } void __init early_cpu_init(void) @@ -1022,6 +1051,13 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) #else c->logical_proc_id = 0; #endif + + /* + * If we're on an AMD system using non-architectural MSRs + * and using big hammer, then set the SSBD bit accordingly + */ + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_rds_enable(); } /* @@ -1059,7 +1095,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overriden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } @@ -1121,7 +1157,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overriden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } @@ -1150,6 +1186,13 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif + + /* + * Always write back x86_spec_ctrl_base if it has been modified. + */ + if (x86_spec_ctrl_base) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } #ifdef CONFIG_X86_64 @@ -1314,7 +1357,7 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU_USER_MAPPED(unsigned int, kaiser_enabled_pcp) ____cacheline_aligned; -DEFINE_PER_CPU_USER_MAPPED(unsigned int, spec_ctrl_pcp); +DEFINE_PER_CPU_USER_MAPPED(struct kernel_ibrs_spec_ctrl, spec_ctrl_pcp); EXPORT_PER_CPU_SYMBOL_GPL(spec_ctrl_pcp); DEFINE_PER_CPU_USER_MAPPED(unsigned long, kaiser_scratch); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index f5a28cc7f495ff..efd50d1239d744 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -239,7 +239,7 @@ int machine_kexec_prepare(struct kimage *image) * The second page of control_code_page may be corrupted by the * PTI code, so just clear the page for safety. 
*/ - clear_page(image->control_code_page + PAGE_SIZE); + clear_page(page_address(image->control_code_page) + PAGE_SIZE); #endif /* update purgatory as needed */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f741d66041de25..6eb30b7329111e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -196,6 +196,25 @@ int set_tsc_mode(unsigned int val) return 0; } +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + u64 msr; + + if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { + msr = this_cpu_read(spec_ctrl_pcp.entry) | + rds_tif_to_spec_ctrl(tifn); + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } +} + +void speculative_store_bypass_update(void) +{ + __speculative_store_bypass_update(current_thread_info()->flags); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -238,6 +257,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } propagate_user_return_notify(prev_p, next_p); + + if (test_tsk_thread_flag(prev_p, TIF_SSBD) ^ + test_tsk_thread_flag(next_p, TIF_SSBD)) + __speculative_store_bypass_update(task_thread_info(next_p)->flags); } /* diff --git a/arch/x86/kernel/spec_ctrl.c b/arch/x86/kernel/spec_ctrl.c index a53634e96abb20..1b795c406371d8 100644 --- a/arch/x86/kernel/spec_ctrl.c +++ b/arch/x86/kernel/spec_ctrl.c @@ -20,21 +20,136 @@ static DEFINE_MUTEX(spec_ctrl_mutex); static bool noibrs_cmdline __read_mostly; static bool ibp_disabled __read_mostly; static bool unsafe_module __read_mostly; +static unsigned int ibrs_mode __read_mostly; struct static_key retp_enabled_key = STATIC_KEY_INIT_FALSE; struct static_key ibrs_present_key = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL(retp_enabled_key); EXPORT_SYMBOL(ibrs_present_key); -static void set_spec_ctrl_pcp(bool enable, int flag) +/* + * SPEC_CTRL MSR bits being managed by the kernel. + */ +#define SPEC_CTRL_MANAGED_MASK (FEATURE_ENABLE_IBRS|FEATURE_ENABLE_SSBD) + +/* + * The Intel specification for the SPEC_CTRL MSR requires that we + * preserve any already set reserved bits at boot time (e.g. for + * future additions that this kernel is not currently aware of). + * We then set any additional mitigation bits that we want + * ourselves and always use this as the base for SPEC_CTRL. + * We also use this when handling guest entry/exit as below. + * + * RHEL note: We do the above to be in sync with upstream, + * but in the RHEL case, we have both x86_spec_ctrl_base, + * and a PER_CPU spec_ctrl_pcp to track and manage. + * + * RHEL note: It's actually cleaner to directly export this + * and allow all of our assorted IBRS management code to touch + * this directly, rather than use the upstream accessors. We + * implement them, but we don't use those in the RHEL code. + */ + +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __read_mostly x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static bool spec_ctrl_msr_write; + +/* + * AMD specific MSR info for Store Bypass control. x86_amd_ls_cfg_rds_mask + * is initialized in identify_boot_cpu(). 
+ */ +u64 __read_mostly x86_amd_ls_cfg_base; +u64 __read_mostly x86_amd_ls_cfg_rds_mask; + +void spec_ctrl_save_msr(void) { - int cpu, val = __this_cpu_read(spec_ctrl_pcp); - if (enable) - val |= flag; + int cpu; + unsigned int hival, loval; + static int savecnt; + + spec_ctrl_msr_write = false; + + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may have + * unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (boot_cpu_has(X86_FEATURE_IBRS) && !savecnt) { + /* + * This part is run only the first time it is called. + */ + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + if (x86_spec_ctrl_base & SPEC_CTRL_MANAGED_MASK) { + x86_spec_ctrl_base &= ~SPEC_CTRL_MANAGED_MASK; + spec_ctrl_msr_write = true; + native_wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + } + + /* + * RHEL only: update the PER_CPU spec_ctrl_pcp cached values + */ + + loval = x86_spec_ctrl_base & 0xffffffff; + hival = (x86_spec_ctrl_base >> 32) & 0xffffffff; + + for_each_possible_cpu(cpu) { + WRITE_ONCE(per_cpu(spec_ctrl_pcp.hi32, cpu), hival); + WRITE_ONCE(per_cpu(spec_ctrl_pcp.entry, cpu), loval); + WRITE_ONCE(per_cpu(spec_ctrl_pcp.exit, cpu), loval); + } + savecnt++; +} + +/* + * RHEL note: + * Upstream has implemented the following APIs for getting and setting + * the SPEC_CTRL MSR value. + * + * - void x86_spec_ctrl_set(u64 val) + * - u64 x86_spec_ctrl_get_default(void) + * + * We don't use it directly since we have a lot of IBRS management code + * that touches SPEC_CTRL directly. + */ + +static void set_spec_ctrl_pcp(bool entry, bool exit) +{ + unsigned int enabled = this_cpu_read(spec_ctrl_pcp.enabled); + unsigned int entry_val = this_cpu_read(spec_ctrl_pcp.entry); + unsigned int exit_val = this_cpu_read(spec_ctrl_pcp.exit); + int cpu; + + /* + * For ibrs_always, we only need to write the MSR at kernel entry + * to fulfill the barrier semantics for some CPUs. 
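+ * When the entry and exit values differ (ibrs or ibrs_user mode), the MSR + * has to be rewritten on both transitions, so both the entry and exit + * control bits are set; when IBRS is off in both modes, no kernel + * entry/exit MSR writes are done at all.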
+ */ + if (entry && exit) + enabled = SPEC_CTRL_PCP_IBRS_ENTRY; + else if (entry != exit) + enabled = SPEC_CTRL_PCP_IBRS_ENTRY|SPEC_CTRL_PCP_IBRS_EXIT; else - val &= ~flag; - for_each_possible_cpu(cpu) - WRITE_ONCE(per_cpu(spec_ctrl_pcp, cpu), val); + enabled = 0; + + if (entry) + entry_val |= FEATURE_ENABLE_IBRS; + else + entry_val &= ~FEATURE_ENABLE_IBRS; + + if (exit) + exit_val |= FEATURE_ENABLE_IBRS; + else + exit_val &= ~FEATURE_ENABLE_IBRS; + + for_each_possible_cpu(cpu) { + WRITE_ONCE(per_cpu(spec_ctrl_pcp.enabled, cpu), enabled); + WRITE_ONCE(per_cpu(spec_ctrl_pcp.entry, cpu), entry_val); + WRITE_ONCE(per_cpu(spec_ctrl_pcp.exit, cpu), exit_val); + } } /* @@ -42,32 +157,32 @@ static void set_spec_ctrl_pcp(bool enable, int flag) * * entry exit * ibrs 1 0 - * ibrs_always 1 1 + * ibrs_always 1 x (not written on exit) * ibrs_user 0 1 */ static void set_spec_ctrl_pcp_ibrs(void) { - set_spec_ctrl_pcp(true, SPEC_CTRL_PCP_IBRS_ENTRY); - set_spec_ctrl_pcp(false, SPEC_CTRL_PCP_IBRS_EXIT); + set_spec_ctrl_pcp(true, false); + ibrs_mode = IBRS_ENABLED; } static void set_spec_ctrl_pcp_ibrs_always(void) { - set_spec_ctrl_pcp(true, SPEC_CTRL_PCP_IBRS_ENTRY); - set_spec_ctrl_pcp(true, SPEC_CTRL_PCP_IBRS_EXIT); + set_spec_ctrl_pcp(true, true); + ibrs_mode = IBRS_ENABLED_ALWAYS; } static void set_spec_ctrl_pcp_ibrs_user(void) { - set_spec_ctrl_pcp(false, SPEC_CTRL_PCP_IBRS_ENTRY); - set_spec_ctrl_pcp(true, SPEC_CTRL_PCP_IBRS_EXIT); + set_spec_ctrl_pcp(false, true); + ibrs_mode = IBRS_ENABLED_USER; } void clear_spec_ctrl_pcp(void) { - set_spec_ctrl_pcp(false, SPEC_CTRL_PCP_IBRS_ENTRY); - set_spec_ctrl_pcp(false, SPEC_CTRL_PCP_IBRS_EXIT); + set_spec_ctrl_pcp(false, false); + ibrs_mode = IBRS_DISABLED; } static void spec_ctrl_sync_all_cpus(u32 msr_nr, u64 val) @@ -82,7 +197,8 @@ static void spec_ctrl_sync_all_cpus(u32 msr_nr, u64 val) static void sync_all_cpus_ibrs(bool enable) { spec_ctrl_sync_all_cpus(MSR_IA32_SPEC_CTRL, - enable ? FEATURE_ENABLE_IBRS : 0); + enable ? 
(x86_spec_ctrl_base | FEATURE_ENABLE_IBRS) + : x86_spec_ctrl_base); } static void __sync_this_cpu_ibp(void *data) @@ -125,7 +241,7 @@ static void spec_ctrl_disable_all(void) int cpu; for_each_possible_cpu(cpu) - WRITE_ONCE(per_cpu(spec_ctrl_pcp, cpu), 0); + WRITE_ONCE(per_cpu(spec_ctrl_pcp.enabled, cpu), 0); set_spec_ctrl_retp(false); } @@ -249,9 +365,9 @@ enum spectre_v2_mitigation spec_ctrl_get_mitigation(void) if (ibp_disabled) mode = SPECTRE_V2_IBP_DISABLED; - else if (ibrs_enabled() == IBRS_ENABLED_ALWAYS) + else if (ibrs_mode == IBRS_ENABLED_ALWAYS) mode = SPECTRE_V2_IBRS_ALWAYS; - else if (ibrs_enabled() == IBRS_ENABLED) + else if (ibrs_mode == IBRS_ENABLED) mode = SPECTRE_V2_IBRS; else if (retp_enabled()) { if (!retp_enabled_full()) @@ -262,7 +378,7 @@ enum spectre_v2_mitigation spec_ctrl_get_mitigation(void) mode = SPECTRE_V2_RETPOLINE_SKYLAKE; else if (unsafe_module) mode = SPECTRE_V2_RETPOLINE_UNSAFE_MODULE; - else if (ibrs_enabled() == IBRS_ENABLED_USER) + else if (ibrs_mode == IBRS_ENABLED_USER) mode = SPECTRE_V2_RETPOLINE_IBRS_USER; else mode = SPECTRE_V2_RETPOLINE; @@ -298,20 +414,23 @@ void spec_ctrl_cpu_init(void) return; } - if (ibrs_enabled() == IBRS_ENABLED_ALWAYS) - native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); + if ((ibrs_mode == IBRS_ENABLED_ALWAYS) || + (spec_ctrl_msr_write && (system_state == SYSTEM_BOOTING))) + native_wrmsr(MSR_IA32_SPEC_CTRL, + this_cpu_read(spec_ctrl_pcp.entry), + this_cpu_read(spec_ctrl_pcp.hi32)); } static void spec_ctrl_reinit_all_cpus(void) { if (boot_cpu_has(X86_FEATURE_IBP_DISABLE)) { - sync_all_cpus_ibp(!ibrs_enabled()); + sync_all_cpus_ibp(!ibrs_mode); return; } - if (ibrs_enabled() == IBRS_ENABLED_ALWAYS) + if (ibrs_mode == IBRS_ENABLED_ALWAYS) sync_all_cpus_ibrs(true); - else if (ibrs_enabled() == IBRS_DISABLED) + else if (ibrs_mode == IBRS_DISABLED) sync_all_cpus_ibrs(false); } @@ -322,12 +441,23 @@ void spec_ctrl_init(void) else if (static_key_enabled(&ibrs_present_key) && !boot_cpu_has(X86_FEATURE_IBRS)) static_key_slow_dec(&ibrs_present_key); spec_ctrl_print_features(); + + /* + * If the x86_spec_ctrl_base is modified, propagate it to the + * percpu spec_ctrl structure as well as forcing MSR write. + */ + if (x86_spec_ctrl_base) { + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + spec_ctrl_save_msr(); + spec_ctrl_msr_write = true; + } } void spec_ctrl_rescan_cpuid(void) { enum spectre_v2_mitigation old_mode; - bool old_ibrs, old_ibpb; + bool old_ibrs, old_ibpb, old_ssbd; + bool ssbd_changed; int cpu; if (boot_cpu_has(X86_FEATURE_IBP_DISABLE)) @@ -336,36 +466,71 @@ void spec_ctrl_rescan_cpuid(void) mutex_lock(&spec_ctrl_mutex); if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + bool amd_ssbd = boot_cpu_has(X86_FEATURE_AMD_SSBD); old_ibrs = boot_cpu_has(X86_FEATURE_IBRS); old_ibpb = boot_cpu_has(X86_FEATURE_IBPB); + old_ssbd = boot_cpu_has(X86_FEATURE_SSBD); old_mode = spec_ctrl_get_mitigation(); /* detect spec ctrl related cpuid additions */ get_cpu_cap(&boot_cpu_data); + /* + * For AMD family 0x15-0x17, the SSBD bit is specially + * hard-coded. Hence, a call to get_cpu_cap() will clear + * the SSBD bit as it is part of an architectural leaf. + * The Linux internal AMD_SSBD bit may not be cleared. + * We need to detect this situation and correct it. 
+ */ + if (amd_ssbd && !boot_cpu_has(X86_FEATURE_SSBD)) { + setup_force_cpu_cap(X86_FEATURE_SSBD); + setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + } + /* if there were no spec ctrl related changes, we're done */ + ssbd_changed = (old_ssbd != boot_cpu_has(X86_FEATURE_SSBD)); if (old_ibrs == boot_cpu_has(X86_FEATURE_IBRS) && - old_ibpb == boot_cpu_has(X86_FEATURE_IBPB)) + old_ibpb == boot_cpu_has(X86_FEATURE_IBPB) && !ssbd_changed) goto done; /* - * The SPEC_CTRL and IBPB cpuid bits may have + * The IBRS, IBPB & SSBD cpuid bits may have * just been set in the boot_cpu_data, transfer them * to the per-cpu data too. */ if (boot_cpu_has(X86_FEATURE_IBRS)) for_each_online_cpu(cpu) - set_cpu_cap(&cpu_data(cpu), - X86_FEATURE_IBRS); + set_cpu_cap(&cpu_data(cpu), X86_FEATURE_IBRS); if (boot_cpu_has(X86_FEATURE_IBPB)) for_each_online_cpu(cpu) - set_cpu_cap(&cpu_data(cpu), - X86_FEATURE_IBPB); + set_cpu_cap(&cpu_data(cpu), X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_SSBD)) + for_each_online_cpu(cpu) + set_cpu_cap(&cpu_data(cpu), X86_FEATURE_SSBD); /* update static key, print the changed IBRS/IBPB features */ spec_ctrl_init(); + if (ssbd_changed) { + u64 old_spec_ctrl = x86_spec_ctrl_base; + + /* + * Redo speculative store bypass setup. + */ + ssb_select_mitigation(); + if (x86_spec_ctrl_base != old_spec_ctrl) { + /* + * Need to propagate the new baseline to all + * the percpu spec_ctrl structures. The + * spectre v2 re-initialization below will + * reset to the right percpu values. + */ + spec_ctrl_save_msr(); + sync_all_cpus_ibrs(false); + } + } + /* * Re-execute the v2 mitigation logic based on any new CPU * features. Note that any debugfs-based changes the user may @@ -399,7 +564,7 @@ static ssize_t __enabled_read(struct file *file, char __user *user_buf, static ssize_t ibrs_enabled_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { - unsigned int enabled = ibrs_enabled(); + unsigned int enabled = ibrs_mode; if (ibp_disabled) enabled = IBRS_ENABLED_ALWAYS; @@ -427,7 +592,7 @@ static ssize_t ibrs_enabled_write(struct file *file, return -EINVAL; mutex_lock(&spec_ctrl_mutex); - if ((!ibp_disabled && enable == ibrs_enabled()) || + if ((!ibp_disabled && enable == ibrs_mode) || (ibp_disabled && enable == IBRS_ENABLED_ALWAYS)) goto out_unlock; @@ -535,9 +700,9 @@ static ssize_t retp_enabled_write(struct file *file, if (ibp_disabled) { sync_all_cpus_ibp(true); ibp_disabled = false; - } else if (ibrs_enabled() == IBRS_ENABLED) + } else if (ibrs_mode == IBRS_ENABLED) clear_spec_ctrl_pcp(); - else if (ibrs_enabled() == IBRS_ENABLED_ALWAYS) + else if (ibrs_mode == IBRS_ENABLED_ALWAYS) set_spec_ctrl_pcp_ibrs_user(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7bb663af32bb41..a5c280eeffbd3b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -384,7 +384,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | - F(SPEC_CTRL) | F(INTEL_STIBP); + F(SPEC_CTRL) | F(INTEL_STIBP) | F(SSBD); /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c3d0093f5126d1..aef3fb1c2e20b7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2959,7 +2959,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, 
X86_FEATURE_SPEC_CTRL)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -3070,11 +3071,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(FEATURE_ENABLE_IBRS | FEATURE_ENABLE_STIBP)) + if (data & ~(FEATURE_ENABLE_IBRS | FEATURE_ENABLE_STIBP | FEATURE_ENABLE_SSBD)) return 1; vmx->spec_ctrl = data; diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index 518532e6a3faa2..2b3725b1bc3809 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -2,6 +2,7 @@ #include #include #include +#include static void __rdmsr_on_cpu(void *info) { @@ -28,7 +29,15 @@ static void __wrmsr_on_cpu(void *info) else reg = &rv->reg; - wrmsr(rv->msr_no, reg->l, reg->h); + /* + * We need to set the SSBD bit in the SPEC_CTRL MSR if TIF_SSBD + * is set. + */ + if (unlikely((rv->msr_no == MSR_IA32_SPEC_CTRL) && + rds_tif_to_spec_ctrl(current_thread_info()->flags))) + wrmsr(rv->msr_no, reg->l | FEATURE_ENABLE_SSBD, reg->h); + else + wrmsr(rv->msr_no, reg->l, reg->h); } int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 4263273d1a98d9..bd82212184d725 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -414,14 +414,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0400, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0400, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0400, cpu_show_spectre_v2, NULL); +static DEVICE_ATTR(spec_store_bypass, 0400, cpu_show_spec_store_bypass, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, + &dev_attr_spec_store_bypass.attr, NULL }; diff --git a/fs/proc/array.c b/fs/proc/array.c index d7c97e89352e6c..02b0fc4d86b4b1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -79,10 +79,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -365,6 +367,28 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); #endif + seq_printf(m, "\nSpeculationStoreBypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 00000000000000..e135c1d5f1a75e --- 
/dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +struct task_struct; + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); + +#endif /* _LINUX_NOSPEC_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index f818d081129b34..8ddaa82e79b14a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -192,4 +192,15 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 42e55449c1a5fa..3acc15dd479c68 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -236,6 +238,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } +/* + * If a given speculation mitigation is opt-in (prctl()-controlled), + * select it, by disabling speculation (enabling mitigation). + */ +static inline void spec_mitigate(struct task_struct *task, + unsigned long which) +{ + int state = arch_prctl_spec_ctrl_get(task, which); + + if (state > 0 && (state & PR_SPEC_PRCTL)) + arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); +} + static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode) { @@ -247,6 +262,8 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); + /* Assume seccomp processes want speculation flaw mitigation. */ + spec_mitigate(task, PR_SPEC_STORE_BYPASS); set_tsk_thread_flag(task, TIF_SECCOMP); } diff --git a/kernel/sys.c b/kernel/sys.c index 113961cf6711ed..cb90b1c046fe4c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -54,6 +54,8 @@ #include #include +#include + #include /* Move somewhere else to avoid recompiling? 
*/ #include @@ -2321,6 +2323,17 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) +{ + return -EINVAL; +} + +int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + unsigned long ctrl) +{ + return -EINVAL; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2505,6 +2518,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_MPX_DISABLE_MANAGEMENT: error = MPX_DISABLE_MANAGEMENT(); break; + case PR_GET_SPECULATION_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_get(me, arg2); + break; + case PR_SET_SPECULATION_CTRL: + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_set(me, arg2, arg3); + break; default: error = -EINVAL; break;
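Taken together, the sysfs file, the /proc/<pid>/status field and the prctl() interface added above make the mitigation state observable from userspace. A closing sketch (again not part of the patch; it assumes the kernel was booted with spec_store_bypass_disable=prctl or =auto so that per-task control is reported) reads both views and then opts the current task in:

    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    # define PR_SET_SPECULATION_CTRL  53
    # define PR_SPEC_STORE_BYPASS     0
    # define PR_SPEC_DISABLE          (1UL << 2)
    #endif

    /* Print the lines of a text file that start with the given prefix. */
    static void show(const char *path, const char *prefix)
    {
            char line[256];
            FILE *f = fopen(path, "r");

            if (!f)
                    return;
            while (fgets(line, sizeof(line), f)) {
                    if (!prefix || !strncmp(line, prefix, strlen(prefix)))
                            fputs(line, stdout);
            }
            fclose(f);
    }

    int main(void)
    {
            /* System-wide view, exported by cpu_show_spec_store_bypass(). */
            show("/sys/devices/system/cpu/vulnerabilities/spec_store_bypass", NULL);

            /* Per-task view, printed by task_seccomp() in fs/proc/array.c. */
            show("/proc/self/status", "SpeculationStoreBypass:");

            /* Opt this task into the mitigation, then re-read the status. */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_DISABLE, 0, 0) < 0)
                    perror("PR_SET_SPECULATION_CTRL");

            show("/proc/self/status", "SpeculationStoreBypass:");
            return 0;
    }

Seccomp tasks get the same treatment automatically: seccomp_assign_mode() above calls spec_mitigate(), so a process that enters seccomp while the prctl mode is active will already report "thread mitigated" here.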