[Deepin Kernel SIG] [Intel] Intel: backport KVM LAM from v6.8 #539

Merged
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
@@ -135,6 +135,7 @@ KVM_X86_OP(msr_filter_changed)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(vm_attestation)
KVM_X86_OP_OPTIONAL(control_pre_system_reset)
KVM_X86_OP_OPTIONAL(control_post_system_reset)
5 changes: 4 additions & 1 deletion arch/x86/include/asm/kvm_host.h
@@ -125,7 +125,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
| X86_CR4_LAM_SUP))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

@@ -1752,6 +1753,8 @@ struct kvm_x86_ops {
*/
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);

gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);

/*
* Interfaces for HYGON CSV guest
*/
2 changes: 1 addition & 1 deletion arch/x86/kvm/cpuid.c
@@ -670,7 +670,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_7_1_EAX,
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
F(FZRM) | F(FSRS) | F(FSRC) |
F(AMX_FP16) | F(AVX_IFMA)
F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
);

kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
13 changes: 8 additions & 5 deletions arch/x86/kvm/cpuid.h
@@ -47,11 +47,6 @@ static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
return !(gpa & vcpu->arch.reserved_gpa_bits);
}

static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
}

static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
gpa_t gpa, gpa_t alignment)
{
@@ -288,4 +283,12 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
vcpu->arch.governed_features.enabled);
}

static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (guest_can_use(vcpu, X86_FEATURE_LAM))
cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);

return kvm_vcpu_is_legal_gpa(vcpu, cr3);
}

#endif
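
The new kvm_vcpu_is_legal_cr3() helper strips the CR3 LAM control bits before running the existing reserved-GPA check, so a guest that sets LAM_U48/LAM_U57 no longer trips the physical-address validation. A minimal userspace sketch of that interaction, assuming a 52-bit MAXPHYADDR and made-up values (not kernel code):

#include <stdint.h>
#include <stdio.h>

/* CR3.LAM_U57 is bit 61, CR3.LAM_U48 is bit 62. */
#define X86_CR3_LAM_U57 (1ULL << 61)
#define X86_CR3_LAM_U48 (1ULL << 62)

int main(void)
{
	uint64_t reserved_gpa_bits = ~((1ULL << 52) - 1); /* assumed 52-bit MAXPHYADDR */
	uint64_t cr3 = 0x12345000ULL | X86_CR3_LAM_U57;   /* guest enables LAM_U57 */

	/* The raw value fails a plain reserved-GPA check... */
	printf("raw CR3 legal:    %d\n", !(cr3 & reserved_gpa_bits));

	/* ...but passes once the LAM bits are masked off, which is what
	 * kvm_vcpu_is_legal_cr3() does when the guest may use LAM.
	 */
	cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
	printf("masked CR3 legal: %d\n", !(cr3 & reserved_gpa_bits));
	return 0;
}
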
27 changes: 15 additions & 12 deletions arch/x86/kvm/emulate.c
@@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned *max_size, unsigned size,
bool write, bool fetch,
enum x86emul_mode mode, ulong *linear)
enum x86emul_mode mode, ulong *linear,
unsigned int flags)
{
struct desc_struct desc;
bool usable;
@@ -701,7 +701,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
*linear = la;
*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
va_bits = ctxt_virt_addr_bits(ctxt);
if (!__is_canonical_address(la, va_bits))
goto bad;
@@ -717,11 +717,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
if (!usable)
goto bad;
/* code segment in protected mode or read-only data segment */
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
|| !(desc.type & 2)) && write)
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
(flags & X86EMUL_F_WRITE))
goto bad;
/* unreadable code segment */
if (!fetch && (desc.type & 8) && !(desc.type & 2))
if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
goto bad;
lim = desc_limit_scaled(&desc);
if (!(desc.type & 8) && (desc.type & 4)) {
@@ -757,8 +757,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ulong *linear)
{
unsigned max_size;
return __linearize(ctxt, addr, &max_size, size, write, false,
ctxt->mode, linear);
return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
write ? X86EMUL_F_WRITE : 0);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
@@ -771,7 +771,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)

if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
X86EMUL_F_FETCH);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
@@ -907,8 +908,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
* boundary check itself. Instead, we use max_size to check
* against op_size.
*/
rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
&linear);
rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
X86EMUL_F_FETCH);
if (unlikely(rc != X86EMUL_CONTINUE))
return rc;

@@ -3439,8 +3440,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
int rc;
ulong linear;
unsigned int max_size;

rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
&linear, X86EMUL_F_INVLPG);
if (rc == X86EMUL_CONTINUE)
ctxt->ops->invlpg(ctxt, linear);
/* Disable writeback. */
1 change: 1 addition & 0 deletions arch/x86/kvm/governed_features.h
@@ -16,6 +16,7 @@ KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
KVM_GOVERNED_X86_FEATURE(VGIF)
KVM_GOVERNED_X86_FEATURE(VNMI)
KVM_GOVERNED_X86_FEATURE(LAM)

#undef KVM_GOVERNED_X86_FEATURE
#undef KVM_GOVERNED_FEATURE
9 changes: 9 additions & 0 deletions arch/x86/kvm/kvm_emulate.h
@@ -88,6 +88,12 @@ struct x86_instruction_info {
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */

/* x86-specific emulation flags */
#define X86EMUL_F_WRITE BIT(0)
#define X86EMUL_F_FETCH BIT(1)
#define X86EMUL_F_IMPLICIT BIT(2)
#define X86EMUL_F_INVLPG BIT(3)

struct x86_emulate_ops {
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
/*
@@ -224,6 +230,9 @@ struct x86_emulate_ops {
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);

gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
unsigned int flags);
};

/* Type, address-of, and value of an instruction's operand. */
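
The X86EMUL_F_* bits added above replace the separate write/fetch booleans that __linearize() used to take, which is what lets the untagging path distinguish fetches, implicit accesses and INVLPG operands. A small standalone sketch (not kernel code, hypothetical helper name) of how the old call-site booleans map onto the new flags:

#include <stdio.h>

#define BIT(n)             (1u << (n))
#define X86EMUL_F_WRITE    BIT(0)
#define X86EMUL_F_FETCH    BIT(1)
#define X86EMUL_F_IMPLICIT BIT(2)
#define X86EMUL_F_INVLPG   BIT(3)

/* Old-style (write, fetch) booleans folded into one flags word, mirroring
 * the "write ? X86EMUL_F_WRITE : 0" conversion in linearize().
 */
static unsigned int to_emul_flags(int write, int fetch)
{
	return (write ? X86EMUL_F_WRITE : 0) | (fetch ? X86EMUL_F_FETCH : 0);
}

int main(void)
{
	unsigned int flags = to_emul_flags(0, 1); /* an instruction fetch */

	/* The emulator now tests bits instead of separate booleans. */
	printf("write=%d fetch=%d\n",
	       !!(flags & X86EMUL_F_WRITE), !!(flags & X86EMUL_F_FETCH));
	return 0;
}
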
8 changes: 8 additions & 0 deletions arch/x86/kvm/mmu.h
@@ -146,6 +146,14 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}

static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
{
if (!guest_can_use(vcpu, X86_FEATURE_LAM))
return 0;

return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
}

static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
u64 root_hpa = vcpu->arch.mmu->root.hpa;
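
kvm_get_active_cr3_lam_bits() is consumed by vmx_load_mmu_pgd() further down in this series, which ORs the active LAM bits into the CR3 value loaded for the guest in the non-EPT path. A rough userspace sketch with made-up addresses (not kernel code), assuming the X86_CR3_LAM_U57 (bit 61) / X86_CR3_LAM_U48 (bit 62) encodings:

#include <stdint.h>
#include <stdio.h>

#define X86_CR3_LAM_U57 (1ULL << 61)
#define X86_CR3_LAM_U48 (1ULL << 62)
#define CR3_PCID_MASK   0xfffULL

int main(void)
{
	uint64_t guest_cr3 = 0x76543000ULL | 0x5 | X86_CR3_LAM_U57; /* value the guest wrote */
	uint64_t root_hpa  = 0xabcde000ULL;                         /* KVM's shadow root */

	uint64_t lam  = guest_cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
	uint64_t pcid = guest_cr3 & CR3_PCID_MASK;

	/* Roughly what ends up as the guest CR3 when shadow paging is in
	 * use: hardware root, guest PCID, guest LAM bits.
	 */
	printf("loaded CR3: %#llx\n", (unsigned long long)(root_hpa | pcid | lam));
	return 0;
}
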
2 changes: 1 addition & 1 deletion arch/x86/kvm/mmu/mmu.c
@@ -3774,7 +3774,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
hpa_t root;

root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
root_gfn = root_pgd >> PAGE_SHIFT;
root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;

if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
mmu->root.hpa = kvm_mmu_get_dummy_root();
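
The mmu_alloc_shadow_roots() change matters because the guest CR3 can now carry LAM bits in 62:61; masking with __PT_BASE_ADDR_MASK (bits 51:12) before the shift keeps them out of the root GFN. A quick illustration with made-up numbers (not kernel code):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT          12
#define __PT_BASE_ADDR_MASK 0x000ffffffffff000ULL /* GENMASK_ULL(51, 12) */
#define X86_CR3_LAM_U57     (1ULL << 61)

int main(void)
{
	uint64_t root_pgd = 0x76543000ULL | X86_CR3_LAM_U57; /* guest CR3 with LAM_U57 set */

	/* Old calculation: the LAM bit leaks into the GFN. */
	printf("unmasked gfn: %#llx\n", (unsigned long long)(root_pgd >> PAGE_SHIFT));
	/* New calculation: only the page-frame bits survive. */
	printf("masked gfn:   %#llx\n",
	       (unsigned long long)((root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT));
	return 0;
}
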
1 change: 1 addition & 0 deletions arch/x86/kvm/mmu/mmu_internal.h
@@ -13,6 +13,7 @@
#endif

/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
#define __PT_BASE_ADDR_MASK GENMASK_ULL(51, 12)
#define __PT_LEVEL_SHIFT(level, bits_per_level) \
(PAGE_SHIFT + ((level) - 1) * (bits_per_level))
#define __PT_INDEX(address, level, bits_per_level) \
2 changes: 1 addition & 1 deletion arch/x86/kvm/mmu/paging_tmpl.h
@@ -62,7 +62,7 @@
#endif

/* Common logic, but per-type values. These also need to be undefined. */
#define PT_BASE_ADDR_MASK ((pt_element_t)(((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
#define PT_BASE_ADDR_MASK ((pt_element_t)__PT_BASE_ADDR_MASK)
#define PT_LVL_ADDR_MASK(lvl) __PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_LVL_OFFSET_MASK(lvl) __PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_INDEX(addr, lvl) __PT_INDEX(addr, lvl, PT_LEVEL_BITS)
4 changes: 2 additions & 2 deletions arch/x86/kvm/svm/nested.c
@@ -300,7 +300,7 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
if (CC(!(save->cr4 & X86_CR4_PAE)) ||
CC(!(save->cr0 & X86_CR0_PE)) ||
CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
return false;
}

@@ -509,7 +509,7 @@ static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_npt, bool reload_pdptrs)
{
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
return -EINVAL;

if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
11 changes: 8 additions & 3 deletions arch/x86/kvm/vmx/nested.c
@@ -1086,7 +1086,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_ept, bool reload_pdptrs,
enum vm_entry_failure_code *entry_failure_code)
{
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -2732,7 +2732,7 @@ static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
}

/* Reserved bits should not be set */
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
if (CC(!kvm_vcpu_is_legal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
return false;

/* AD, if set, should be supported */
@@ -2927,7 +2927,7 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,

if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;

if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
@@ -5027,6 +5027,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
else
*ret = off;

*ret = vmx_get_untagged_addr(vcpu, *ret, 0);
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
* destination for long mode!
@@ -5850,6 +5851,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
/*
* LAM doesn't apply to addresses that are inputs to TLB
* invalidation.
*/
if (!operand.vpid ||
is_noncanonical_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
1 change: 1 addition & 0 deletions arch/x86/kvm/vmx/sgx.c
@@ -37,6 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
if (!IS_ALIGNED(*gva, alignment)) {
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
fault = is_noncanonical_address(*gva, vcpu);
} else {
*gva &= 0xffffffff;
55 changes: 53 additions & 2 deletions arch/x86/kvm/vmx/vmx.c
@@ -3431,7 +3431,8 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
update_guest_cr3 = false;
vmx_ept_load_pdptrs(vcpu);
} else {
guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu) |
kvm_get_active_cr3_lam_bits(vcpu);
}

if (update_guest_cr3)
@@ -5846,7 +5847,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* would also use advanced VM-exit information for EPT violations to
* reconstruct the page fault error code.
*/
if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
if (unlikely(allow_smaller_maxphyaddr && !kvm_vcpu_is_legal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);

return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -7752,6 +7753,9 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));

entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM));

#undef cr4_fixed1_update
}

@@ -7838,6 +7842,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);

kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM);

vmx_setup_uret_msrs(vmx);

@@ -8312,6 +8317,50 @@ static void vmx_vm_destroy(struct kvm *kvm)
free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
}

/*
* Note, the SDM states that the linear address is masked *after* the modified
* canonicality check, whereas KVM masks (untags) the address and then performs
* a "normal" canonicality check. Functionally, the two methods are identical,
* and when the masking occurs relative to the canonicality check isn't visible
* to software, i.e. KVM's behavior doesn't violate the SDM.
*/
gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
{
int lam_bit;
unsigned long cr3_bits;

if (flags & (X86EMUL_F_FETCH | X86EMUL_F_IMPLICIT | X86EMUL_F_INVLPG))
return gva;

if (!is_64_bit_mode(vcpu))
return gva;

/*
* Bit 63 determines if the address should be treated as user address
* or a supervisor address.
*/
if (!(gva & BIT_ULL(63))) {
cr3_bits = kvm_get_active_cr3_lam_bits(vcpu);
if (!(cr3_bits & (X86_CR3_LAM_U57 | X86_CR3_LAM_U48)))
return gva;

/* LAM_U48 is ignored if LAM_U57 is set. */
lam_bit = cr3_bits & X86_CR3_LAM_U57 ? 56 : 47;
} else {
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP))
return gva;

lam_bit = kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 56 : 47;
}

/*
* Untag the address by sign-extending the lam_bit, but NOT to bit 63.
* Bit 63 is retained from the raw virtual address so that untagging
* doesn't change a user access to a supervisor access, and vice versa.
*/
return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
}

static struct kvm_x86_ops vmx_x86_ops __initdata = {
.name = KBUILD_MODNAME,

@@ -8452,6 +8501,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.complete_emulated_msr = kvm_complete_insn_gp,

.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,

.get_untagged_addr = vmx_get_untagged_addr,
};

static unsigned int vmx_handle_intel_pt_intr(void)
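
vmx_get_untagged_addr() above strips the LAM metadata by sign-extending from the LAM bit while preserving bit 63, so untagging can never turn a user access into a supervisor access or vice versa. A self-contained sketch of that arithmetic for a LAM_U57 user pointer, with a made-up tagged address and sign_extend64() open-coded since this is not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Same semantics as the kernel's sign_extend64(value, index). */
static uint64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;
	return (uint64_t)((int64_t)(value << shift) >> shift);
}

int main(void)
{
	int lam_bit = 56;                     /* LAM_U57: metadata lives in bits 62:57 */
	uint64_t gva = 0x7e00000012345678ULL; /* user pointer carrying a 6-bit tag */

	/* Sign-extend from lam_bit, then splice the original bit 63 back in. */
	uint64_t untagged = (sign_extend64(gva, lam_bit) & ~(1ULL << 63)) |
			    (gva & (1ULL << 63));

	printf("tagged:   %#llx\n", (unsigned long long)gva);
	printf("untagged: %#llx\n", (unsigned long long)untagged); /* 0x12345678 */
	return 0;
}
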
2 changes: 2 additions & 0 deletions arch/x86/kvm/vmx/vmx.h
@@ -421,6 +421,8 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);

gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);

static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
int type, bool value)
{