From f0d9c8c10ab0a61178300f2ee575f8f4f0b941ed Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 24 Jun 2024 16:02:46 -0700 Subject: [PATCH 1/2] AVX128: Fix vmovntdqa failing to zero upper 128-bits --- .../Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp | 4 ++++ unittests/ASM/VEX/vmovntdqa.asm | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp index 3381d48c2f..0dca9135f9 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp @@ -724,6 +724,10 @@ void OpDispatchBuilder::AVX128_MOVVectorNT(OpcodeArgs) { const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit, MemoryAccessType::STREAM); + + if (Op->Dest.IsGPR() && Is128Bit) { + Src.High = LoadZeroVector(OpSize::i128Bit); + } AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src); } diff --git a/unittests/ASM/VEX/vmovntdqa.asm b/unittests/ASM/VEX/vmovntdqa.asm index 18e269bc16..9f9d53e187 100644 --- a/unittests/ASM/VEX/vmovntdqa.asm +++ b/unittests/ASM/VEX/vmovntdqa.asm @@ -23,7 +23,15 @@ mov [rdx + 8 * 2], rax mov rax, 0x7172737475767778 mov [rdx + 8 * 3], rax +; Load results with random data first. +vmovaps ymm0, [rel .data_random] +vmovaps ymm1, [rel .data_random] + vmovntdqa xmm0, [rdx] vmovntdqa ymm1, [rdx] hlt + +align 32 +.data_random: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303 From 99b2018d0ea71aacf952ba663657d81021a741e4 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 24 Jun 2024 16:31:38 -0700 Subject: [PATCH 2/2] unittests: Extend vmovntpd test --- unittests/ASM/VEX/vmovntpd.asm | 40 +++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/unittests/ASM/VEX/vmovntpd.asm b/unittests/ASM/VEX/vmovntpd.asm index 7643c09b39..38939da5dc 100644 --- a/unittests/ASM/VEX/vmovntpd.asm +++ b/unittests/ASM/VEX/vmovntpd.asm @@ -3,7 +3,11 @@ "HostFeatures": ["AVX"], "RegData": { "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"], - "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"] + "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"], + "XMM4": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"], + "XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"], + "XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"], + "XMM7": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"] }, "MemoryRegions": { "0x100000000": "4096" @@ -39,4 +43,38 @@ vmovaps xmm0, [rdx + 8 * 4] vmovntpd [rdx + 8 * 4], ymm2 vmovaps ymm3, [rdx + 8 * 4] +vmovaps ymm4, [rel .data_random] +vmovaps ymm5, [rel .data_random] +vmovaps ymm6, [rel .data_random] +vmovaps ymm7, [rel .data_random] + +vmovntpd [rel .data_res1], xmm4 +vmovaps xmm4, [rel .data_res1] + +vmovntpd [rel .data_res2], xmm5 +vmovaps ymm5, [rel .data_res2] + +vmovntpd [rel .data_res3], ymm6 +vmovaps ymm6, [rel .data_res3] + +vmovntpd [rel .data_res4], ymm7 +vmovaps xmm7, [rel .data_res4] + hlt + +align 32 +.data_random: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303 + +align 32 +.data_res1: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303 + +.data_res2: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303 + +.data_res3: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303 + +.data_res4: +dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303