[LibOS] Emulate in/out instructions as if they generated SIGSEGV

Executing I/O instructions (e.g., in/out) inside an SGX enclave generates a #UD fault. Gramine's PAL tries to handle this exception and propagates it to LibOS/app as a SIGILL signal. However, I/O instructions result in a #GP fault outside SGX (which raises a SIGSEGV signal) if I/O is not permitted. Let Gramine emulate these instructions as if they ended up in SIGSEGV. This helps some apps, e.g. `lscpu`. New LibOS test is added. Co-authored-by: Nirjhar Roy <nirjhar.roy@fortanix.com> Signed-off-by: Nirjhar Roy <nirjhar.roy@fortanix.com> Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
gramineproject · Jun 27, 2024 · ac61ae1 · ac61ae1
1 parent 6d77bcf
commit ac61ae1
Show file tree

Hide file tree

Showing 9 changed files with 269 additions and 7 deletions.
diff --git a/common/include/arch/x86_64/cpu.h b/common/include/arch/x86_64/cpu.h
@@ -2,6 +2,7 @@
 
 #pragma once
 
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdnoreturn.h>
 
@@ -53,6 +54,11 @@ enum extended_state_sub_leaf {
 #define CPU_BRAND_CNTD2_LEAF             0x80000004
 #define INVARIANT_TSC_LEAF               0x80000007
 
+/* Returns true if `op` is one of the x86 legacy instruction prefix bytes (LOCK, REP/REPNE,
+ * segment overrides, operand-size override, address-size override). */
+bool is_x86_instr_legacy_prefix(uint8_t op);
+/* Returns true if `op` is a REX prefix byte, i.e. lies in the range 0x40-0x4f. */
+bool is_x86_instr_rex_prefix(uint8_t op);
+/* Returns true if the run of legacy prefix bytes starting at `rip` contains a LOCK (0xf0)
+ * prefix. */
+bool has_lock_prefix(uint8_t* rip);
+/* Returns true if the instruction at `rip` (after skipping legacy prefixes and an optional REX
+ * prefix) has one of the IN/OUT/INS/OUTS opcodes. */
+bool is_in_out(uint8_t* rip);
+
 static inline void cpuid(unsigned int leaf, unsigned int subleaf, unsigned int words[static 4]) {
     __asm__("cpuid"
             : "=a"(words[CPUID_WORD_EAX]),

diff --git a/common/src/arch/x86_64/cpu.c b/common/src/arch/x86_64/cpu.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/* Copyright (C) 2024 Fortanix Inc
+ *                    Nirjhar Roy <nirjhar.roy@fortanix.com>
+ */
+
+/* This file contains functions that check various features and flags specific to x86 */
+
+#include <stddef.h>
+
+#include "api.h"
+#include "cpu.h"
+
+#define INSTR_SIZE_MAX 15
+
+bool is_x86_instr_legacy_prefix(uint8_t op) {
+    /*
+     * The byte values below are taken from Intel SDM, Vol. 2, Chapter 2.1.1 "Instruction
+     * Prefixes". For x86-64 (64-bit mode) instructions they are referred to as "legacy" prefixes,
+     * see Intel SDM, Vol. 2, Chapter 2.2.1 and Figure 2-3 "Prefix Ordering in 64-bit Mode".
+     */
+    static const uint8_t legacy_prefixes[] = {
+        /* Group 1 */
+        0xf0, /* LOCK prefix */
+        0xf2, /* REPNE/REPNZ prefix */
+        0xf3, /* REP or REPE/REPZ prefix */
+        /* Group 2 */
+        0x2e, /* CS segment override; Branch not taken */
+        0x36, /* SS segment override */
+        0x3e, /* DS segment override; Branch taken */
+        0x26, /* ES segment override */
+        0x64, /* FS segment override */
+        0x65, /* GS segment override */
+        /* Group 3 */
+        0x66, /* Operand-size override prefix */
+        /* Group 4 */
+        0x67, /* Address-size override prefix */
+    };
+
+    for (size_t i = 0; i < sizeof(legacy_prefixes) / sizeof(legacy_prefixes[0]); i++) {
+        if (legacy_prefixes[i] == op)
+            return true;
+    }
+    return false;
+}
+
+bool is_x86_instr_rex_prefix(uint8_t op) {
+    /*
+     * A REX prefix, if present, follows all legacy prefixes and immediately precedes the opcode.
+     * It is a single byte with bit layout [0100WRXB], so its high nibble is always 0x4 and the
+     * byte is one of 0x40-0x4f. For details, see Intel SDM, Vol. 2, Chapter 2.2.1 "REX Prefixes".
+     */
+    return (op & 0xf0) == 0x40;
+}
+
+/*
+ * Scans the run of legacy prefix bytes at the start of the instruction pointed to by `rip` and
+ * returns true if one of them is the LOCK prefix (0xf0). Scanning stops at the first byte that is
+ * not a legacy prefix, or after INSTR_SIZE_MAX bytes, whichever comes first.
+ */
+bool has_lock_prefix(uint8_t* rip) {
+    size_t idx = 0;
+    /* check the bound before dereferencing, so that at most INSTR_SIZE_MAX bytes are ever read */
+    while (idx < INSTR_SIZE_MAX && is_x86_instr_legacy_prefix(rip[idx])) {
+        if (rip[idx] == 0xf0)
+            return true;
+        idx++;
+    }
+    return false;
+}
+
+/*
+ * Returns true if the instruction pointed to by `rip` is one of IN/OUT/INS/OUTS. Any legacy
+ * prefixes and one optional REX prefix are skipped before the opcode byte is examined. Returns
+ * false if no opcode byte is found within the first INSTR_SIZE_MAX bytes.
+ */
+bool is_in_out(uint8_t* rip) {
+    /*
+     * x86-64 instructions may be at most 15 bytes in length and may have multiple instruction
+     * prefixes. See description in Intel SDM, Vol. 2, Chapter 2.1.1 "Instruction Prefixes".
+     */
+    size_t idx = 0;
+    /* check the bound before dereferencing, so that at most INSTR_SIZE_MAX bytes are ever read */
+    while (idx < INSTR_SIZE_MAX && is_x86_instr_legacy_prefix(rip[idx]))
+        idx++;
+
+    if (idx == INSTR_SIZE_MAX)
+        return false;
+
+    /* skip over the optional REX prefix */
+    if (is_x86_instr_rex_prefix(rip[idx]))
+        idx++;
+
+    if (idx == INSTR_SIZE_MAX)
+        return false;
+
+    switch (rip[idx]) {
+        /* INS opcodes */
+        case 0x6c:
+        case 0x6d:
+        /* OUTS opcodes */
+        case 0x6e:
+        case 0x6f:
+        /* IN immediate opcodes */
+        case 0xe4:
+        case 0xe5:
+        /* OUT immediate opcodes */
+        case 0xe6:
+        case 0xe7:
+        /* IN register opcodes */
+        case 0xec:
+        case 0xed:
+        /* OUT register opcodes */
+        case 0xee:
+        case 0xef:
+            return true;
+    }
+
+    return false;
+}
diff --git a/common/src/arch/x86_64/meson.build b/common/src/arch/x86_64/meson.build
@@ -2,7 +2,9 @@ common_src_arch_nasm = nasm_gen.process(
     'ct_memequal.nasm',
 )
 
-common_src_arch_c = files()
+common_src_arch_c = files(
+    'cpu.c',
+)
 
 common_src_arch = [
     common_src_arch_nasm,

diff --git a/libos/test/regression/in_out_instruction.c b/libos/test/regression/in_out_instruction.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/* Copyright (C) 2024 Fortanix Inc
+ *                    Nirjhar Roy <nirjhar.roy@fortanix.com>
+ */
+
+/*
+ * Verify that IN/OUT/INS/OUTS instructions generate SIGSEGV (and not SIGILL).
+ *
+ * This test is important for SGX PAL: IN/OUT/INS/OUTS instructions result in a #UD fault when
+ * executed in SGX enclaves, but result in a #GP fault when executed by normal userspace code.
+ * Gramine is supposed to transform the #UD fault into a #GP fault, which ends up as a SIGSEGV in
+ * the application.
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+
+#include "common.h"
+
+#ifndef __x86_64__
+#error Unsupported architecture
+#endif
+
+#define EXPECTED_NUM_SIGSEGVS 2
+
+/* number of SIGSEGVs handled so far; updated by the signal handler and read by main(), hence
+ * declared as `volatile sig_atomic_t` */
+static volatile sig_atomic_t g_sigsegv_triggered = 0;
+
+/* Symbols defined in the top-level assembly block below. inb_func() and outb_func() are called
+ * from main(); inb_instruction_addr, outb_instruction_addr and ret are labels inside those two
+ * functions whose addresses are used by the signal handler. */
+uint8_t inb_func(uint16_t port) __attribute__((visibility("internal")));
+void outb_func(uint8_t value, uint16_t port) __attribute__((visibility("internal")));
+void inb_instruction_addr(void) __attribute__((visibility("internal")));
+void outb_instruction_addr(void) __attribute__((visibility("internal")));
+void ret(void) __attribute__((visibility("internal")));
+
+/*
+ * inb_func:  copies its first argument (RDI) into RDX and executes `inb %dx, %al`; the IN
+ *            instruction itself carries the label inb_instruction_addr.
+ * outb_func: copies its second argument (RSI) into RDX and its first argument (RDI) into RAX,
+ *            then executes `outb %al, %dx`; the OUT instruction itself carries the label
+ *            outb_instruction_addr.
+ * ret:       labels the final `ret` instruction of outb_func; the signal handler points RIP here
+ *            after each fault.
+ */
+__asm__ (
+".pushsection .text\n"
+".type inb_func, @function\n"
+".type outb_func, @function\n"
+".type inb_instruction_addr, @function\n"
+".type outb_instruction_addr, @function\n"
+".type ret, @function\n"
+"inb_func:\n"
+    "mov %rdi, %rdx\n"
+/* exact address of the IN instruction, compared against the faulting RIP in the handler */
+"inb_instruction_addr:\n"
+    "inb %dx, %al\n"
+    "ret\n"
+"outb_func:\n"
+    "mov %rsi, %rdx\n"
+    "mov %rdi, %rax\n"
+/* exact address of the OUT instruction, compared against the faulting RIP in the handler */
+"outb_instruction_addr:\n"
+    "outb %al, %dx\n"
+"ret:\n"
+    "ret\n"
+".popsection\n"
+);
+
+/*
+ * SIGSEGV handler: verifies that the fault happened exactly on the expected I/O instruction (the
+ * first fault on inb, the second on outb), counts it, and resumes execution at the `ret` label.
+ * Any unexpected signal, fault address or extra fault terminates the process with exit code 1.
+ */
+static void handler(int signum, siginfo_t* si, void* uc) {
+    ucontext_t* ctx = (ucontext_t*)uc;
+
+    /* we registered a SIGSEGV handler but got another signal?! */
+    if (signum != SIGSEGV)
+        _Exit(1);
+
+    uint64_t fault_rip = ctx->uc_mcontext.gregs[REG_RIP];
+    uint64_t expected_rip;
+    switch (g_sigsegv_triggered) {
+        case 0:
+            /* must be a fault on inb instruction */
+            expected_rip = (uint64_t)(inb_instruction_addr);
+            break;
+        case 1:
+            /* must be a fault on outb instruction */
+            expected_rip = (uint64_t)(outb_instruction_addr);
+            break;
+        default:
+            /* too many segfaults?! */
+            _Exit(1);
+    }
+
+    if (fault_rip != expected_rip)
+        _Exit(1);
+
+    g_sigsegv_triggered++;
+
+    /* only RIP needs to be fixed up in the context: inb_func() and outb_func() modified nothing
+     * but the caller-saved RDX and RAX registers */
+    ctx->uc_mcontext.gregs[REG_RIP] = (uint64_t)ret;
+}
+
+/* Installs the SIGSEGV handler, executes one IN and one OUT instruction, and checks that exactly
+ * EXPECTED_NUM_SIGSEGVS faults were handled. */
+int main(void) {
+    struct sigaction sa = {
+        .sa_flags = SA_RESTART | SA_SIGINFO,
+        .sa_sigaction = handler,
+    };
+    CHECK(sigaction(SIGSEGV, &sa, NULL));
+
+    const uint16_t port = 0x3F8;
+    const uint8_t value = 0;
+
+    /* each call is expected to fault on its I/O instruction and be resumed by the handler */
+    inb_func(port);
+    outb_func(value, port);
+
+    if (g_sigsegv_triggered != EXPECTED_NUM_SIGSEGVS) {
+        errx(1, "Expected %d SIGSEGVs, got %d", EXPECTED_NUM_SIGSEGVS, g_sigsegv_triggered);
+    }
+
+    puts("TEST OK");
+    return 0;
+}
diff --git a/libos/test/regression/meson.build b/libos/test/regression/meson.build
@@ -167,6 +167,7 @@ if host_machine.cpu_family() == 'x86_64'
         'debug_regs_x86_64': {
             'c_args': '-g3',
         },
+        'in_out_instruction' : {},
         'rdtsc': {},
         'sighandler_divbyzero': {},
     }

diff --git a/libos/test/regression/test_libos.py b/libos/test/regression/test_libos.py
@@ -1572,3 +1572,9 @@ class TC_92_avx(RegressionTestCase):
     def test_000_avx(self):
         stdout, _ = self.run_binary(['avx'])
         self.assertIn('TEST OK', stdout)
+
+@unittest.skipUnless(ON_X86, 'x86-specific')
+class TC_93_In_Out(RegressionTestCase):
+    def test_000_in_out(self):
+        stdout, stderr = self.run_binary(['in_out_instruction'])
+        self.assertIn('TEST OK', stdout)
diff --git a/libos/test/regression/tests.toml b/libos/test/regression/tests.toml
@@ -144,6 +144,7 @@ manifests = [
   "avx",
   "cpuid",
   "debug_regs_x86_64",
+  "in_out_instruction",
   "rdtsc",
   "bootstrap_cpp",
   "sighandler_divbyzero",

diff --git a/libos/test/regression/tests_musl.toml b/libos/test/regression/tests_musl.toml
@@ -144,6 +144,7 @@ manifests = [
   "avx",
   "cpuid",
   "debug_regs_x86_64",
+  "in_out_instruction",
   "rdtsc",
   "sighandler_divbyzero",
 ]

diff --git a/pal/src/host/linux-sgx/pal_exception.c b/pal/src/host/linux-sgx/pal_exception.c
@@ -12,6 +12,7 @@
 
 #include "api.h"
 #include "asan.h"
+#include "cpu.h"
 #include "pal.h"
 #include "pal_internal.h"
 #include "pal_linux.h"
@@ -177,7 +178,12 @@ static void emulate_iret_and_print_warning(sgx_cpu_context_t* uc) {
 
 /* return value: true if #UD was handled and execution can be continued without propagating #UD;
  *               false if #UD was not handled and exception needs to be raised up to LibOS/app */
-static bool handle_ud(sgx_cpu_context_t* uc) {
+static bool handle_ud(sgx_cpu_context_t* uc, int* out_event_num) {
+    /* most unhandled #UD faults are translated and sent to LibOS/app as "Illegal instruction"
+     * exceptions; however some #UDs (e.g. triggered due to IN/OUT/INS/OUTS) must be translated as
+     * "Memory fault" exceptions */
+    *out_event_num = PAL_EVENT_ILLEGAL;
+
     uint8_t* instr = (uint8_t*)uc->rip;
     if (instr[0] == 0x0f && instr[1] == 0xa2) {
         /* cpuid */
@@ -224,6 +230,23 @@ static bool handle_ud(sgx_cpu_context_t* uc) {
                        " patching your application to use Gramine syscall API.");
         }
         return false;
+    } else if (is_in_out(instr) && !has_lock_prefix(instr)) {
+        /*
+         * Executing I/O instructions (e.g., IN/OUT/INS/OUTS) inside an SGX enclave generates a #UD
+         * fault. Without the below corner-case handling, PAL would propagate this fault to LibOS as
+         * an "Illegal instruction" Gramine exception. However, I/O instructions result in a #GP
+         * fault outside SGX (which corresponds to "Memory fault" Gramine exception) if I/O is not
+         * permitted (which is true in userspace apps). Let PAL emulate these instructions as if
+         * they ended up in a memory fault.
+         *
+         * Note that I/O instructions with a LOCK prefix always result in a #UD fault, so they are
+         * special-cased here.
+         */
+        if (FIRST_TIME()) {
+            log_warning("Emulating In/OUT/INS/OUTS instruction as a SIGSEGV signal to app.");
+        }
+        *out_event_num = PAL_EVENT_MEMFAULT;
+        return false;
     }
 
     char buf[LOCATION_BUF_SIZE];
@@ -280,6 +303,8 @@ void _PalExceptionHandler(uint32_t trusted_exit_info_,
      * --+-----------------------------+------------------------------------------+------------+
      */
 
+    bool is_synthetic_gp = false; /* IN/OUT/INS/OUTS instructions morph #UD into a synthetic #GP */
+
     uint32_t event_num = 0; /* illegal event */
 
     if (!trusted_exit_info.valid) {
@@ -315,11 +340,19 @@ void _PalExceptionHandler(uint32_t trusted_exit_info_,
                               PAL_EVENT_ILLEGAL, untrusted_external_event);
                     _PalProcessExit(1);
                 }
-                if (handle_ud(uc)) {
+                int event_num_from_handle_ud;
+                if (handle_ud(uc, &event_num_from_handle_ud)) {
                     restore_sgx_context(uc, xregs_state);
                     /* UNREACHABLE */
                 }
-                event_num = PAL_EVENT_ILLEGAL;
+                assert(event_num_from_handle_ud == PAL_EVENT_ILLEGAL
+                        || event_num_from_handle_ud == PAL_EVENT_MEMFAULT);
+                if (event_num_from_handle_ud == PAL_EVENT_MEMFAULT) {
+                    /* it's a #UD on IN/OUT/INS/OUTS instructions, morphed into a #GP in handle_ud()
+                     * logic: adjust exception info sent to LibOS to mimic a #GP (see code below) */
+                    is_synthetic_gp = true;
+                }
+                event_num = event_num_from_handle_ud;
                 break;
             case SGX_EXCEPTION_VECTOR_DE:
             case SGX_EXCEPTION_VECTOR_MF:
@@ -417,8 +450,8 @@ void _PalExceptionHandler(uint32_t trusted_exit_info_,
     if (trusted_exit_info.valid) {
         ctx.trapno = trusted_exit_info.vector;
         /* Only these two exceptions save information in EXINFO. */
-        if (trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_GP
-                || trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_PF) {
+        if (!is_synthetic_gp && (trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_GP
+                || trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_PF)) {
             ctx.err = exinfo->error_code_val; /* bits: Present, Write/Read, User/Kernel, etc. */
             ctx.cr2 = exinfo->maddr;          /* NOTE: on #GP, maddr = 0 */
             has_hw_fault_address = true;
@@ -431,7 +464,8 @@ void _PalExceptionHandler(uint32_t trusted_exit_info_,
             addr = uc->rip;
             break;
         case PAL_EVENT_MEMFAULT:
-            if (!has_hw_fault_address && !g_pal_linuxsgx_state.memfaults_without_exinfo_allowed) {
+            if (!has_hw_fault_address && !is_synthetic_gp
+                    && !g_pal_linuxsgx_state.memfaults_without_exinfo_allowed) {
                 log_error("Tried to handle a memory fault with no faulting address reported by "
                           "SGX. Please consider enabling 'sgx.use_exinfo' in the manifest.");
                 _PalProcessExit(1);