From c0142a19084f59856eccbe719eec6d62facc856f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 7 Apr 2022 13:35:34 +0000 Subject: [PATCH 1/2] Add code for profiling --- Makefile | 4 ++-- kernel-rs/Cargo.toml | 2 +- kernel-rs/src/arch/arm/asm.rs | 2 +- kernel-rs/src/arch/arm/timer.rs | 14 +++++++++++++- kernel-rs/src/arch/arm/trap.rs | 26 ++++++++++++++++++++++++++ kernel-rs/src/arch/interface.rs | 1 + kernel-rs/src/arch/riscv/trap.rs | 1 + kernel-rs/src/kernel.rs | 9 +++++++-- kernel-rs/src/proc/procs.rs | 4 ++-- kernel-rs/src/syscall.rs | 15 ++++++++++++++- kernel-rs/src/trap.rs | 25 +++++++++++++++++++++---- kernel/arm/trampoline.S | 6 ++++++ lmbench/lat_syscall.c | 16 +++++++++++++++- 13 files changed, 110 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index a3ebb0a8..e3a545f2 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,7 @@ AR=ar ARCREATE=cr ifndef OPTFLAGS -OPTFALGS := -O +OPTFLAGS := -O endif CFLAGS = -Wall -Werror $(OPTFLAGS) -fno-omit-frame-pointer -ggdb @@ -313,7 +313,7 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ then echo "-gdb tcp::$(GDBPORT)"; \ else echo "-s -p $(GDBPORT)"; fi) ifndef CPUS -CPUS := 3 +CPUS := 1 endif QEMUOPTS = -machine virt -kernel $K/kernel -m 128M -smp $(CPUS) -nographic diff --git a/kernel-rs/Cargo.toml b/kernel-rs/Cargo.toml index ac6d34b5..58841de7 100644 --- a/kernel-rs/Cargo.toml +++ b/kernel-rs/Cargo.toml @@ -21,7 +21,7 @@ opt-level = 1 [profile.release] panic = "abort" -lto = true +opt-level = 1 [dependencies] array-macro = "2.1.0" diff --git a/kernel-rs/src/arch/arm/asm.rs b/kernel-rs/src/arch/arm/asm.rs index 8909b5ce..106fb440 100644 --- a/kernel-rs/src/arch/arm/asm.rs +++ b/kernel-rs/src/arch/arm/asm.rs @@ -134,7 +134,7 @@ pub enum SmcFunctions { /// /// Arguments must follow ARM SMC calling convention. #[no_mangle] -pub unsafe fn smc_call(x0: u64, x1: u64, x2: u64, x3: u64) -> u64 { +pub unsafe fn smc_call(x0: u64, x1: u64, x2: u64, x3: u64) -> (u64, u64, u64, u64) { let (r0, r1, r2, r3); unsafe { // NOTE: here use hvc for qemu without `virtualization=on` diff --git a/kernel-rs/src/arch/arm/timer.rs b/kernel-rs/src/arch/arm/timer.rs index c64d98af..0aa837d7 100644 --- a/kernel-rs/src/arch/arm/timer.rs +++ b/kernel-rs/src/arch/arm/timer.rs @@ -11,6 +11,16 @@ const TIMER_TICK_MS: u64 = 100; impl TimeManager for Armv8 { fn timer_init() { + let mut x: usize; + + // for user-space clock time profiling. + unsafe { + asm!("mrs {}, cntkctl_el1", out(reg) x); + x &= !((3 << 8) | (1 << 1)); + x |= 1; + asm!("msr cntkctl_el1, {}", in(reg) x); + } + set_next_timer(); } @@ -26,7 +36,9 @@ impl TimeManager for Armv8 { pub fn read_cntpct() -> u64 { // Prevent that the counter is read ahead of time due to out-of-order execution. unsafe { barrier::isb(barrier::SY) }; - CNTPCT_EL0.get() + let ret = CNTPCT_EL0.get(); + unsafe { barrier::isb(barrier::SY) }; + ret } pub fn read_freq() -> u64 { diff --git a/kernel-rs/src/arch/arm/trap.rs b/kernel-rs/src/arch/arm/trap.rs index 624b2e1f..ac8152ae 100644 --- a/kernel-rs/src/arch/arm/trap.rs +++ b/kernel-rs/src/arch/arm/trap.rs @@ -16,6 +16,8 @@ use crate::{ timer::set_next_timer, Armv8, }, + kernel::TIME, + // kernel::KERNEL, memlayout::{TRAMPOLINE, TRAPFRAME}, trap::{IrqNum, IrqTypes, TrapTypes}, }; @@ -196,6 +198,7 @@ impl TrapManager for Armv8 { trapframe: &mut TrapFrame, kernel_stack: usize, usertrap: usize, + syscall_num: usize, ) -> ! { // We're about to switch the destination of traps from // kerneltrap() to usertrap(), so turn off interrupts until @@ -219,6 +222,29 @@ impl TrapManager for Armv8 { let fn_0: usize = TRAMPOLINE + unsafe { userret.as_ptr().offset_from(trampoline.as_ptr()) } as usize; let fn_0 = unsafe { mem::transmute::<_, unsafe extern "C" fn(usize, usize) -> !>(fn_0) }; + // let clock = TargetArch::r_cycle(); + // unsafe { + // TIME[3] = clock; + // } + + // If it is getppid, print elapsed time per interval. + if _syscall_num == 26 { + unsafe { + crate::kernel::kernel_ref(|kctx| { + for i in 0..2 { + kctx.as_ref().write_fmt(format_args!( + "Interval {}: {}\n", + i + 1, + TIME[i + 1] - TIME[i] + )); + } + // for i in 0..4{ + // kctx.as_ref().write_fmt(format_args!("lap {}: {}\n", i, TIME[i])); + // } + }) + } + } + unsafe { fn_0(TRAPFRAME, user_pagetable_addr) } } diff --git a/kernel-rs/src/arch/interface.rs b/kernel-rs/src/arch/interface.rs index 04313e20..4499e1e2 100644 --- a/kernel-rs/src/arch/interface.rs +++ b/kernel-rs/src/arch/interface.rs @@ -135,6 +135,7 @@ pub trait TrapManager { trap: &mut ::TrapFrame, kernel_stack: usize, usertrap: usize, + syscall_num: usize, ) -> !; fn save_trap_regs(store: &mut [usize; 10]); diff --git a/kernel-rs/src/arch/riscv/trap.rs b/kernel-rs/src/arch/riscv/trap.rs index e3848cef..7d19336c 100644 --- a/kernel-rs/src/arch/riscv/trap.rs +++ b/kernel-rs/src/arch/riscv/trap.rs @@ -177,6 +177,7 @@ impl TrapManager for RiscV { trapframe: &mut TrapFrame, kernel_stack: usize, usertrap: usize, + _syscall_num: usize, ) -> ! { // We're about to switch the destination of traps from // kerneltrap() to usertrap(), so turn off interrupts until diff --git a/kernel-rs/src/kernel.rs b/kernel-rs/src/kernel.rs index 69e2d4c6..3bd4e968 100644 --- a/kernel-rs/src/kernel.rs +++ b/kernel-rs/src/kernel.rs @@ -27,11 +27,13 @@ use crate::{ const CONSOLE_IN_DEVSW: usize = 1; /// The kernel. -static mut KERNEL: Kernel = unsafe { Kernel::new() }; +pub static mut KERNEL: Kernel = unsafe { Kernel::new() }; + +pub static mut TIME: [usize; 10] = [0; 10]; /// Returns a shared reference to the `KERNEL`. #[inline] -fn kernel<'s>() -> StrongPin<'s, Kernel> { +pub fn kernel<'s>() -> StrongPin<'s, Kernel> { // SAFETY: there is no way to make a mutable reference to `KERNEL` except calling // `kernel_builder_unchecked_pin`, which is unsafe. unsafe { StrongPin::new_unchecked(&KERNEL) } @@ -89,6 +91,8 @@ pub struct Kernel { #[pin] file_system: DefaultFs, + + pub time_data: [usize; 10], } /// A branded reference to a `Kernel`. @@ -172,6 +176,7 @@ impl Kernel { }; NDEV], ftable: FileTable::new_ftable(), file_system: DefaultFs::new(), + time_data: [0; 10], } } diff --git a/kernel-rs/src/proc/procs.rs b/kernel-rs/src/proc/procs.rs index ab6c3e98..afd61946 100644 --- a/kernel-rs/src/proc/procs.rs +++ b/kernel-rs/src/proc/procs.rs @@ -465,7 +465,7 @@ impl<'id, 's> ProcsRef<'id, 's> { } // get the pid of current process's parent - pub fn get_parent_pid(&mut self, ctx: &mut KernelCtx<'id, '_>) -> Pid { + pub fn get_parent_pid(&self, ctx: &mut KernelCtx<'id, '_>) -> Pid { let mut parent_guard = self.wait_guard(); let parent = *ctx.proc().get_mut_parent(&mut parent_guard); @@ -491,7 +491,7 @@ unsafe fn forkret() -> ! { // regular process (e.g., because it calls sleep), and thus cannot // be run from main(). ctx.kernel().fs().init(ROOTDEV, &ctx); - unsafe { ctx.user_trap_ret() } + unsafe { ctx.user_trap_ret(0) } }; unsafe { kernel_ctx(forkret_inner) } diff --git a/kernel-rs/src/syscall.rs b/kernel-rs/src/syscall.rs index 7c3fd55d..42734795 100644 --- a/kernel-rs/src/syscall.rs +++ b/kernel-rs/src/syscall.rs @@ -16,6 +16,7 @@ use crate::{ file::{RcFile, SeekWhence, SelectEvent}, fs::{FcntlFlags, FileSystem, FileSystemExt, InodeType, Path}, hal::hal, + kernel::TIME, ok_or, page::{Page, PGSIZE}, param::{MAXARG, MAXPATH}, @@ -89,7 +90,12 @@ impl CurrentProc<'_, '_> { impl KernelCtx<'_, '_> { pub fn syscall(&mut self, num: i32) -> Result { - match num { + let clock = TargetArch::r_cycle(); + // record end of stage 2 (begin of stage 3). + unsafe { + TIME[1] = clock; + } + let ret = match num { 1 => self.sys_fork(), 2 => self.sys_exit(), 3 => self.sys_wait(), @@ -128,7 +134,14 @@ impl KernelCtx<'_, '_> { )); Err(()) } + }; + + let clock = TargetArch::r_cycle(); + // record end of stage 3 (begin of stage 4). + unsafe { + TIME[2] = clock; } + ret } /// Terminate the current process; status reported to wait(). No return. diff --git a/kernel-rs/src/trap.rs b/kernel-rs/src/trap.rs index 641e93a8..5087f2d1 100644 --- a/kernel-rs/src/trap.rs +++ b/kernel-rs/src/trap.rs @@ -4,6 +4,7 @@ use crate::{ arch::interface::{ProcManager, TrapFrameManager, TrapManager}, arch::TargetArch, hal::hal, + kernel::TIME, kernel::{kernel_ref, KernelRef}, ok_or, proc::{kernel_ctx, KernelCtx, Procstate}, @@ -38,7 +39,12 @@ pub type IrqNum = usize; /// Handle an interrupt, exception, or system call from user space. /// Called from trampoline.S. #[no_mangle] -pub unsafe extern "C" fn usertrap(arg: usize) { +pub unsafe extern "C" fn usertrap(arg: usize, _a: usize, time: usize) { + // record start of stage 2. + unsafe { + TIME[0] = time; + } + // SAFETY // * usertrap can be reached only after the initialization of the kernel. // * It's the beginning of this thread, so there's no exsiting `KernelCtx` or `CurrentProc`. @@ -78,6 +84,8 @@ impl KernelCtx<'_, '_> { TargetArch::before_handling_trap(&trap_type, Some(self.proc_mut().trap_frame_mut())); } + let mut syscall_num = 0; + match &trap_type { TrapTypes::Syscall => { // system call @@ -90,6 +98,7 @@ impl KernelCtx<'_, '_> { // SAFETY: Interrupt handlers has been configured properly unsafe { TargetArch::intr_on() }; let syscall_no = self.proc_mut().trap_frame_mut().get_param_reg(7.into()) as i32; + syscall_num = syscall_no; *self.proc_mut().trap_frame_mut().param_reg_mut(0.into()) = ok_or!(self.syscall(syscall_no), usize::MAX); } @@ -127,7 +136,7 @@ impl KernelCtx<'_, '_> { self.yield_cpu(); } - unsafe { self.user_trap_ret() } + unsafe { self.user_trap_ret(syscall_num as usize) } } /// Return to user space. @@ -135,7 +144,7 @@ impl KernelCtx<'_, '_> { /// # Safety /// /// It must be called only by `user_trap`. - pub unsafe fn user_trap_ret(mut self) -> ! { + pub unsafe fn user_trap_ret(mut self, syscall_num: usize) -> ! { // Tell trampoline.S the user page table to switch to. let user_table = self.proc().memory().page_table_addr(); @@ -144,7 +153,15 @@ impl KernelCtx<'_, '_> { let trapframe = self.proc_mut().trap_frame_mut(); // SAFETY: It is called by `user_trap_ret`, after handling the user trap. - unsafe { TargetArch::user_trap_ret(user_table, trapframe, kstack, usertrap as usize) }; + unsafe { + TargetArch::user_trap_ret( + user_table, + trapframe, + kstack, + usertrap as usize, + syscall_num, + ) + }; } } diff --git a/kernel/arm/trampoline.S b/kernel/arm/trampoline.S index 157bf8ff..61e97e87 100644 --- a/kernel/arm/trampoline.S +++ b/kernel/arm/trampoline.S @@ -73,6 +73,11 @@ trampoline: // we don't use it. msr tpidr_el1, x0 + // record stage2 start time + mrs x0, cntpct_el0 + msr tpidr_el0, x0 + + // assign TRAPFRAME address (fixed) to x0 movz x0, #0xe000 movk x0, #0xffff, lsl #16 @@ -301,6 +306,7 @@ cur_el_spx_error: .align 6 lower_el_aarch64_sync: exception_0_entry + mrs x2, tpidr_el0 mov x0, #0 # jump to usertrap(), which does not return br x1 diff --git a/lmbench/lat_syscall.c b/lmbench/lat_syscall.c index c5417adc..23a87f5a 100644 --- a/lmbench/lat_syscall.c +++ b/lmbench/lat_syscall.c @@ -17,12 +17,26 @@ struct _state { char* file; }; +static inline uint64 read_cntpct() { + uint64 x; + asm volatile("isb sy"); + asm volatile("mrs %0, CNTPCT_EL0" : "=r" (x)); + asm volatile("isb sy"); + return x; +} + void do_getppid(iter_t iterations, void *cookie) { + // uint64 x1, x2; while (iterations-- > 0) { + // start of stage 1 + // x1 = read_cntpct(); getppid(); - } + // end of total execution + // x2 = read_cntpct(); + // printf("total: %lu\n", x2 - x1); + } } void From 909482017e7454ed01167d2ada2739ec5f40d415 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 8 Apr 2022 02:56:33 +0000 Subject: [PATCH 2/2] Optimize getppid --- kernel-rs/Cargo.toml | 2 +- kernel-rs/src/arch/arm/trap.rs | 35 +++++++++++++++++----------------- kernel-rs/src/proc/procs.rs | 20 +++++++++++++++++++ kernel-rs/src/syscall.rs | 17 ++++++++--------- kernel/arm/trampoline.S | 18 ++++++++--------- lmbench/lat_syscall.c | 3 ++- user/ulib.c | 29 ++++++++++++++++++++++++---- 7 files changed, 81 insertions(+), 43 deletions(-) diff --git a/kernel-rs/Cargo.toml b/kernel-rs/Cargo.toml index 58841de7..41b03662 100644 --- a/kernel-rs/Cargo.toml +++ b/kernel-rs/Cargo.toml @@ -21,7 +21,7 @@ opt-level = 1 [profile.release] panic = "abort" -opt-level = 1 +opt-level = 2 [dependencies] array-macro = "2.1.0" diff --git a/kernel-rs/src/arch/arm/trap.rs b/kernel-rs/src/arch/arm/trap.rs index ac8152ae..88565aee 100644 --- a/kernel-rs/src/arch/arm/trap.rs +++ b/kernel-rs/src/arch/arm/trap.rs @@ -16,7 +16,6 @@ use crate::{ timer::set_next_timer, Armv8, }, - kernel::TIME, // kernel::KERNEL, memlayout::{TRAMPOLINE, TRAPFRAME}, trap::{IrqNum, IrqTypes, TrapTypes}, @@ -198,7 +197,7 @@ impl TrapManager for Armv8 { trapframe: &mut TrapFrame, kernel_stack: usize, usertrap: usize, - syscall_num: usize, + _syscall_num: usize, ) -> ! { // We're about to switch the destination of traps from // kerneltrap() to usertrap(), so turn off interrupts until @@ -228,22 +227,22 @@ impl TrapManager for Armv8 { // } // If it is getppid, print elapsed time per interval. - if _syscall_num == 26 { - unsafe { - crate::kernel::kernel_ref(|kctx| { - for i in 0..2 { - kctx.as_ref().write_fmt(format_args!( - "Interval {}: {}\n", - i + 1, - TIME[i + 1] - TIME[i] - )); - } - // for i in 0..4{ - // kctx.as_ref().write_fmt(format_args!("lap {}: {}\n", i, TIME[i])); - // } - }) - } - } + // if _syscall_num == 26 { + // unsafe { + // crate::kernel::kernel_ref(|kctx| { + // // for i in 0..8 { + // kctx.as_ref().write_fmt(format_args!( + // "Interval {}: {}\n", + // 0, + // TIME[0] - TIME[9] + // )); + // // } + // // for i in 0..4{ + // // kctx.as_ref().write_fmt(format_args!("lap {}: {}\n", i, TIME[i])); + // // } + // }) + // } + // } unsafe { fn_0(TRAPFRAME, user_pagetable_addr) } } diff --git a/kernel-rs/src/proc/procs.rs b/kernel-rs/src/proc/procs.rs index afd61946..b4388a7d 100644 --- a/kernel-rs/src/proc/procs.rs +++ b/kernel-rs/src/proc/procs.rs @@ -14,6 +14,7 @@ use super::*; use crate::{ addr::{Addr, UVAddr, PGSIZE}, arch::interface::TrapFrameManager, + arch::TargetArch, fs::{DefaultFs, FileSystem, FileSystemExt}, hal::hal, kalloc::Kmem, @@ -466,10 +467,29 @@ impl<'id, 's> ProcsRef<'id, 's> { // get the pid of current process's parent pub fn get_parent_pid(&self, ctx: &mut KernelCtx<'id, '_>) -> Pid { + // let before = TargetArch::r_cycle(); let mut parent_guard = self.wait_guard(); + + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap1: {}\n", after - before)); + + // let before = TargetArch::r_cycle(); + let parent = *ctx.proc().get_mut_parent(&mut parent_guard); + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap2: {}\n", after - before)); + + // let before = TargetArch::r_cycle(); + let lock = unsafe { (*parent).info.lock() }; + + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap3: {}\n", after - before)); + lock.pid } } diff --git a/kernel-rs/src/syscall.rs b/kernel-rs/src/syscall.rs index 42734795..a7aaf09f 100644 --- a/kernel-rs/src/syscall.rs +++ b/kernel-rs/src/syscall.rs @@ -16,7 +16,6 @@ use crate::{ file::{RcFile, SeekWhence, SelectEvent}, fs::{FcntlFlags, FileSystem, FileSystemExt, InodeType, Path}, hal::hal, - kernel::TIME, ok_or, page::{Page, PGSIZE}, param::{MAXARG, MAXPATH}, @@ -90,11 +89,11 @@ impl CurrentProc<'_, '_> { impl KernelCtx<'_, '_> { pub fn syscall(&mut self, num: i32) -> Result { - let clock = TargetArch::r_cycle(); + // let clock = TargetArch::r_cycle(); // record end of stage 2 (begin of stage 3). - unsafe { - TIME[1] = clock; - } + // unsafe { + // TIME[1] = clock; + // } let ret = match num { 1 => self.sys_fork(), 2 => self.sys_exit(), @@ -136,11 +135,11 @@ impl KernelCtx<'_, '_> { } }; - let clock = TargetArch::r_cycle(); + // let clock = TargetArch::r_cycle(); // record end of stage 3 (begin of stage 4). - unsafe { - TIME[2] = clock; - } + // unsafe { + // TIME[2] = clock; + // } ret } diff --git a/kernel/arm/trampoline.S b/kernel/arm/trampoline.S index 61e97e87..60f80397 100644 --- a/kernel/arm/trampoline.S +++ b/kernel/arm/trampoline.S @@ -5,14 +5,6 @@ .globl trampoline trampoline: -.macro flush_cache - isb sy - dsb sy - dsb ishst - tlbi vmalle1is - dsb ish - isb -.endm .macro exception_1_entry sub sp, sp, #272 @@ -143,8 +135,12 @@ trampoline: msr ttbr0_el1, x24 + mrs x3, cntpct_el0 + msr tpidr_el0, x3 + # flush all the caches - flush_cache + tlbi vmalle1is + isb .endm /* Exception vectors */ @@ -194,7 +190,9 @@ userret: # switch to the user page table. msr ttbr0_el1, x1 - flush_cache + dsb nsh + tlbi vmalle1is + isb # restore ELR, SPSR, LR, SP ldp x21, x22, [x0, #16] /* SPSR, FPSR */ diff --git a/lmbench/lat_syscall.c b/lmbench/lat_syscall.c index 23a87f5a..6f5b1c7f 100644 --- a/lmbench/lat_syscall.c +++ b/lmbench/lat_syscall.c @@ -35,7 +35,8 @@ do_getppid(iter_t iterations, void *cookie) getppid(); // end of total execution // x2 = read_cntpct(); - // printf("total: %lu\n", x2 - x1); + // printf("x1: %lu, x2: %lu\n", x1, x2); + // printf("diff: %lu\n", x2 - x1); } } diff --git a/user/ulib.c b/user/ulib.c index 178b916f..9774d065 100644 --- a/user/ulib.c +++ b/user/ulib.c @@ -4,6 +4,7 @@ #include "user/user.h" #define MICROSECS_PER_TICK 100000 +#define US_PER_S 1000000 char* strcpy(char *s, const char *t) @@ -275,13 +276,33 @@ int fsync(int fildes) // return 0; // } -// TODO: find better way to convert uptime ticks to real time. +// TODO: This is only for aarch64. Separate this. +static inline uint64 r_cntpct() { + uint64 x; + asm volatile("isb sy"); + asm volatile("mrs %0, CNTPCT_EL0" : "=r" (x)); + asm volatile("isb sy"); + return x; +} + + +// TODO: This is only for aarch64. Separate this. +static inline uint64 r_freq() { + uint64 x; + asm volatile("isb sy"); + asm volatile("mrs %0, CNTFRQ_EL0" : "=r" (x)); + asm volatile("isb sy"); + return x; +} + +// TODO: optimize this not to use floating point operations (Linux timekeeper). +// user-level uptime syscall int gettimeofday(struct timeval *__restrict__ tp, struct timezone *__restrict__ tzp) { - int cur = uptime_as_micro(); - tp->tv_sec = cur / 1000000; - tp->tv_usec = cur % 1000000; + uint64 uptime_in_micro = (r_cntpct() * US_PER_S) / r_freq(); + tp->tv_sec = uptime_in_micro / US_PER_S; + tp->tv_usec = uptime_in_micro % US_PER_S; return 0; }