From b0778ab9306ebf16926d1a1757b45080fac3c5c9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 8 Apr 2022 02:56:33 +0000 Subject: [PATCH] Optimize lat_syscall --- kernel-rs/Cargo.toml | 2 +- kernel-rs/src/arch/arm/trap.rs | 35 +++++++++++++++++----------------- kernel-rs/src/proc/procs.rs | 20 +++++++++++++++++++ kernel-rs/src/syscall.rs | 17 ++++++++--------- kernel/arm/trampoline.S | 20 +++++++++---------- lmbench/lat_syscall.c | 3 ++- user/ulib.c | 29 ++++++++++++++++++++++++---- 7 files changed, 83 insertions(+), 43 deletions(-) diff --git a/kernel-rs/Cargo.toml b/kernel-rs/Cargo.toml index 58841de7..41b03662 100644 --- a/kernel-rs/Cargo.toml +++ b/kernel-rs/Cargo.toml @@ -21,7 +21,7 @@ opt-level = 1 [profile.release] panic = "abort" -opt-level = 1 +opt-level = 2 [dependencies] array-macro = "2.1.0" diff --git a/kernel-rs/src/arch/arm/trap.rs b/kernel-rs/src/arch/arm/trap.rs index ac8152ae..88565aee 100644 --- a/kernel-rs/src/arch/arm/trap.rs +++ b/kernel-rs/src/arch/arm/trap.rs @@ -16,7 +16,6 @@ use crate::{ timer::set_next_timer, Armv8, }, - kernel::TIME, // kernel::KERNEL, memlayout::{TRAMPOLINE, TRAPFRAME}, trap::{IrqNum, IrqTypes, TrapTypes}, @@ -198,7 +197,7 @@ impl TrapManager for Armv8 { trapframe: &mut TrapFrame, kernel_stack: usize, usertrap: usize, - syscall_num: usize, + _syscall_num: usize, ) -> ! { // We're about to switch the destination of traps from // kerneltrap() to usertrap(), so turn off interrupts until @@ -228,22 +227,22 @@ impl TrapManager for Armv8 { // } // If it is getppid, print elapsed time per interval. 
- if _syscall_num == 26 { - unsafe { - crate::kernel::kernel_ref(|kctx| { - for i in 0..2 { - kctx.as_ref().write_fmt(format_args!( - "Interval {}: {}\n", - i + 1, - TIME[i + 1] - TIME[i] - )); - } - // for i in 0..4{ - // kctx.as_ref().write_fmt(format_args!("lap {}: {}\n", i, TIME[i])); - // } - }) - } - } + // if _syscall_num == 26 { + // unsafe { + // crate::kernel::kernel_ref(|kctx| { + // // for i in 0..8 { + // kctx.as_ref().write_fmt(format_args!( + // "Interval {}: {}\n", + // 0, + // TIME[0] - TIME[9] + // )); + // // } + // // for i in 0..4{ + // // kctx.as_ref().write_fmt(format_args!("lap {}: {}\n", i, TIME[i])); + // // } + // }) + // } + // } unsafe { fn_0(TRAPFRAME, user_pagetable_addr) } } diff --git a/kernel-rs/src/proc/procs.rs b/kernel-rs/src/proc/procs.rs index afd61946..b4388a7d 100644 --- a/kernel-rs/src/proc/procs.rs +++ b/kernel-rs/src/proc/procs.rs @@ -14,6 +14,7 @@ use super::*; use crate::{ addr::{Addr, UVAddr, PGSIZE}, arch::interface::TrapFrameManager, + arch::TargetArch, fs::{DefaultFs, FileSystem, FileSystemExt}, hal::hal, kalloc::Kmem, @@ -466,10 +467,29 @@ impl<'id, 's> ProcsRef<'id, 's> { // get the pid of current process's parent pub fn get_parent_pid(&self, ctx: &mut KernelCtx<'id, '_>) -> Pid { + // let before = TargetArch::r_cycle(); let mut parent_guard = self.wait_guard(); + + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap1: {}\n", after - before)); + + // let before = TargetArch::r_cycle(); + let parent = *ctx.proc().get_mut_parent(&mut parent_guard); + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap2: {}\n", after - before)); + + // let before = TargetArch::r_cycle(); + let lock = unsafe { (*parent).info.lock() }; + + // let after = TargetArch::r_cycle(); + + // ctx.kernel().as_ref().write_fmt(format_args!("lap3: {}\n", after - before)); + lock.pid } } diff --git a/kernel-rs/src/syscall.rs b/kernel-rs/src/syscall.rs index 
42734795..a7aaf09f 100644 --- a/kernel-rs/src/syscall.rs +++ b/kernel-rs/src/syscall.rs @@ -16,7 +16,6 @@ use crate::{ file::{RcFile, SeekWhence, SelectEvent}, fs::{FcntlFlags, FileSystem, FileSystemExt, InodeType, Path}, hal::hal, - kernel::TIME, ok_or, page::{Page, PGSIZE}, param::{MAXARG, MAXPATH}, @@ -90,11 +89,11 @@ impl CurrentProc<'_, '_> { impl KernelCtx<'_, '_> { pub fn syscall(&mut self, num: i32) -> Result { - let clock = TargetArch::r_cycle(); + // let clock = TargetArch::r_cycle(); // record end of stage 2 (begin of stage 3). - unsafe { - TIME[1] = clock; - } + // unsafe { + // TIME[1] = clock; + // } let ret = match num { 1 => self.sys_fork(), 2 => self.sys_exit(), @@ -136,11 +135,11 @@ impl KernelCtx<'_, '_> { } }; - let clock = TargetArch::r_cycle(); + // let clock = TargetArch::r_cycle(); // record end of stage 3 (begin of stage 4). - unsafe { - TIME[2] = clock; - } + // unsafe { + // TIME[2] = clock; + // } ret } diff --git a/kernel/arm/trampoline.S b/kernel/arm/trampoline.S index 61e97e87..60f80397 100644 --- a/kernel/arm/trampoline.S +++ b/kernel/arm/trampoline.S @@ -5,14 +5,6 @@ .globl trampoline trampoline: -.macro flush_cache - isb sy - dsb sy - dsb ishst - tlbi vmalle1is - dsb ish - isb -.endm .macro exception_1_entry sub sp, sp, #272 @@ -143,8 +135,13 @@ trampoline: msr ttbr0_el1, x24 + mrs x3, cntpct_el0 + msr tpidr_el0, x3 + # flush all the caches - flush_cache + tlbi vmalle1is + dsb ish /* tlbi is only guaranteed complete after a dsb */ + isb .endm /* Exception vectors */ @@ -194,7 +191,10 @@ userret: # switch to the user page table. 
msr ttbr0_el1, x1 - flush_cache + dsb nsh + tlbi vmalle1is + dsb ish /* tlbi is only guaranteed complete after a dsb */ + isb # restore ELR, SPSR, LR, SP ldp x21, x22, [x0, #16] /* SPSR, FPSR */ diff --git a/lmbench/lat_syscall.c b/lmbench/lat_syscall.c index 23a87f5a..6f5b1c7f 100644 --- a/lmbench/lat_syscall.c +++ b/lmbench/lat_syscall.c @@ -35,7 +35,8 @@ do_getppid(iter_t iterations, void *cookie) getppid(); // end of total execution // x2 = read_cntpct(); - // printf("total: %lu\n", x2 - x1); + // printf("x1: %lu, x2: %lu\n", x1, x2); + // printf("diff: %lu\n", x2 - x1); } } diff --git a/user/ulib.c b/user/ulib.c index 178b916f..9774d065 100644 --- a/user/ulib.c +++ b/user/ulib.c @@ -4,6 +4,7 @@ #include "user/user.h" #define MICROSECS_PER_TICK 100000 +#define US_PER_S 1000000 char* strcpy(char *s, const char *t) @@ -275,13 +276,33 @@ int fsync(int fildes) // return 0; // } -// TODO: find better way to convert uptime ticks to real time. +// TODO: This is only for aarch64. Separate this. +static inline uint64 r_cntpct() { + uint64 x; + asm volatile("isb sy"); + asm volatile("mrs %0, CNTPCT_EL0" : "=r" (x)); + asm volatile("isb sy"); + return x; +} + + +// TODO: This is only for aarch64. Separate this. +static inline uint64 r_freq() { + uint64 x; + asm volatile("isb sy"); + asm volatile("mrs %0, CNTFRQ_EL0" : "=r" (x)); + asm volatile("isb sy"); + return x; +} + +// TODO: replace the divisions with a precomputed mult/shift (cf. Linux timekeeper). +// User-level uptime; ticks are split before scaling so ticks * US_PER_S cannot overflow. int gettimeofday(struct timeval *__restrict__ tp, struct timezone *__restrict__ tzp) { - int cur = uptime_as_micro(); - tp->tv_sec = cur / 1000000; - tp->tv_usec = cur % 1000000; + uint64 ticks = r_cntpct(), freq = r_freq(); + tp->tv_sec = ticks / freq; + tp->tv_usec = ((ticks % freq) * US_PER_S) / freq; return 0; }