diff --git a/Cargo.lock b/Cargo.lock index 6673d58c0..1ede24905 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -363,6 +363,15 @@ name = "nodrop" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "num-traits" version = "0.2.8" @@ -570,6 +579,7 @@ dependencies = [ "static_assertions 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "sunrise-libkern 0.1.0", "sunrise-libutils 0.1.0", + "tinybmp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "xmas-elf 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -580,6 +590,7 @@ dependencies = [ "bitfield 0.13.2 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "static_assertions 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "sunrise-libutils 0.1.0", ] @@ -728,6 +739,14 @@ dependencies = [ "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tinybmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "typenum" version = "1.10.0" @@ -758,6 +777,11 @@ name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" version = "0.3.7" @@ -841,6 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" "checksum multiboot2 0.7.1 (git+https://github.com/sunriseos/multiboot2-elf64.git)" = "" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" +"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" "checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" "checksum opaque-debug 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "93f5bb2e8e8dec81642920ccff6b61f1eb94fa3020c5a325c9851ff604152409" "checksum pest 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "933085deae3f32071f135d799d75667b63c8dc1f4537159756e3d4ceab41868c" @@ -866,12 +891,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum syn 0.15.40 (registry+https://github.com/rust-lang/crates.io-index)" = "bc945221ccf4a7e8c31222b9d1fc77aefdd6638eb901a6ce457a3dc29d4c31e8" "checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 
+"checksum tinybmp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "503e3fde7e36b1aa2345af8a3af0086c9b01d9db07b24f3fb0aab07316b9fa10" "checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169" "checksum ucd-trie 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8f00ed7be0c1ff1e24f46c3d2af4859f7e863672ba3a6e92e7cff702bf9f06c2" "checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" "checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/ahci/src/main.rs b/ahci/src/main.rs index c307af9dc..be2c79a74 100644 --- a/ahci/src/main.rs +++ b/ahci/src/main.rs @@ -164,6 +164,7 @@ capabilities!(CAPABILITIES = Capabilities { sunrise_libuser::syscalls::nr::CloseHandle, sunrise_libuser::syscalls::nr::WaitSynchronization, sunrise_libuser::syscalls::nr::OutputDebugString, + sunrise_libuser::syscalls::nr::SetThreadArea, sunrise_libuser::syscalls::nr::SetHeapSize, sunrise_libuser::syscalls::nr::QueryMemory, diff --git a/i386-unknown-none-user.json b/i386-unknown-none-user.json index 76cb6954a..7f9b6f109 100644 --- a/i386-unknown-none-user.json +++ b/i386-unknown-none-user.json @@ -18,7 +18,8 @@ "env": "user", "position-independent-executables": true, "dynamic-linking": false, - "has-elf-tls": false, + "has-elf-tls": true, + "tls-model": "initial-exec", "has-rpath": false, "features": "-mmx,-sse,+soft-float", "disable-redzone": true, diff --git a/i386-unknown-none.json b/i386-unknown-none.json index 31f262415..ddc6718da 100644 --- a/i386-unknown-none.json +++ b/i386-unknown-none.json @@ -17,6 +17,8 @@ }, "relocation-model": "static", "executables": true, + "has-elf-tls": true, + "tls-model": "initial-exec", "features": "-mmx,-sse,+soft-float", "disable-redzone": true, "panic-strategy": "abort" diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index bc345e759..0e2fff5fd 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -31,6 +31,7 @@ rustc-demangle = "0.1" failure = { version = "0.1", default-features = false, features = ["derive"] } bitfield = { git = "https://github.com/sunriseos/rust-bitfield" } mashup = "0.1.9" +tinybmp = "0.1.0" acpi = { git = "https://github.com/sunriseos/acpi.git" } [dependencies.smallvec] diff --git a/kernel/res/bsod.bmp b/kernel/res/bsod.bmp new file mode 100644 index 000000000..b18cc22f6 Binary files 
/dev/null and b/kernel/res/bsod.bmp differ diff --git a/kernel/res/cpu_locals_segmentation_doc.gif b/kernel/res/cpu_locals_segmentation_doc.gif new file mode 100644 index 000000000..b1869c94b Binary files /dev/null and b/kernel/res/cpu_locals_segmentation_doc.gif differ diff --git a/kernel/res/double_fault.txt b/kernel/res/double_fault.txt new file mode 100644 index 000000000..ddf13f486 --- /dev/null +++ b/kernel/res/double_fault.txt @@ -0,0 +1,55 @@ + + + . + ... + + . ....',;:::cccclllllcccc::;,'''.. + ...';:loodxO0KKXNWWWMMMMMMMMMMMMMMMWWWNXX0x' + ..';cox0KNNWMMMMMMMMMMMMWWMMMMMMMMMMMMMMMMMMMMWWO' + ..,:ok0XNWMMMMWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWMMMWWk. + ..;lx0NWWMMWMMMMMMMMMWWWNXK0OkdollllcccclloodxO0XNWMMMMMMd. + .. .. ..,ckKNMMWWMWWMMMMMWXKOkdc:;,'... . ..';cok0NWWo. + .. . .,lOXWMMMMWWMMWNX0kdl:'... . .,c:'. ..;lx:. + ..'lOXWMMMWMMWNXOdc;'.. . . .dWWX0xoc,... .. + . . .:kXWMMWWWMWXOo:'. .. . . .kMWWMMMWN0kdc,. .. + .. .'lONWMWMMMWKx:.. .. . ... 'OWWMMMMMMMMWWN0d:'.. .. + ..l0WMMWMMWXkc.. .. .. ... . .lO0k, 'x0KXXNWMMWMMMMMMWXOo'. .. + ..c0NMMMMMWKo,. . .. ...... ,0MMX: ....'';:ldxOKNWMMMMMWXk:. + .;ONWWMMMW0l.. . . . .;dO0K0xo;. . .OMMNc... ...,cdOXWMMWWNk;. + .oXMWMMMWKo. . . .':c:,. . ,kNWWWWMMMK; .. .,kMMWl . .,cxXWWWWXd. . .. + . . ,kNWWMMWNx'. .... .lOXWWWXo..,0MNOo:kMWWWx. .;xKNWMWo. .'l0WWWWO, . .. + . .;0WMWMWNO;. .. .ok0d. .xNWN00WMK,.xMWx..'kMWWWO' .lXWOxNMWd. . 'oKWWW0;... . + . . .:0MMMMW0c. . . ;KWWK; .xWWWk'cNWO..d0d;:kXNMWMM0,.lNMK;,0MMd. . .:KMMW0, . + . . ,0MMMWWk, . . '0MMK; cNMWNc,0WK; ..,kNKoxNMWWX:'OWMx.,0WMx. . .. . . .lNMMWx. . + .. 'kWMMMNx. . ...,OMMX;.xWMWXxOWO; ,OWNl.:XWWWMl'OMMd,kNMMx. . . .OMMMNc .. + . . .oNWMMWx... . .;dKNMMN:.OWWMN0kl..',..oWWXooKWMMMMx'xWWKK0OXMk. . .,codl;. . oWWWMk. . + . ,KWMMWk' .oXWOkWWWc.kMMWk'..:ONNc.oNWWWWWWWWWWO',kKKd''dkc. .';,. .:kXWMMWXl. ;KWWMK, . + oWWWMX: . .dNM0,:NMWl.dWWWk,'xXWWK: .cxkxoc:okOOo. .'.. .. 'dXWK; .lXMMKx0WWk. ,0MMMK; + .kMMMWx. . ,KMWd.;XMWo ,0WWWKXWWNO;... .. ......'...,dl,. 'OWWWo.:NMMNl.xWNd. ... '0MMMX; + ,KMMMX: . ,KMNo;kWMWd. ,dKNNXXOc. . ... . .'o0Xk,:KWNd. .. .xWMWo.kMMMO,lXWx' .. '0MMMK, + ;XWMMO' . '0MWKKOONWd. .';,,'. ... ..lKWWW0;.cdc. . .,OMMWocXMWM0kNXo. . . ;XMMW0' . + . :XWMMk. . . .cOKKo.,kk;. . .. ..:lod'.;dOx, .dKNWWNO, .codl. .:OXNWMWdoNMMWXOd,..;'. . lWMMMx. . + ;KMMMx. . .''. ... 'x0xcxNWWNxxXWWW0;lNMMWXl.. ,0MMK; .dXWkoXWWxoNMWXl. .l0NK: .kMMMNc . + 'OMMMk. .. .. . .. .lXWO;oWWWMWNWWWWNkkNWMW0:.. .OWMX:.dWW0;,0WMx:0MMNl.;OWMW0, . cNMMMO' + .xMWM0, ...;l;.cNMMMNdcOWWMKc:lxKNX0x:.dMWNc,0WWd.,0WMk'oNMWNKXWWNk,.. ,0WWMWo. + lWWWNc .. . .:O0Xl:KWMMO' cNMWNl 'oKMWNkOWWWl;KMWd,kWWWO..l0WWWNXk:. ,OWMMWO, + . ,0WWMk. cNWWk;OWMMd. '0MMWd. . .xMMMXOXMMo'kMWXKOkXWO' ..;::,'. ;0WWWMX:. + ..lNMMNl. . ;KWWO;xWMMd. .dWWWk. .oNMMXl;0WNx.:0XKx''xOl. . . ..cXMWWWXl. . + 'OWMMK:. 'OWM0;oWMMx. .:XWWO' 'kNMW0: .d0x; .'.. .... .:ONMMWWXl. . . + . ;0WWMK:. .. . .kWWX;cNWWk. .'kWW0:'dWWXx' .... .. .,xXMMWWW0;. + . .. ;0WWWXl. . oWMNc;KMWk. .,od:..;c:,. . .;xXWMWWWNx' + . 'xNWWNk;. .. . cNWK:.:ol,. .. . . ...cONMMMMWN0:. . + . .. .cKWWWXx;. . .. .. .ll;. .. . . .;o0WMMMMMWKl. + .'oKWMMNkc'. ... . . ..;o0NMMMWWMW0l' .. + .. .'oKWMMWKkc'. ... .. ..,lxKNMMMMWMWXk:.. + .'lONMWWWXOdc,... ..,:ok0NWMMMMMMMN0d;. ... .. + . .;d0NWWMMWNKOxol:;'.... ...',:loxOKNMMMMMWWMMMN0d;. . .. .. + ..:d0NWWWWWMWWWNXK0OOkxddooooooodddxxkO0KXWWMMMMMMMMMWWWN0xl,.. + ..;ldOXWWWWMMWWMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMWWWKOdc,.. + . .':lxOKXWMMMMMMMMMMMMWWMMMMMMWMMMWWNNX0kxl:,.. ... + .. ..';clodxOOO000KKK000OOkkxdolc;,... 
+ ................ . + + + + diff --git a/kernel/res/kernel_fault.txt b/kernel/res/kernel_fault.txt new file mode 100644 index 000000000..a3e73bccc --- /dev/null +++ b/kernel/res/kernel_fault.txt @@ -0,0 +1,40 @@ + + ````` ` ``` ``````` +``` ````````````````````````````` ``````````` ```````````` +``` ``````` ``````````````` `````````` ```````````````` ``` ```` ````` +```````````` ``` ````` ++. .#@@@@ ```````` ````` `` ``` `````````` +``````` ````` ``````` :@@@@@@@@@@@@@@@@@@@@@@::::::,:,,:::: `````` .@@: + ```` ;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@,,:`````````.::@@@@##@@@@@@@@@@ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@#';,,@@@@@@@@@@@@@@@:::`````````.: @@@@@@@@@@@@@@@@ +@@@@@@@@@@@@ @@@@@@@@@@@@@@@:::`` ``````,:`@@@@@@@@@@@@@@@@ +@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@,::```` ````:: @@@@@@@@@@@@@@@@ +@@@@@@ #@@@@ @@@@@@@@#@@@@@@@@@ @@@@@@@@@@@@@@@:::`````` ``,, @@@@@@@@@@@@@@@@ +@@@@@ @@@@ @@@@@` @@@@@@@ :;; @@@@@@@@@@@@@@@:::`````````:, @@@@@@@@@@ @@@@@ +@@@@ @@@ @@@` +@@@ @@ ;;;:. ;,#@@@@@@@@@@@:::````` `` :: @@@@@@@@@@ ;@@@ +@@@ @@@ @@` @@@@ @@: :;;:#@@@@@@@@@@@:,:,,,,,,.`:::+@@@@@@@@@@+ #@@ +@@ `@@@ @ @@@ ;;;;;;;;`.::: @@@@@@@@`:::::::::::: @@@@@@@@@@@@ @@ +@@ @@@ ' @@: ;;;:. ::;;;; ,`+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@' @@ + + ' @@@ ''; @ ;' @ ::;.`::;.@@ @@@@@@@@@@@@@@@@@@@,@@@@@@@@@ # + ' @,@ ''' ''' :.@++@ : @ .@@@@@@@@@@@@@@@@@ @@@@@@@@@ + ' @@ ., ''''' '' ;@, @ .#@ ` .@@@@@@@: `@@@@@@@@ ; + `' @@ `'''''''; '' ;@ @`;:.;;;;: @@@@@@ ` @@@@@@@ '' +' ' @@ ;''''''''' :.@@@# ::;:;;;;:: @@@@@# ', .@@@@@` `''. ' +'' '' @' ''''''''; ;;;;;;;;;:.``;::::;;..@'@@@ ' ......` @ ;''' '' +''''' .@ '''''''.,, `::;;;;;:;:;;;: @@++#@@@ @@ '' ': .' ++ ''''''' +''''' @ '''''';.,, , :;;::;::;;;;;;; @@@@@@@ @ ` '''`'++++++ ''@@@@@@@@@ +''''' ` ''''''',,, , ::: : :;;;;;;::, @@@@@+ ''@@ '++++++ ,@@@@@@@@@@ +''''', ' ''''';;.,, , :::.: :;;;;;;;::,@@@@@, ; :@@@@ ;++++'+ @@@@@@@@@@@ +'''''' ' ;;;;;;' ,, ,,,::;.:`;;;:;;;:.;'@@@@; '' ''@@@@@@@@@@@@@@@@@@@@@@@ +''''''; '' ;;;;;;; ,,,,. ,:::`:, ,`;` `@@@# ;''''''''''+'''''''''''''''' +'''''''. ''' ::::,,'; ,,,,,, `:::::` :: ,,`;:@'@# .''''''''''''''''''''''''''' +'''' ''';''; ;;;;''';;; ,,,,,,,,,,,, :;;` :;:: + `''''''''+''+'' '''''''''' +'''':''''''; ;;;; '';;' ```` :;::` ` @@ '''''''' `` ```````` +'''';''''''' ,;; ;;;;;;;`,,..... `` ` ''' ''' ``````` +'''''''''''' ; ;';;;;.,,.;;'' ,,;. ;;; '': ; '''' `````` ; +'':''.''''''' , ';' '';,,.;;;; ,,;, ;;';;;;;' `.;'` ````` .' +''.'' ''`' ''' ' ' ;;;,,.;;;' ,,;; ;;;;;;;;;;;; ` .' ```` '' +''''':'';'''''' '' ;;;,,.;;;; ,,;; ;;;;;;;';;` ''' `` ''' +'''''',''`':''''; '''` ;;;` ;;;;;'; ,,;; ';;;;;;;;: :'''' ;'''' +''''''''''''''''''''''' ``;'';;;;;;;;';;;;;;;;;;;;;;' ` ;'''''; ''''''' +''''''''''''''''''''''', ;;;;;;;;;;;';;;;;;;;;;;;;;' '''''''', ''''''''' +'''''''''''''''''''''; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;' ''''''''''':`'''''''''' diff --git a/kernel/res/kernel_panic_doc.jpg b/kernel/res/kernel_panic_doc.jpg new file mode 100644 index 000000000..53a7f9413 Binary files /dev/null and b/kernel/res/kernel_panic_doc.jpg differ diff --git a/kernel/src/cpu_locals.rs b/kernel/src/cpu_locals.rs new file mode 100644 index 000000000..473d0b3bf --- /dev/null +++ b/kernel/src/cpu_locals.rs @@ -0,0 +1,354 @@ +//! CPU local storage +//! +//! We want some statics to be cpu-local (e.g. [`CURRENT_THREAD`]). We could implement this fully +//! in software, by having an area of memory that is replicated for every cpu core, where +//! statics are indexes in this memory area, and provide getters and setters to access and modify +//! 
+//! the cpu-local statics.
+//!
+//! However, this is not ideal: it is poorly optimized, and pretty tedious.
+//!
+//! Instead we use the very common concept of Thread Local Storage (TLS), and apply it to cpu cores
+//! instead of threads, and let the compiler do all the hard work for us.
+//!
+//! # Usage
+//!
+//! In the kernel you declare a cpu-local using the [#\[thread_local\] attribute]:
+//!
+//! ```
+//! #[thread_local]
+//! static MY_CPU_LOCAL: core::cell::Cell<u8> = core::cell::Cell::new(42);
+//! ```
+//!
+//! and access it as if it was a regular static, except that each cpu core will have its own view of
+//! the static.
+//!
+//! The compiler is responsible for generating code that will access the right address, provided
+//! we configured TLS correctly.
+//!
+//! ##### Early boot
+//!
+//! Note that you can't access a cpu-local static before [`init_cpu_locals`] is called, because
+//! the cpu-local areas aren't initialized yet, and this will likely result in a cpu exception
+//! being raised, or UB.
+//!
+//! This means you can't ever access cpu-locals in early boot. If your code might be called during
+//! early boot, we advise you to use [`ARE_CPU_LOCALS_INITIALIZED_YET`] to know if you're allowed
+//! to access your cpu-local static, and if not return an error of some kind.
+//!
+//! # Inner workings
+//!
+//! We implement TLS according to the conventions laid out by [Ulrich Drepper's paper on TLS], which
+//! are followed by LLVM and most compilers.
+//!
+//! Since we're running on i386, we're following variant II.
+//!
+//! Each cpu core's `gs` segment points to a thread local memory area where cpu-local statics live.
+//! Cpu-local statics are simply accessed through an offset from `gs`.
+//! Those regions can be found in [`CPU_LOCAL_REGIONS`].
+//!
+//! The linker is in charge of creating an ELF segment of type `PT_TLS` where an initialization image
+//! for cpu local regions can be found, and is meant to be copy-pasted for every ~~thread we create~~
+//! cpu core we have.
+//!
+//! ##### Segmentation
+//!
+//! Each core gets its own [GDT]. In each of these there is a `KTls` segment which points to this
+//! core's cpu-local area, and which is meant to be loaded into `gs`.
+//!
+//! Because userspace might want to use Thread Local Storage too, and also needs `gs` to point to its
+//! thread local area (see [`set_thread_area`]), we swap the segment `gs` points to every time
+//! we enter and leave the kernel in [`trap_gate_asm`], from `UTls_Elf` to `KTls` and back.
+//!
+//! TLS on x86 is really weird. It uses variant II, where offsets must be *subtracted* from `gs`,
+//! even though segmentation only supports *adding* offsets. The only way to make this work is to have
+//! `gs` segment's limit be `0xffffffff`, effectively spanning the whole address space: when
+//! the cpu adds a "negative" offset (e.g. `0xfffffffc` for -4), it treats it as a huge unsigned
+//! positive offset which, when added to `gs`'s base, "wraps around" the address space
+//! and effectively ends up 4 bytes behind `gs`'s base.
+//!
+//! Illustration:
+//!
+//! ![cpu backflip](https://github.com/sunriseos/SunriseOS/blob/master/kernel/res/cpu_locals_segmentation_doc.gif)
+//!
+//! ##### dtv and `__tls_get_addr`
+//!
+//! We're the kernel, and we don't do dynamic loading (no loadable kernel modules).
+//! Because of this, we know our TLS model will be static (either Initial Exec or Local Exec).
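+//!
+//! For illustration (a sketch of typical codegen, not a guarantee), a Local Exec access to a
+//! 4-byte cpu-local on i386 then boils down to a single `gs`-relative load at a
+//! link-time-constant "negative" offset:
+//!
+//! ```text
+//! mov eax, DWORD PTR gs:0xfffffffc   ; i.e. gs:-4, wrapping around to 4 bytes behind gs's base
+//! ```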
+//! Those models always access thread-locals directly via `gs`, and always short-circuit the dtv.
+//!
+//! So we don't even bother allocating a dtv array at all. Neither do we define a `__tls_get_addr`
+//! function.
+//!
+//! [`CURRENT_THREAD`]: crate::scheduler::CURRENT_THREAD
+//! [`init_cpu_locals`]: crate::cpu_locals::init_cpu_locals
+//! [`ARE_CPU_LOCALS_INITIALIZED_YET`]: self::cpu_locals::ARE_CPU_LOCALS_INITIALIZED_YET
+//! [Ulrich Drepper's paper on TLS]: https://web.archive.org/web/20190710135250/https://akkadia.org/drepper/tls.pdf
+//! [`CPU_LOCAL_REGIONS`]: crate::cpu_locals::CPU_LOCAL_REGIONS
+//! [GDT]: crate::i386::gdt
+//! [`set_thread_area`]: crate::interrupts::syscalls::set_thread_area
+//! [#\[thread_local\] attribute]: https://github.com/rust-lang/rust/issues/10310
+
+use crate::i386::multiboot;
+use crate::elf_loader::map_grub_module;
+use crate::i386::gdt::{GDT, GdtIndex};
+use sunrise_libutils::div_ceil;
+use xmas_elf::program::{Type, SegmentData};
+use alloc::alloc::{alloc_zeroed, dealloc};
+use core::mem::align_of;
+use core::alloc::Layout;
+use core::mem::size_of;
+use alloc::vec::Vec;
+use spin::Once;
+use core::sync::atomic::{AtomicBool, Ordering};
+use core::fmt::Debug;
+
+/// Use this if your code might run in an early boot stage to know if you're
+/// allowed to access a cpu-local variable. Accessing one when this is false is UB.
+///
+/// Always true after [`init_cpu_locals`] has been called.
+pub static ARE_CPU_LOCALS_INITIALIZED_YET: AtomicBool = AtomicBool::new(false);
+
+/// Array of cpu local regions, copied from the initialization image in kernel's ELF.
+///
+/// One per cpu core.
+static CPU_LOCAL_REGIONS: Once<Vec<CpuLocalRegion>> = Once::new();
+
+/// Address that should be put in `KTls` segment's base.
+/// The limit should be `0xffffffff`.
+///
+/// Used for creating a core's GDT, before starting it.
+///
+/// # Panics
+///
+/// Panics if `cpu_id` is greater than or equal to the `cpu_count` that was supplied to [`init_cpu_locals`].
+pub fn get_cpu_locals_ptr_for_core(cpu_id: usize) -> *const u8 {
+    CPU_LOCAL_REGIONS.r#try()
+        .expect("CPU_LOCAL_REGIONS not initialized")
+        .get(cpu_id)
+        .unwrap_or_else(|| panic!("cpu locals not initialized for cpu id {}", cpu_id))
+        .tcb() as *const ThreadControlBlock as *const u8
+}
+
+/// Initializes cpu locals during early boot stage.
+///
+/// * Maps the kernel's ELF to get our `PT_TLS` program header information, including the TLS
+///   initialization image.
+/// * Allocates an array of `cpu_count` cpu local regions and stores them in [CPU_LOCAL_REGIONS].
+/// * Makes this core's `KTls` segment point to `CPU_LOCAL_REGIONS[0]`'s [`ThreadControlBlock`].
+///
+/// # Panics
+///
+/// * Failed to map kernel's ELF.
+/// * Failed to get kernel ELF's TLS initialization image.
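+///
+/// # Example
+///
+/// A sketch of the intended call sequence during boot (the single-core count is illustrative):
+///
+/// ```ignore
+/// // requires a working heap and an initialized GDT.
+/// init_cpu_locals(1);
+/// assert!(ARE_CPU_LOCALS_INITIALIZED_YET.load(Ordering::Relaxed));
+/// ```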
+pub fn init_cpu_locals(cpu_count: usize) {
+    debug_assert!(cpu_count > 0, "You can't have 0 cpu cores - I'm running code, therefore I am");
+
+    CPU_LOCAL_REGIONS.call_once(|| {
+        // map our own ELF so that we can access our PT_TLS
+        let mapped_kernel_elf = multiboot::try_get_boot_information()
+            .and_then(|info| info.module_tags().nth(0))
+            .and_then(|module| map_grub_module(module).ok())
+            .expect("cpu_locals: cannot get kernel elf");
+        let kernel_elf = mapped_kernel_elf.elf.as_ref()
+            .expect("cpu_locals: module 0 is not kernel elf");
+
+        // find the PT_TLS header
+        let tls_program_header = kernel_elf.program_iter()
+            .find(|p_header|
+                p_header.get_type().ok().map(|p_header_type|
+                    match p_header_type {
+                        Type::Tls => true,
+                        _ => false
+                    }
+                ).unwrap_or(false)
+            )
+            .expect("cpu_locals: kernel elf has no PT_TLS program header");
+
+        // get our tls initialisation image at header.p_offset, header.p_filesz
+        let tls_init_image = match tls_program_header.get_data(kernel_elf)
+            .expect("cpu_locals: cannot get PT_TLS content") {
+            SegmentData::Undefined(tls_data) => tls_data,
+            x => panic!("PT_TLS: Unexpected Segment data {:?}", x)
+        };
+
+        // create one cpu local region per cpu from the initialisation image
+        let mut cpu_local_regions = Vec::with_capacity(cpu_count);
+        for _ in 0..cpu_count {
+            cpu_local_regions.push(
+                CpuLocalRegion::allocate(
+                    tls_init_image,
+                    tls_program_header.mem_size() as usize,
+                    tls_program_header.align() as usize
+                )
+            );
+        }
+
+        // make gs point to the first cpu local region.
+        let mut gdt = GDT.r#try()
+            .expect("GDT not initialized")
+            .lock();
+        gdt.table[GdtIndex::KTls as usize].set_base(
+            cpu_local_regions[0].tcb() as *const _ as usize as u32
+        );
+        gdt.commit(None, None, None, None, None, None);
+
+        cpu_local_regions
+    });
+
+    // yes, they are 😌
+    ARE_CPU_LOCALS_INITIALIZED_YET.store(true, Ordering::Relaxed);
+}
+
+/// The `round` function, as defined in section 3.0 of Drepper's paper:
+///
+/// ```text
+/// round(x,y) = y * ⌈x/y⌉
+/// ```
+///
+/// Just a poorly-named `align_up`.
+fn tls_align_up(x: usize, y: usize) -> usize {
+    y * div_ceil(x, y)
+}
+
+/// Elf TLS TCB
+///
+/// Variant II leaves the specification of the ThreadControlBlock (TCB) to the implementor,
+/// with the only requirement that the first word in the TCB, pointed to by `tp`, contains its own
+/// address, i.e. is a pointer to itself (GNU variant).
+///
+/// We don't need to store anything else in the TCB, it's just the self pointer.
+#[repr(C)]
+#[derive(Debug)]
+struct ThreadControlBlock {
+    /// Pointer containing its own address.
+    tp_self_ptr: *const ThreadControlBlock,
+}
+
+/// Represents an allocated cpu local region.
+///
+/// Because cpu regions have a really specific layout, we don't use Box and instead interact with
+/// the allocator directly. This type is the equivalent of a Box: it stores the pointer to the
+/// allocated memory, and deallocates it on Drop.
+struct CpuLocalRegion {
+    /// Pointer to the allocated memory.
+    ptr: usize,
+    /// Layout of the allocated memory. Used when deallocating.
+    layout: Layout,
+    /// Offset of the TCB in this allocation.
+    tcb_offset: usize,
+}
+
+impl CpuLocalRegion {
+    /// Returns a pointer to the [ThreadControlBlock] in the allocated region.
+    /// All cpu-local arithmetic is done relative to this pointer.
+    ///
+    /// For TLS to work, the value stored at this address should be the address itself, i.e.
+    /// having a pointer pointing to itself.
+    fn tcb(&self) -> &ThreadControlBlock {
+        unsafe {
+            // safe: - guaranteed to be aligned, and still in the allocation,
+            //       - no one should ever have a mut reference to the ThreadControlBlock after its
+            //         initialisation.
+            &*((self.ptr + self.tcb_offset) as *const ThreadControlBlock)
+        }
+    }
+
+    /// Allocates a CpuLocalRegion.
+    ///
+    /// The region's content is copied from the TLS initialisation image described by `block_src`,
+    /// padded with 0s for `block_size`, to which is appended a [`ThreadControlBlock`].
+    ///
+    /// The CpuLocalRegion uses `PT_TLS`'s `p_align` field passed in `block_align`
+    /// to compute its layout and total size.
+    ///
+    /// ### Alignment
+    ///
+    /// ```text
+    ///
+    ///   V----------------------V  tls_align_up(tls_size_1, align_1)
+    ///
+    ///                          +-- gs:0
+    ///                          |
+    ///   +----------------------|-- tlsoffset_1 = gs:0 - tls_align_up(tls_size_1, align_1)
+    ///   |                      |
+    ///   V                      V
+    ///
+    ///   j----------------~-----j---------j
+    ///   ...  | tls_size_1 | pad |   TCB   |
+    ///   j----------------~-----j---------j
+    ///
+    ///   ^                      ^         ^
+    ///   |                      |         |
+    ///   |                      |         +-- TCB_align: Determines alignment of everything.
+    ///   |                      |             = max(align_of::<TCB>(), align_1). e.g.: 16.
+    ///   |                      |
+    ///   |                      +------------------------- TCB_align - n * align_1
+    ///   |                          => still aligned to align_1 because TCB is aligned to align_1.
+    ///   |
+    ///   +------------------------------ alloc_align == TCB_align
+    ///        => &TCB = &alloc + tls_align_up(gs:0 - tls_offset_1, TCB_align)
+    ///
+    ///                    ^---^ alloc_pad
+    ///
+    /// ```
+    #[allow(clippy::cast_ptr_alignment)]
+    fn allocate(block_src: &[u8], block_size: usize, block_align: usize) -> Self {
+        let tls_offset1 = tls_align_up(block_size, block_align);
+        let tcb_align = usize::max(align_of::<ThreadControlBlock>(), block_align);
+        let tcb_offset = tls_align_up(tls_offset1, tcb_align);
+        let alloc_pad_size = tcb_offset - tls_offset1;
+        let layout = Layout::from_size_align(
+            tcb_offset + size_of::<ThreadControlBlock>(),
+            tcb_align
+        ).unwrap();
+        let alloc = unsafe {
+            // safe: layout.size >= size_of::<ThreadControlBlock>() -> layout.size != 0
+            alloc_zeroed(layout)
+        };
+        assert!(!alloc.is_null(), "cpu_locals: failed static area allocation");
+
+        unsafe {
+            // safe: everything is done within our allocation, u8 is always aligned.
+            // copy data
+            core::ptr::copy_nonoverlapping(
+                block_src as *const [u8] as *const u8,
+                alloc.add(alloc_pad_size),
+                block_src.len()
+            );
+            // .tbss + pad are already set to 0 by alloc_zeroed.
+            // write tcb
+            core::ptr::write(
+                alloc.add(tcb_offset) as *mut ThreadControlBlock,
+                ThreadControlBlock {
+                    tp_self_ptr: alloc.add(tcb_offset) as *const ThreadControlBlock
+                }
+            );
+        };
+        Self {
+            ptr: alloc as usize,
+            layout,
+            tcb_offset
+        }
+    }
+}
+
+impl Drop for CpuLocalRegion {
+    /// Dropping a CpuLocalRegion deallocates it.
+    fn drop(&mut self) {
+        unsafe {
+            // safe: - self.ptr is obviously allocated.
+            //       - self.layout is the same argument that was used for alloc.
+            dealloc(self.ptr as *mut u8, self.layout)
+        };
+    }
+}
+
+impl Debug for CpuLocalRegion {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> Result<(), core::fmt::Error> {
+        f.debug_struct("CpuLocalRegion")
+            .field("start_address", &self.ptr)
+            .field("tcb_address", &self.tcb())
+            .field("total_size", &self.layout.size())
+            .finish()
+    }
+}
diff --git a/kernel/src/i386/gdt.rs b/kernel/src/i386/gdt.rs
index 40cf3951d..57faad508 100644
--- a/kernel/src/i386/gdt.rs
+++ b/kernel/src/i386/gdt.rs
@@ -1,117 +1,294 @@
 //! GDT Handler
 //!
-//! The Global Descriptor Table is responsible for segmentation of memory. In
-//! our case though, we don't really care about that.
+//! The Global Descriptor Table is responsible for segmentation of memory.
+//!
+//! Since we manage memory permissions through paging, we want to set up our
+//! segments so that we have a flat-memory model, i.e. having segments with
+//! `base = 0; limit = 0xffffffff`.
+//!
+//! ### GDT segments
+//!
+//! | Index                    | Found in                               | Maps to                        | Purpose                                                            |
+//! |--------------------------|----------------------------------------|--------------------------------|--------------------------------------------------------------------|
+//! | [`GdtIndex::Null`]       | nowhere (hopefully)                    | _                              | _                                                                  |
+//! | [`GdtIndex::KCode`]      | `cs`, while in kernel code             | flat: `0x00000000..0xffffffff` | kernel's code segment                                              |
+//! | [`GdtIndex::KData`]      | `ds`, `es`, while in kernel code       | flat: `0x00000000..0xffffffff` | kernel's data segment                                              |
+//! | [`GdtIndex::KTls`]       | `gs`, while in kernel code             | kernel's cpu-locals            | kernel sets up cpu-locals at this address                          |
+//! | [`GdtIndex::KStack`]     | `ss`, while in kernel code             | flat: `0x00000000..0xffffffff` | kernel's stack segment                                             |
+//! | [`GdtIndex::UCode`]      | `cs`, while in user code               | flat: `0x00000000..0xffffffff` | user's code segment                                                |
+//! | [`GdtIndex::UData`]      | `ds`, `es`, while in user code         | flat: `0x00000000..0xffffffff` | user's data segment                                                |
+//! | [`GdtIndex::UTlsRegion`] | `fs`, while in user code               | `&`[`TLS`]`..&`[`TLS`]`+0x200` | user can get the address of its [`TLS`] from this selector         |
+//! | [`GdtIndex::UTlsElf`]    | `gs`, while in user code               | User-defined                   | user can set up elf TLS at this address                            |
+//! | [`GdtIndex::UStack`]     | `ss`, while in user code               | flat: `0x00000000..0xffffffff` |                                                                    |
+//! | [`GdtIndex::LDT`]        | _                                      | Points to the [`GLOBAL_LDT`]   |                                                                    |
+//! | [`GdtIndex::TSS`]        | IDT Double fault vector                | Points to the [`MAIN_TASK`]    | Double fault exception backs up registers to this TSS              |
+//! | [`GdtIndex::FTSS`]       | IDT Double fault vector                |                                | Double fault exception loads registers from this TSS               |
+//!
+//! ##### UTlsRegion
+//!
+//! The kernel allocates a 0x200-byte region for every thread, and always makes `fs` point to it
+//! when jumping to userspace. See [`TLS`] for more.
+//!
+//! This region is thread local; its address is switched at every thread-switch.
+//!
+//! ##### UTlsElf
+//!
+//! The segment pointed to by `gs` is controlled by the user. It can set its address/limit with
+//! [`svcSetThreadArea`]. The segment it chooses to use is local to every thread, and defaults to `0x00000000..0xffffffff`.
+//!
+//! Typically, the user will want to make `gs` point to its elf TLS.
+//!
+//! This segment is thread local; its address and size are switched at every thread-switch.
+//!
+//! ### LDT segments
+//!
+//! None :)
+//!
+//! ## x86_64
+//!
+//! Because x86_64 uses `fs` for TLS instead of `gs`, the purposes of `gs` and `fs` are swapped:
+//!
+//! | Index               | Found in                               | Maps to                        | Purpose                                                            |
+//! |---------------------|----------------------------------------|--------------------------------|--------------------------------------------------------------------|
+//! | MSR                 | `fs`, while in kernel code             | kernel's cpu-locals            | kernel sets up cpu-locals at this address                          |
+//! | MSR                 | `gs`, while in user code               | `&`[`TLS`]`..&`[`TLS`]`+0x200` | user can get the address of its [`TLS`] from this selector         |
+//! | MSR                 | `fs`, while in user code               | User-defined                   | user can set up elf TLS at this address                            |
+//!
+//! [`GdtIndex::Null`]: gdt::GdtIndex::Null
+//! [`GdtIndex::KCode`]: gdt::GdtIndex::KCode
+//! [`GdtIndex::KData`]: gdt::GdtIndex::KData
+//! [`GdtIndex::KTls`]: gdt::GdtIndex::KTls
+//! [`GdtIndex::KStack`]: gdt::GdtIndex::KStack
+//! [`GdtIndex::UCode`]: gdt::GdtIndex::UCode
+//! [`GdtIndex::UData`]: gdt::GdtIndex::UData
+//! [`GdtIndex::UTlsRegion`]: gdt::GdtIndex::UTlsRegion
+//! [`GdtIndex::UTlsElf`]: gdt::GdtIndex::UTlsElf
+//! [`GdtIndex::UStack`]: gdt::GdtIndex::UStack
+//! [`GdtIndex::LDT`]: gdt::GdtIndex::LDT
+//! [`GdtIndex::TSS`]: gdt::GdtIndex::TSS
+//! [`GdtIndex::FTSS`]: gdt::GdtIndex::FTSS
+//! [`TLS`]: sunrise_libkern::TLS
+//! [`GLOBAL_LDT`]: gdt::GLOBAL_LDT
+//! [`MAIN_TASK`]: gdt::MAIN_TASK
+//! [`svcSetThreadArea`]: crate::interrupts::syscalls::set_thread_area
 
 #![allow(dead_code)]
 
-use crate::sync::{SpinLock, Once};
+use crate::sync::{SpinLockIRQ, Once};
 use bit_field::BitField;
-use core::mem::{self, size_of};
+use core::mem::size_of;
 use core::ops::{Deref, DerefMut};
-use core::slice;
 use core::fmt;
 use crate::i386::{PrivilegeLevel, TssStruct};
 use crate::i386::structures::gdt::SegmentSelector;
-use crate::i386::instructions::tables::{lgdt, sgdt, DescriptorTablePointer};
+use crate::i386::instructions::tables::{lgdt, lldt, ltr, DescriptorTablePointer};
 use crate::i386::instructions::segmentation::*;
 use crate::paging::PAGE_SIZE;
-use crate::paging::{MappingAccessRights, kernel_memory::get_kernel_memory};
-use crate::frame_allocator::{FrameAllocator, FrameAllocatorTrait};
-use crate::mem::VirtualAddress;
-use alloc::vec::Vec;
-use crate::utils::align_up;
+use sunrise_libkern::TLS;
+use spin::Mutex;
+use bitfield::fmt::Debug;
 
-/// The global GDT. Needs to be initialized with init_gdt().
-static GDT: Once<SpinLock<GdtManager>> = Once::new();
+/// The global GDT. Needs to be initialized with [init_gdt].
+///
+/// Modifying it disables interrupts.
+pub static GDT: Once<SpinLockIRQ<GdtManager>> = Once::new();
 
 /// The global LDT used by all the processes.
+///
+/// Empty.
 static GLOBAL_LDT: Once<DescriptorTable> = Once::new();
 
+/// Index in the GDT of each segment descriptor.
+#[repr(usize)]
+#[derive(Debug, Clone, Copy)]
+pub enum GdtIndex {
+    /// The index in the GDT of the null descriptor.
+    Null = 0,
+    /// The index in the GDT of the Kernel code segment descriptor.
+    KCode = 1,
+    /// The index in the GDT of the Kernel data segment descriptor.
+    KData = 2,
+    /// The index in the GDT of the Kernel thread local storage ("cpu-locals") segment descriptor.
+    KTls = 3,
+    /// The index in the GDT of the Kernel stack segment descriptor.
+    KStack = 4,
+    /// The index in the GDT of the Userland code segment descriptor.
+    UCode = 5,
+    /// The index in the GDT of the Userland data segment descriptor.
+    UData = 6,
+    /// The index in the GDT of the Userland thread local storage region segment descriptor.
+    UTlsRegion = 7,
+    /// The index in the GDT of the Userland ELF thread local storage segment descriptor.
+    UTlsElf = 8,
+    /// The index in the GDT of the Userland stack segment descriptor.
+    UStack = 9,
+    /// The index in the GDT of the LDT descriptor.
+    LDT = 10,
+    /// The index in the GDT of the main TSS descriptor.
+    TSS = 11,
+    /// The index in the GDT of the double fault TSS descriptor.
+    FTSS = 12,
+
+    /// The number of descriptors in the GDT.
+    DescCount,
+}
+
+impl GdtIndex {
+    /// Turns a segment descriptor index into a segment selector.
+    ///
+    /// The ring part of the selector will be `0b00` for K* segments, and `0b11` for U* segments.
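+    ///
+    /// For example (raw values spelled out for illustration): `GdtIndex::KData.selector()`
+    /// is index `2` with ring `0`, i.e. `(2 << 3) | 0 = 0x10`, while `GdtIndex::UData.selector()`
+    /// is index `6` with ring `3`, i.e. `(6 << 3) | 3 = 0x33`.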
+    pub fn selector(self) -> SegmentSelector {
+        match self {
+            GdtIndex::KCode | GdtIndex::KData | GdtIndex::KTls | GdtIndex::KStack |
+            GdtIndex::LDT | GdtIndex::TSS | GdtIndex::FTSS
+                => SegmentSelector::new(self as u16, PrivilegeLevel::Ring0),
+            GdtIndex::UCode | GdtIndex::UData | GdtIndex::UTlsRegion | GdtIndex::UTlsElf |
+            GdtIndex::UStack
+                => SegmentSelector::new(self as u16, PrivilegeLevel::Ring3),
+
+            _ => panic!("Cannot get segment selector of {:?}", self)
+        }
+    }
+}
+
 /// Initializes the GDT.
 ///
-/// Creates a GDT with a flat memory segmentation model. It will create 3 kernel
-/// segments (code, data, stack), three user segments (code, data, stack), an
+/// Creates a GDT with a flat memory segmentation model. It will create 4 kernel
+/// segments (code, data, tls, stack), 5 user segments (code, data, tls region, tls elf, stack), an
 /// LDT, and a TSS for the main task.
 ///
 /// This function should only be called once. Further calls will be silently
 /// ignored.
 pub fn init_gdt() {
-    use crate::i386::instructions::tables::{lldt, ltr};
-
-    let ldt = GLOBAL_LDT.call_once(DescriptorTable::new);
+    // fill LDT with null descriptors
+    GLOBAL_LDT.call_once(Default::default);
 
     GDT.call_once(|| {
-        let mut gdt = DescriptorTable::new();
+        let mut gdt = GdtManager::default();
         // Push the null descriptor
-        gdt.push(DescriptorTableEntry::null_descriptor());
+        gdt.table[GdtIndex::Null as usize] = DescriptorTableEntry::null_descriptor();
         // Push a kernel code segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::KCode as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             true,
             PrivilegeLevel::Ring0,
-        ));
+        );
         // Push a kernel data segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::KData as usize] = DescriptorTableEntry::new(
+            0,
+            0xffffffff,
+            false,
+            PrivilegeLevel::Ring0,
+        );
+        // Push a dummy tls segment, will be moved and resized appropriately later
+        gdt.table[GdtIndex::KTls as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             false,
             PrivilegeLevel::Ring0,
-        ));
+        );
         // Push a kernel stack segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::KStack as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             false,
             PrivilegeLevel::Ring0,
-        ));
+        );
         // Push a userland code segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::UCode as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             true,
             PrivilegeLevel::Ring3,
-        ));
+        );
        // Push a userland data segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::UData as usize] = DescriptorTableEntry::new(
+            0,
+            0xffffffff,
+            false,
+            PrivilegeLevel::Ring3,
+        );
+        // Push a userland thread local storage segment, will be moved at every thread-switch.
+        gdt.table[GdtIndex::UTlsRegion as usize] = DescriptorTableEntry::new(
+            0,
+            (size_of::<TLS>() - 1) as u32,
+            false,
+            PrivilegeLevel::Ring3,
+        );
+        // Push a userland thread local storage segment (elf); its address and size are set at every thread-switch.
+        gdt.table[GdtIndex::UTlsElf as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             false,
             PrivilegeLevel::Ring3,
-        ));
+        );
         // Push a userland stack segment
-        gdt.push(DescriptorTableEntry::new(
+        gdt.table[GdtIndex::UStack as usize] = DescriptorTableEntry::new(
             0,
             0xffffffff,
             false,
             PrivilegeLevel::Ring3,
-        ));
+        );
+
         // Global LDT
-        gdt.push(DescriptorTableEntry::new_ldt(ldt, PrivilegeLevel::Ring0));
+        gdt.table[GdtIndex::LDT as usize] = DescriptorTableEntry::new_ldt(&GLOBAL_LDT.r#try().unwrap(), PrivilegeLevel::Ring0);
 
-        let main_task = unsafe {
-            (MAIN_TASK.addr() as *mut TssStruct).as_ref().unwrap()
+        // Main task
+        let mut main_task = MAIN_TASK.lock();
+        main_task.init();
+        let main_tss_ref: &'static TssStruct = unsafe {
+            // creating a static ref to tss.
+            // kinda-safe: the tss is in a static so it is 'static, but is behind a lock
+            // and will still be accessed by the hardware with no consideration for the lock.
+            (&main_task.tss as *const TssStruct).as_ref().unwrap()
         };
+        gdt.table[GdtIndex::TSS as usize] = DescriptorTableEntry::new_tss(main_tss_ref, PrivilegeLevel::Ring0, 0x2001);
+
+        // Double fault task
+        let mut fault_task = DOUBLE_FAULT_TASK.lock();
+        fault_task.init();
+        let fault_task_stack_end = unsafe { &DOUBLE_FAULT_TASK_STACK.0 } as *const u8 as usize + size_of::<DoubleFaultTaskStack>();
+        fault_task.esp = fault_task_stack_end as u32;
+        fault_task.esp0 = fault_task_stack_end as u32;
+        fault_task.eip = 0; // will be set by IDT init.
+        let fault_task_ref: &'static TssStruct = unsafe {
+            // creating a static ref to tss.
+            // safety: the tss is in a static so it is 'static, but is behind a lock
+            // and will still be accessed by the hardware with no consideration for the lock.
+            (&*fault_task as *const TssStruct).as_ref().unwrap()
+        };
+        gdt.table[GdtIndex::FTSS as usize] = DescriptorTableEntry::new_tss(fault_task_ref, PrivilegeLevel::Ring0, 0x0);
 
-        // Main task
-        gdt.push(DescriptorTableEntry::new_tss(main_task, PrivilegeLevel::Ring0, 0x2001));
+        SpinLockIRQ::new(gdt)
+    });
 
-        info!("Loading GDT");
-        let gdt = SpinLock::new(GdtManager::load(gdt, 0x8, 0x10, 0x18));
+    // initialized, now let's use it!
+    let cs = GdtIndex::KCode.selector();
+    let ds = GdtIndex::KData.selector();
+    let fs = GdtIndex::UTlsRegion.selector();
+    let gs = GdtIndex::KTls.selector();
+    let ss = GdtIndex::KStack.selector();
+    let ldt_ss = GdtIndex::LDT.selector();
+    let tss_ss = GdtIndex::TSS.selector();
 
-        unsafe {
-            info!("Loading LDT");
-            lldt(SegmentSelector(7 << 3));
-            info!("Loading Task");
-            ltr(SegmentSelector(8 << 3));
-        }
+    let mut gdt = GDT.r#try().unwrap().lock();
 
-        gdt
-    });
+    debug!("Loading GDT {:#?}\ncs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\ngs: {:?}\nss: {:?}\nldt: {:?}\ntss: {:?}", gdt.deref().table, cs, ds, ds, fs, gs, ss, ldt_ss, tss_ss);
+    gdt.commit(Some(cs), Some(ds), Some(ds), Some(fs), Some(gs), Some(ss));
+
+    unsafe {
+        debug!("Loading LDT {:?}", ldt_ss);
+        lldt(ldt_ss);
+        debug!("Loading Task {:?}", tss_ss);
+        ltr(tss_ss);
+    }
+
+    info!("Loaded GDT {:#?}\ncs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\ngs: {:?}\nss: {:?}\nldt: {:?}\ntss: {:?}", gdt.deref().table, cs, ds, ds, fs, gs, ss, ldt_ss, tss_ss);
 }
 
 /// Safety wrapper that manages the lifetime of GDT tables.
@@ -120,169 +297,269 @@ pub fn init_gdt() {
 /// "live" is probably a terrible idea. To work around this, the GdtManager keeps
 /// two copies of the DescriptorTable, one being the currently active one (loaded
 /// in the GDTR), and the other being where the changes to the GDT go to until
-/// they are commited.
+/// they are committed.
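+///
+/// Typical usage (an illustrative sketch, mirroring what `cpu_locals::init_cpu_locals` does;
+/// `tcb_addr` stands for a hypothetical base address computed by the caller):
+///
+/// ```ignore
+/// let mut gdt = GDT.r#try().unwrap().lock();
+/// // modifications go to the currently unloaded table...
+/// gdt.table[GdtIndex::KTls as usize].set_base(tcb_addr as u32);
+/// // ...and take effect when it is swapped in. `None` = keep the current selectors.
+/// gdt.commit(None, None, None, None, None, None);
+/// ```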
 ///
 /// When `commit` is called, the internal GDT and current GDTR are swapped.
-struct GdtManager {
-    /// Inactive descriptor table. Changes to the GDT are done on this table, but
-    /// will not be active until the table is commited.
-    unloaded_table: Option<DescriptorTable>,
+///
+/// This struct's implementation of `Deref` and `DerefMut` will always give a reference to the table
+/// currently not in use, so you can make modifications to it, and call `commit` afterwards.
+#[derive(Debug, Default)]
+pub struct GdtManager {
+    /// One of the two tables.
+    table_a: DescriptorTable,
+    /// One of the two tables.
+    table_b: DescriptorTable,
+    /// The table currently pointed to by GDTR. `false` is `table_a`, `true` is `table_b`.
+    table_selector: bool
 }
 
 impl GdtManager {
-    /// Create a GdtManager from a DescriptorTable and segment selectors. The
-    /// given DescriptorTable will be loaded into the GDTR, and the segment
-    /// selectors reloaded with the given value.
-    pub fn load(cur_loaded: DescriptorTable, new_cs: u16, new_ds: u16, new_ss: u16) -> GdtManager {
-        let clone = cur_loaded.clone();
-        info!("{:#?}", cur_loaded);
-        cur_loaded.load_global(new_cs, new_ds, new_ss);
-
-        GdtManager {
-            unloaded_table: Some(clone)
-        }
-    }
+    /// Commit the changes in the currently unloaded table, and update segment registers.
+    ///
+    /// # Selectors
+    ///
+    /// To make a segment register point to a new descriptor, pass `Some(selector)` to this function.
+    ///
+    /// If `None` is passed, the register will be reloaded from its current value.
+    /// This is what you want if you only updated the content of the descriptor.
+    /// We always perform a reload of all registers to make sure they reflect the state of the GDT,
+    /// in case the user modified it.
+    pub fn commit(&mut self, new_cs: Option<SegmentSelector>,
+                  new_ds: Option<SegmentSelector>,
+                  new_es: Option<SegmentSelector>,
+                  new_fs: Option<SegmentSelector>,
+                  new_gs: Option<SegmentSelector>,
+                  new_ss: Option<SegmentSelector>) {
+        let (previous_in_use, to_load) = if !self.table_selector {
+            (&mut self.table_a, &mut self.table_b)
+        } else {
+            (&mut self.table_b, &mut self.table_a)
+        };
 
-    /// Commit the changes in the currently unloaded table.
-    pub fn commit(&mut self, new_cs: u16, new_ds: u16, new_ss: u16) {
-        let old_table = self.unloaded_table.take()
-            .expect("Commit to not be called recursively")
-            .load_global(new_cs, new_ds, new_ss);
-        unsafe {
-            self.unloaded_table = Some(DescriptorTable {
-                table: Vec::from_raw_parts(
-                    old_table.base as *mut DescriptorTableEntry,
-                    old_table.limit as usize / size_of::<DescriptorTableEntry>(),
-                    old_table.limit as usize / size_of::<DescriptorTableEntry>())
-            });
-        }
-        self.set_from_loaded()
+        // first make gdtr point to the new table, and reload segment selector
+        to_load.load_global(new_cs, new_ds, new_es, new_fs, new_gs, new_ss);
+        // copy the new table to the old one
+        previous_in_use.table.copy_from_slice(&to_load.table);
+        // and toggle selector
+        self.table_selector = !self.table_selector;
    }
 }
 
 impl Deref for GdtManager {
     type Target = DescriptorTable;
 
+    /// Deref always returns a reference to the table not in use, so it can be modified
+    /// before being committed.
     fn deref(&self) -> &DescriptorTable {
-        self.unloaded_table.as_ref().expect("Deref should not be called during commit")
+        if !self.table_selector {
+            &self.table_b
+        } else {
+            &self.table_a
+        }
     }
 }
 
 impl DerefMut for GdtManager {
+    /// DerefMut always returns a reference to the table not in use, so it can be modified
+    /// before being committed.
     fn deref_mut(&mut self) -> &mut DescriptorTable {
-        self.unloaded_table.as_mut().expect("DerefMut should not be called during commit")
+        if !self.table_selector {
+            &mut self.table_b
+        } else {
+            &mut self.table_a
+        }
     }
 }
 
-/// Push a task segment.
-pub fn push_task_segment(task: &'static TssStruct) -> SegmentSelector {
-    info!("Pushing TSS: {:#?}", task);
-    let mut gdt = GDT.r#try().unwrap().lock();
-    let idx = gdt.push(DescriptorTableEntry::new_tss(task, PrivilegeLevel::Ring0, 0));
-    gdt.commit(0x8, 0x10, 0x18);
-    idx
+/// The main TSS. See [MAIN_TASK].
+#[repr(C)]
+pub struct MainTask {
+    /// TssStruct of the main task.
+    pub tss: TssStruct,
+    /// Array of bits representing the io-space permissions:
+    ///
+    /// * `0`: this port is addressable.
+    /// * `1`: this port is not addressable.
+    pub iopb: [u8; 0x2001]
 }
 
-lazy_static! {
-    /// VirtualAddress of the TSS structure of the main task. Has 0x2001 bytes
-    /// available after the TssStruct to encode the IOPB of the current process.
-    pub static ref MAIN_TASK: VirtualAddress = {
-        // We need TssStruct + 0x2001 bytes of IOPB.
-        let pregion = FrameAllocator::allocate_region(align_up(size_of::<TssStruct>() + 0x2001, PAGE_SIZE))
-            .expect("Failed to allocate physical region for tss MAIN_TASK");
-        let vaddr = get_kernel_memory().map_phys_region(pregion, MappingAccessRights::WRITABLE);
-        let tss = vaddr.addr() as *mut TssStruct;
-        unsafe {
-            *tss = TssStruct::new();
+impl Debug for MainTask {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> Result<(), core::fmt::Error> {
+        f.debug_struct("MainTask")
+            .field("tss", &self.tss)
+            .field("iopb", &"*omitted*")
+            .finish()
+    }
+}
 
-            // Now, set the IOPB to 0xFF to prevent all userland accesses
-            slice::from_raw_parts_mut(tss.offset(1) as *mut u8, 0x2001).iter_mut().for_each(|v| *v = 0xFF);
+impl MainTask {
+    /// Creates an empty TSS.
+    ///
+    /// Suitable for static declaration; the whole structure should end up in the `.bss`.
+    ///
+    /// This means that the IOPB will be set to everything addressable.
+    ///
+    /// Must be initialised by calling [init].
+    ///
+    /// [init]: MainTask::init
+    const fn empty() -> MainTask {
+        MainTask {
+            tss: TssStruct::empty(),
+            iopb: [0u8; 0x2001]
         }
-        vaddr
-    };
+    }
+
+    /// Fills the TSS.
+    ///
+    /// The struct inherits the current task's values (except registers, which are set to 0).
+    ///
+    /// IOPB is set to nothing addressable.
+    fn init(&mut self) {
+        self.tss.init();
+        for v in &mut self.iopb[..] { *v = 0xFF }
+    }
 }
 
-// TODO: gdt::get_main_iopb does not prevent creation of multiple mut ref.
-// BODY: There's currently no guarantee that we don't create multiple &mut
-// BODY: pointer to the IOPB region, which would cause undefined behavior. In
-// BODY: practice, it should only be used by `i386::process_switch`, and as such,
-// BODY: there is never actually two main_iopb active at the same time. Still,
-// BODY: it'd be nicer to have safe functions to access the IOPB.
-/// Get the IOPB of the Main Task.
+/// Main TSS
 ///
-/// # Safety
+/// Because Sunrise does not make use of Hardware Task Switching, we only allocate a single
+/// TSS that will be used by every process, and update it at every software task switch.
 ///
-/// This function can be used to create multiple mut references to the same
-/// region, which is very UB. Care should be taken to make sure any old mut slice
-/// acquired through this method is dropped before it is called again.
-pub unsafe fn get_main_iopb() -> &'static mut [u8] {
-    slice::from_raw_parts_mut((MAIN_TASK.addr() as *mut TssStruct).offset(1) as *mut u8, 0x2001)
-}
+/// We mostly set the `esp0` field, updating which stack the cpu will jump to when handling an
+/// exception/syscall.
+///
+/// #### IOPB
+///
+/// Right after the [TssStruct], the MAIN_TASK holds a bitarray indicating io-space permissions
+/// for the current process, one bit for every port:
+///
+/// * `0`: this port is addressable.
+/// * `1`: this port is not addressable.
+///
+/// This array is checked by the cpu every time a port is accessed by userspace, and we use it
+/// to enforce io-space policies. This array is updated at every task switch.
+///
+/// The kernel bypasses this protection by having the `IOPL` set to `0b00` in `EFLAGS`,
+/// making the kernel able to access all ports at all times.
+///
+/// ### Double fault
+///
+/// The only exception to this is double faulting, which does use Hardware Task Switching, and
+/// for which we allocate a second TSS, see [DOUBLE_FAULT_TASK].
+// todo: per-cpu TSSs / GDT
+// body: There are multiple things that aren't ideal about the way we handle TSSs.
+// body:
+// body: ## Initialization
+// body:
+// body: TSSs must always be initialized with an iopb_offset of `size_of::<TssStruct>()`,
+// body: so that the TSS's data is not interpreted as the iopb.
+// body:
+// body: However, because MAIN_TASK has a huge iopb (0x2001 bytes), we want it to live in the
+// body: .bss, and be lazy initialized (iopb_offset value, and iopb array memset to 0xFF).
+// body: `lazy_static` seems appropriate for that, and we should use it, so we cannot *forget* to
+// body: initialize a TSS.
+// body:
+// body: DOUBLE_FAULT_TASK could be statically initialized, except for the `cr3` field.
+// body:
+// body: ## Per-cpu
+// body:
+// body: But we will likely want a MAIN and DOUBLE_FAULT TSS per core. However, they cannot trivially
+// body: be put behind a `#[thread_local]`, as they are initialized with the GDT, before cpu-locals
+// body: are initialized. It might be possible to make them `#[thread_local]` with some
+// body: post-initialization routine that switches to using the MAIN and DOUBLE_FAULT_TASK in the
+// body: cpu-local memory area instead of the static early one, after cpu-locals have been initialized,
+// body: for core 0.
+// body: The static early one could do without an iopb, since we're not going to userspace with it.
+// body:
+// body: For other cores, having a `#[thread_local]` inside a `lazy_static!` seems to work, but I don't
+// body: yet know how cores are going to be started, whether they allocate/initialize their own
+// body: GDT + MAIN + DOUBLE_FAULT TSS, or if their parent core does it.
+// body:
+// body: Because of these unknowns, the search for a good design for TSSs/GDT is postponed.
+// body:
+// body: ## Locking
+// body:
+// body: Since the TSSs are supposed to be cpu-local, there is no reason for them to have a mutex
+// body: around them. An ideal design would be lock-less, which can either be achieved with `#[thread_local]`,
+// body: or some custom wrapper around an UnsafeCell just for TSSs.
+// body:
+// body: ## DOUBLE_FAULT's cr3
+// body:
+// body: The DOUBLE_FAULT TSS(s)'s cr3 must point to a valid page directory, which will remain valid
+// body: (i.e. not be freed) for the entire lifetime of the kernel, and possibly updated when kernel
+// body: page tables are modified.
+// body:
+// body: For now, because we have no such hierarchy, we always make DOUBLE_FAULT's cr3 point
+// body: to the current cr3, and update it when we switch page table hierarchies. However, the current
+// body: way we do kernel paging is not viable for SMP, and we might finally implement such a hierarchy
+// body: for SMP; we could then make DOUBLE_FAULT TSS(s) point to it.
+pub static MAIN_TASK: Mutex<MainTask> = Mutex::new(MainTask::empty());
+
+/// Double fault TSS
+///
+/// Double faulting will most likely occur after a kernel stack overflow.
+/// We can't use the regular way of handling exceptions, i.e. pushing some registers and handling
+/// the exception on the same stack that we were using, since it has overflowed.
+///
+/// We must switch the stack when it happens, and the only way to do that is via a task gate.
+///
+/// We set up a TSS whose `esp0` points to [DOUBLE_FAULT_TASK_STACK],
+/// its `eip` to the double fault handler, and make the double fault vector in the IDT a task gate to it.
+///
+/// When a double fault occurs, the current (faulty) cpu register values will be backed up
+/// to [MAIN_TASK], where the double fault handler can access them to work out what happened.
+///
+/// ##### IOPB
+///
+/// Unlike the [MAIN_TASK], this TSS does not have an associated IOPB.
+pub static DOUBLE_FAULT_TASK: Mutex<TssStruct> = Mutex::new(TssStruct::empty());
+
+/// The stack used while handling a double fault.
+///
+/// Just a page aligned array of bytes.
+#[repr(C, align(4096))]
+struct DoubleFaultTaskStack([u8; 4096]);
+
+/// The stack used while handling a double fault. See [DOUBLE_FAULT_TASK].
+static mut DOUBLE_FAULT_TASK_STACK: DoubleFaultTaskStack = DoubleFaultTaskStack([0u8; PAGE_SIZE]);
 
 /// A structure containing our GDT.
-#[derive(Debug, Clone)]
-struct DescriptorTable {
-    /// The GDT table, a growable array of DescriptorTableEntry.
-    table: Vec<DescriptorTableEntry>,
+///
+/// See [module level documentation].
+///
+/// [module level documentation]: super
+#[derive(Debug, Clone, Default)]
+pub struct DescriptorTable {
+    /// The GDT table, an array of DescriptorTableEntry.
+    pub table: [DescriptorTableEntry; GdtIndex::DescCount as usize],
 }
 
 impl DescriptorTable {
-    /// Create an empty GDT. This will **not** include the null entry, so make
-    /// sure you add it!
-    pub fn new() -> DescriptorTable {
-        DescriptorTable {
-            table: Vec::new()
-        }
-    }
-
-    /// Fill the current DescriptorTable with a copy of the currently loaded entries.
-    pub fn set_from_loaded(&mut self) {
-        let loaded_ptr = sgdt();
-        let loaded_table = unsafe {
-            slice::from_raw_parts(loaded_ptr.base as *mut DescriptorTableEntry, loaded_ptr.limit as usize / size_of::<DescriptorTableEntry>())
-        };
-
-        self.table.clear();
-        self.table.extend_from_slice(loaded_table);
-    }
-
-    /// Push a new entry to the table, returning a segment selector to it.
-    pub fn push(&mut self, entry: DescriptorTableEntry) -> SegmentSelector {
-        let ret = self.table.len() << 3;
-        self.table.push(entry);
-        SegmentSelector(ret as u16)
-    }
-
-    /// Load this descriptor table into the GDTR, and set the segments to the
-    /// given values. Returns the old GDTR.
-    fn load_global(mut self, new_cs: u16, new_ds: u16, new_ss: u16) -> DescriptorTablePointer {
-        self.table.shrink_to_fit();
-        assert_eq!(self.table.len(), self.table.capacity());
+    /// Load this descriptor table into the GDTR, and reload the segment registers.
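+    ///
+    /// Note that the cpu reads a descriptor from the GDT only when its segment register is
+    /// (re)loaded, which is why every segment register is reloaded here, even those whose
+    /// selector did not change.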
+    fn load_global(&mut self, new_cs: Option<SegmentSelector>,
+                   new_ds: Option<SegmentSelector>,
+                   new_es: Option<SegmentSelector>,
+                   new_fs: Option<SegmentSelector>,
+                   new_gs: Option<SegmentSelector>,
+                   new_ss: Option<SegmentSelector>) {
         let ptr = DescriptorTablePointer {
             base: self.table.as_ptr() as u32,
             limit: (self.table.len() * size_of::<DescriptorTableEntry>()) as u16,
         };
 
-        let oldptr = sgdt();
-
-        // TODO: Figure out how to chose CS.
         unsafe {
             lgdt(ptr);
 
             // Reload segment selectors
-            set_cs(SegmentSelector(new_cs));
-            load_ds(SegmentSelector(new_ds));
-            load_es(SegmentSelector(new_ds));
-            load_fs(SegmentSelector(new_ds));
-            load_gs(SegmentSelector(new_ds));
-            load_ss(SegmentSelector(new_ss));
+            set_cs(match new_cs { Some(s) => s, None => cs() });
+            load_ds(match new_ds { Some(s) => s, None => ds()});
+            load_es(match new_es { Some(s) => s, None => es()});
+            load_fs(match new_fs { Some(s) => s, None => fs()});
+            load_gs(match new_gs { Some(s) => s, None => gs()});
+            load_ss(match new_ss { Some(s) => s, None => ss()});
         }
-
-        mem::forget(self.table);
-
-        oldptr
     }
 }
 
@@ -309,7 +586,7 @@ enum SystemDescriptorTypes {
 /// LDT, or Call Gates.
 #[repr(transparent)]
 #[derive(Clone, Copy)]
-struct DescriptorTableEntry(u64);
+pub struct DescriptorTableEntry(u64);
 
 impl fmt::Debug for DescriptorTableEntry {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
@@ -348,12 +625,12 @@ impl DescriptorTableEntry {
     /// Returns an empty descriptor. Using this descriptor is an error and will
     /// raise a GPF. Should only be used to create a descriptor to place at index
     /// 0 of the GDT.
-    pub fn null_descriptor() -> DescriptorTableEntry {
+    fn null_descriptor() -> DescriptorTableEntry {
         DescriptorTableEntry(0)
     }
 
     /// Creates an empty GDT descriptor, but with some flags set correctly
-    pub fn new(base: u32, limit: u32, is_code: bool, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
+    fn new(base: u32, limit: u32, is_code: bool, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
         let mut gdt = Self::null_descriptor();
 
         // First, the constant values.
@@ -376,7 +653,7 @@ impl DescriptorTableEntry {
     }
 
     /// Creates an empty GDT system descriptor of the given type.
-    pub fn new_system(ty: SystemDescriptorTypes, base: u32, limit: u32, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
+    fn new_system(ty: SystemDescriptorTypes, base: u32, limit: u32, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
         let mut gdt = Self::null_descriptor();
 
         // Set the system descriptor type
@@ -391,19 +668,19 @@ impl DescriptorTableEntry {
     }
 
     /// Creates a new LDT descriptor.
-    pub fn new_ldt(base: &'static DescriptorTable, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
+    fn new_ldt(base: &'static DescriptorTable, priv_level: PrivilegeLevel) -> DescriptorTableEntry {
         let limit = if base.table.is_empty() { 0 } else { base.table.len() * size_of::<DescriptorTableEntry>() - 1 };
         Self::new_system(SystemDescriptorTypes::Ldt, base as *const _ as u32, limit as u32, priv_level)
     }
 
     /// Creates a GDT descriptor pointing to a TSS segment
-    pub fn new_tss(base: &'static TssStruct, priv_level: PrivilegeLevel, iobp_size: usize) -> DescriptorTableEntry {
+    fn new_tss(base: &'static TssStruct, priv_level: PrivilegeLevel, iobp_size: usize) -> DescriptorTableEntry {
         Self::new_system(SystemDescriptorTypes::AvailableTss32, base as *const _ as u32, (size_of::<TssStruct>() + iobp_size - 1) as u32, priv_level)
     }
 
     /// Gets the byte length of the entry, minus 1.
- fn get_limit(self) -> u32 { + pub fn get_limit(self) -> u32 { (self.0.get_bits(0..16) as u32) | ((self.0.get_bits(48..52) << 16) as u32) } @@ -414,7 +691,7 @@ impl DescriptorTableEntry { /// # Panics /// /// Panics if the given limit is higher than 65536 and not page aligned. - fn set_limit(&mut self, mut newlimit: u32) { + pub fn set_limit(&mut self, mut newlimit: u32) { if newlimit > 65536 && (newlimit & 0xFFF) != 0xFFF { panic!("Limit {} is invalid", newlimit); } @@ -429,12 +706,12 @@ impl DescriptorTableEntry { } /// Gets the base address of the entry. - fn get_base(self) -> u32 { + pub fn get_base(self) -> u32 { (self.0.get_bits(16..40) as u32) | ((self.0.get_bits(56..64) << 24) as u32) } /// Sets the base address of the entry. - fn set_base(&mut self, newbase: u32) { + pub fn set_base(&mut self, newbase: u32) { self.0.set_bits(16..40, u64::from(newbase.get_bits( 0..24))); self.0.set_bits(56..64, u64::from(newbase.get_bits(24..32))); } @@ -494,3 +771,9 @@ impl DescriptorTableEntry { self.0.get_bit(54) } } + +impl Default for DescriptorTableEntry { + fn default() -> Self { + DescriptorTableEntry::null_descriptor() + } +} diff --git a/kernel/src/i386/mod.rs b/kernel/src/i386/mod.rs index becbd166c..90f305ea2 100644 --- a/kernel/src/i386/mod.rs +++ b/kernel/src/i386/mod.rs @@ -4,9 +4,6 @@ #![cfg(any(target_arch = "x86", test, rustdoc))] #![allow(dead_code)] -use alloc::boxed::Box; -use core::ops::{Deref, DerefMut}; - pub mod acpi; #[macro_use] @@ -129,6 +126,41 @@ pub mod instructions { unsafe { asm!("mov %cs, $0" : "=r" (segment) ) }; SegmentSelector(segment) } + + /// Read the value of the stack segment register. + pub fn ss() -> SegmentSelector { + let segment: u16; + unsafe { asm!("mov %ss, $0" : "=r" (segment) ) }; + SegmentSelector(segment) + } + + /// Read the value of the data segment register. + pub fn ds() -> SegmentSelector { + let segment: u16; + unsafe { asm!("mov %ds, $0" : "=r" (segment) ) }; + SegmentSelector(segment) + } + + /// Read the value of the es segment register. + pub fn es() -> SegmentSelector { + let segment: u16; + unsafe { asm!("mov %es, $0" : "=r" (segment) ) }; + SegmentSelector(segment) + } + + /// Read the value of the fs segment register. + pub fn fs() -> SegmentSelector { + let segment: u16; + unsafe { asm!("mov %fs, $0" : "=r" (segment) ) }; + SegmentSelector(segment) + } + + /// Read the value of the gs segment register. + pub fn gs() -> SegmentSelector { + let segment: u16; + unsafe { asm!("mov %gs, $0" : "=r" (segment) ) }; + SegmentSelector(segment) + } } pub mod interrupts { //! Interrupt disabling functionality. @@ -256,7 +288,17 @@ impl PrivilegeLevel { /// information about a task. The TSS is primarily suited for hardware multitasking, /// where each individual process has its own TSS. /// ([see OSDEV](https://wiki.osdev.org/TSS)) -#[repr(C)] +#[repr(C, align(128))] // According to the IA32-E PDF, volume 3, 7.2.1: +// If paging is used: +// - Avoid placing a page boundary in the part of the TSS that the processor +// reads during a task switch (the first 104 bytes). The processor may not +// correctly perform address translations if a boundary occurs in this area. +// During a task switch, the processor reads and writes into the first 104 +// bytes of each TSS (using contiguous physical addresses beginning with the +// physical address of the first byte of the TSS). 
+// So, after TSS access begins, if part of the 104 bytes is not physically
+// contiguous, the processor will access incorrect information without
+// generating a page-fault exception.
 #[derive(Copy, Clone, Debug)]
 #[allow(missing_docs, clippy::missing_docs_in_private_items)]
 pub struct TssStruct {
@@ -300,8 +342,17 @@ pub struct TssStruct {
     pub iopboffset: u16,
 }

-impl Default for TssStruct {
-    fn default() -> TssStruct {
+impl TssStruct {
+    /// Creates an empty TssStruct.
+    ///
+    /// All fields are set to `0`, suitable for static declarations, so that it can live in the `.bss`.
+    ///
+    /// The TssStruct must then be initialized with [init].
+    ///
+    /// Note that until it is initialized properly, the `.iopboffset` field will be invalid.
+    ///
+    /// [init]: TssStruct::init
+    pub const fn empty() -> TssStruct {
         TssStruct {
             _reserved1: 0,
             link: 0,
@@ -339,40 +390,23 @@ impl Default for TssStruct {
             gs: 0,
             _reservedb: 0,
             ldt_selector: 0,
-            iopboffset: ::core::mem::size_of::<TssStruct>() as u16,
+            iopboffset: 0,
             _reservedc: 0,
         }
     }
-}

-const_assert_eq!(::core::mem::size_of::<TssStruct>(), 0x68);
-
-impl TssStruct {
-    /// Creates a new TssStruct.
+    /// Fills the TSS.
     ///
-    /// The new struct inherits the current task's values (except registers, which are set to 0)
-    pub fn new() -> TssStruct {
-        let ds: u16;
-        let cs: u16;
-        let ss: u16;
-        let cr3: u32;
-        let ldt_selector: u16;
-
-        unsafe {
-            // Safety: this is perfectly safe. Maybe I should do safe wrappers for this however...
-            asm!("
-                mov AX, DS
-                mov $0, AX
-                mov AX, CS
-                mov $1, AX
-                mov AX, SS
-                mov $2, AX
-                mov $3, CR3
-                sldt $4
-            " : "=r"(ds), "=r"(cs), "=r"(ss), "=r"(cr3), "=r"(ldt_selector) :: "ax" : "intel");
-        }
-
-        TssStruct {
+    /// The TSS is filled with kernel segment selectors, and the current cr3.
+    /// Registers are set to 0.
+    pub fn init(&mut self) {
+        let ds = gdt::GdtIndex::KData.selector().0;
+        let cs = gdt::GdtIndex::KCode.selector().0;
+        let ss = gdt::GdtIndex::KStack.selector().0;
+        let cr3 = crate::paging::read_cr3().addr() as u32;
+        let ldt_selector = gdt::GdtIndex::LDT.selector().0;
+
+        *self = TssStruct {
             ss0: ss,
             ss1: ss,
             ss2: ss,
@@ -384,7 +418,8 @@ impl TssStruct {
             ds: ds,
             fs: ds,
             gs: ds,
-            ..TssStruct::default()
+            iopboffset: ::core::mem::size_of::<TssStruct>() as u16,
+            ..TssStruct::empty()
         }
     }

@@ -405,43 +440,3 @@ impl TssStruct {
         self.eip = eip;
     }
 }
-
-/// Wrapper around TssStruct ensuring it is kept at the page boundary.
-///
-/// According to the IA32-E PDF, volume 3, 7.2.1:
-///
-/// If paging is used:
-///   - Avoid placing a page boundary in the part of the TSS that the processor
-///     reads during a task switch (the first 104 bytes). The processor may not
-///     correctly perform address translations if a boundary occurs in this area.
-///     During a task switch, the processor reads and writes into the first 104
-///     bytes of each TSS (using contiguous physical addresses beginning with the
-///     physical address of the first byte of the TSS). So, after TSS access
-///     begins, if part of the 104 bytes is not physically contiguous, the
-///     processor will access incorrect information without generating a
-///     page-fault exception.
-#[derive(Debug)]
-#[repr(C, align(4096))]
-pub struct AlignedTssStruct(TssStruct);

-impl AlignedTssStruct {
-    /// Create a new AlignedTssStruct, using boxing to avoid putting a ridiculously large
-    /// object (4kb) on the stack.
-    pub fn new(tss: TssStruct) -> Box<AlignedTssStruct> {
-        box AlignedTssStruct(tss)
-    }
-}
-
-impl Deref for AlignedTssStruct {
-    type Target = TssStruct;
-
-    fn deref(&self) -> &TssStruct {
-        &self.0
-    }
-}
-
-impl DerefMut for AlignedTssStruct {
-    fn deref_mut(&mut self) -> &mut TssStruct {
-        &mut self.0
-    }
-}
diff --git a/kernel/src/i386/process_switch.rs b/kernel/src/i386/process_switch.rs
index 067db278f..92c7ec545 100644
--- a/kernel/src/i386/process_switch.rs
+++ b/kernel/src/i386/process_switch.rs
@@ -3,10 +3,10 @@
 //! This module describes low-level functions and structures needed to perform a process switch

 use crate::process::ThreadStruct;
-use crate::i386::gdt;
 use alloc::sync::Arc;
 use core::mem::size_of;
-use crate::i386::TssStruct;
+use crate::i386::gdt::{GDT, MAIN_TASK};
+use crate::i386::gdt::GdtIndex;

 /// The hardware context of a paused thread. It contains just enough registers to get the thread
 /// running again.
@@ -80,6 +80,7 @@ impl Default for ThreadHardwareContext {
 /// # Panics
 ///
 /// Panics if the locks protecting the ProcessStruct of current or B process cannot be obtained.
+/// Panics if the locks protecting the MAIN_TASK TSS or DOUBLE_FAULT_TSS cannot be obtained.
 ///
 /// # Safety:
 ///
@@ -101,6 +102,14 @@ pub unsafe extern "C" fn process_switch(thread_b: Arc<ThreadStruct>, thread_curr
     // Switch the memory pages
     thread_b_lock_pmemory.switch_to();

+    // Update the TLS segments. They are not loaded yet.
+    let mut gdt = GDT
+        .r#try().expect("GDT not initialized")
+        .try_lock().expect("Could not lock GDT");
+    gdt.table[GdtIndex::UTlsRegion as usize].set_base(thread_b.tls_region.addr() as u32);
+    gdt.table[GdtIndex::UTlsElf as usize].set_base(thread_b.tls_elf.lock().addr() as u32);
+    gdt.commit(None, None, None, None, None, None);
+
     let current_esp: usize;
     asm!("mov $0, esp" : "=r"(current_esp) : : : "intel", "volatile");

@@ -123,11 +132,16 @@ pub unsafe extern "C" fn process_switch(thread_b: Arc<ThreadStruct>, thread_curr

     // Set IOPB back to "nothing allowed" state
     // todo do not change iopb if thread_b belongs to the same process.
-    let iopb = gdt::get_main_iopb();
+
+    // MAIN_TSS should otherwise only be locked during DOUBLE_FAULTING,
+    // in which case we really shouldn't be context-switching.
+    let mut main_tss = MAIN_TASK.try_lock()
+        .expect("Cannot lock main tss");
     for ioport in &thread_current.process.capabilities.ioports {
         let ioport = *ioport as usize;
-        iopb[ioport / 8] = 0xFF;
+        main_tss.iopb[ioport / 8] = 0xFF;
     }
+    drop(main_tss);

     // current is still stored in scheduler's global CURRENT_PROCESS, so it's not dropped yet.
     drop(thread_current);

@@ -174,14 +188,17 @@ pub unsafe extern "C" fn process_switch(thread_b: Arc<ThreadStruct>, thread_curr
     // recreate the Arc to our ThreadStruct from the pointer that was passed to us
     let me = unsafe { Arc::from_raw(whoami) };

+    // MAIN_TSS should have been unlocked during schedule-out. Re-take it.
+    let mut main_tss = MAIN_TASK.try_lock()
+        .expect("Cannot lock main tss");
+
     // Set the ESP0
-    let tss = gdt::MAIN_TASK.addr() as *mut TssStruct;
-    (*tss).esp0 = me.kstack.get_stack_start() as u32;
+    main_tss.tss.esp0 = me.kstack.get_stack_start() as u32;

     // Set IOPB
     for ioport in &me.process.capabilities.ioports {
         let ioport = *ioport as usize;
-        iopb[ioport / 8] &= !(1 << (ioport % 8));
+        main_tss.iopb[ioport / 8] &= !(1 << (ioport % 8));
     }

     me
@@ -198,7 +215,7 @@ pub unsafe extern "C" fn process_switch(thread_b: Arc<ThreadStruct>, thread_curr
 /// This function will definitely fuck up your stack, so make sure you're calling it on a
 /// never-scheduled thread's empty-stack.
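+///
+/// A sketch of the intended call (illustrative: `t`, `entry`, `arg` and `stack_top` stand in
+/// for values normally produced by the thread-creation code):
+///
+/// ```rust
+/// unsafe { prepare_for_first_schedule(&t, entry, arg, stack_top); }
+/// ```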
 #[allow(clippy::fn_to_numeric_cast)]
-pub unsafe fn prepare_for_first_schedule(t: &ThreadStruct, entrypoint: usize, userspace_stack: usize) {
+pub unsafe fn prepare_for_first_schedule(t: &ThreadStruct, entrypoint: usize, userspace_arg: usize, userspace_stack: usize) {
     #[repr(packed)]
     #[allow(clippy::missing_docs_in_private_items)]
     struct RegistersOnStack {
@@ -234,7 +251,7 @@ pub unsafe fn prepare_for_first_schedule(t: &ThreadStruct, entrypoint: usize, us
         ebp: stack_start,                        // -+
         esp: 0, // ignored by the popad anyway   //  |
         ebx: userspace_stack as u32,             //  |
-        edx: 0,                                  //  |
+        edx: userspace_arg as u32,               //  |
         ecx: 0,                                  //  |
         eax: entrypoint as u32,                  //  |
         callback_eip: first_schedule as u32      //  |
@@ -255,13 +272,21 @@ pub unsafe fn prepare_for_first_schedule(t: &ThreadStruct, entrypoint: usize, us

 /// The function ret'd on, on a thread's first schedule - as set up by the prepare_for_first_schedule.
 ///
 /// At this point, interrupts are still off. This function should ensure the thread is properly
-/// switched (set up ESP0, IOPB and whatnot) and call scheduler_first_schedule.
+/// switched (set up ESP0, IOPB and whatnot) and call [`scheduler_first_schedule`].
+///
+/// # Safety:
+///
+/// * Interrupts must be disabled.
+/// * Arguments must respect the [`prepare_for_first_schedule`] ABI, and be popped into registers.
+///
+/// [`scheduler_first_schedule`]: crate::scheduler::scheduler_first_schedule.
 #[naked]
-fn first_schedule() {
+unsafe fn first_schedule() {
     // just get the ProcessStruct pointer in $edi, the entrypoint in $eax, and call a rust function
     unsafe {
         asm!("
         push ebx
+        push edx
         push eax
         push edi
         call $0
@@ -269,30 +294,31 @@ fn first_schedule() {
     }

     /// Stack is set-up, now we can run rust code.
-    extern "C" fn first_schedule_inner(whoami: *const ThreadStruct, entrypoint: usize, userspace_stack: usize) -> ! {
+    extern "C" fn first_schedule_inner(whoami: *const ThreadStruct, entrypoint: usize, userspace_arg: usize, userspace_stack: usize) -> ! {
         // reconstruct an Arc to our ProcessStruct from the leaked pointer
         let current = unsafe { Arc::from_raw(whoami) };

+        // MAIN_TSS must have been unlocked by now.
+        let mut main_tss = MAIN_TASK.try_lock()
+            .expect("Cannot lock main tss");
+
         // Set the ESP0
-        let tss = gdt::MAIN_TASK.addr() as *mut TssStruct;
-        unsafe {
-            // Safety: TSS is always valid.
-            (*tss).esp0 = current.kstack.get_stack_start() as u32;
-        }
+        main_tss.tss.esp0 = current.kstack.get_stack_start() as u32;

         // todo do not touch iopb if we come from a thread of the same process.
         // Set IOPB
-        let iopb = unsafe {
-            gdt::get_main_iopb()
-        };
         for ioport in &current.process.capabilities.ioports {
             let ioport = *ioport as usize;
-            iopb[ioport / 8] &= !(1 << (ioport % 8));
+            main_tss.iopb[ioport / 8] &= !(1 << (ioport % 8));
         }
+        drop(main_tss); // unlock it
+
         // call the scheduler to finish the high-level process switch mechanics
-        let arg = current.arg;
-        crate::scheduler::scheduler_first_schedule(current, || jump_to_entrypoint(entrypoint, userspace_stack, arg));
+        unsafe {
+            // safe: interrupts are off
+            crate::scheduler::scheduler_first_schedule(current, || jump_to_entrypoint(entrypoint, userspace_stack, userspace_arg));
+        }

         unreachable!()
     }
@@ -311,25 +337,34 @@ fn first_schedule() {
 /// This way, just after the `iret`, cpu will be in ring 3, with all of its registers cleared,
 /// `$eip` pointing to `ep`, and `$esp` pointing to `userspace_stack_ptr`.
 fn jump_to_entrypoint(ep: usize, userspace_stack_ptr: usize, arg: usize) -> ! {
+    // gonna write constants in the code, cause not enough registers.
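+    // (Selector arithmetic: a selector is (GDT index << 3) | RPL. E.g. UCode sits at
+    // GDT index 5, so (5 << 3) | 0b11 == 0x2B, the Ring 3 code selector used below.)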
+    // just check we aren't hard-coding the wrong values.
+    const_assert_eq!((GdtIndex::UCode as u16) << 3 | 0b11, 0x2B);
+    const_assert_eq!((GdtIndex::UData as u16) << 3 | 0b11, 0x33);
+    const_assert_eq!((GdtIndex::UTlsRegion as u16) << 3 | 0b11, 0x3B);
+    const_assert_eq!((GdtIndex::UTlsElf as u16) << 3 | 0b11, 0x43);
+    const_assert_eq!((GdtIndex::UStack as u16) << 3 | 0b11, 0x4B);
     unsafe {
         asm!("
-        mov ax,0x2B // Set data segment selector to Userland Data, Ring 3
+        mov ax,0x33 // ds, es <- UData, Ring 3
         mov ds,ax
         mov es,ax
+
+        mov ax,0x3B // fs <- UTlsRegion, Ring 3
         mov fs,ax
+
+        mov ax, 0x43 // gs <- UTlsElf, Ring 3
         mov gs,ax

         // Build the fake stack for IRET
-        push 0x33 // Userland Stack, Ring 3
+        push 0x4B // Userland Stack, Ring 3
        push $1   // Userspace ESP
        pushfd
-        push 0x23 // Userland Code, Ring 3
+        push 0x2B // Userland Code, Ring 3
        push $0   // Entrypoint

        // Clean up all registers. Also setup arguments.
-        mov eax, $2
+        mov ecx, $2
+        mov eax, 0
        mov ebx, 0
-        mov ecx, 0
        mov edx, 0
        mov ebp, 0
        mov edi, 0
diff --git a/kernel/src/i386/structures/idt.rs b/kernel/src/i386/structures/idt.rs
index 1d0379699..e3f839a1f 100644
--- a/kernel/src/i386/structures/idt.rs
+++ b/kernel/src/i386/structures/idt.rs
@@ -14,11 +14,8 @@ use core::marker::PhantomData;
 use core::mem;
 use core::ops::{Index, IndexMut};
 use bit_field::BitField;
-use crate::i386::{AlignedTssStruct, TssStruct, PrivilegeLevel};
+use crate::i386::PrivilegeLevel;
 use crate::mem::VirtualAddress;
-use crate::paging::{PAGE_SIZE, kernel_memory::get_kernel_memory};
-use alloc::boxed::Box;
-use crate::i386::gdt;
 use crate::i386::structures::gdt::SegmentSelector;

 /// An Interrupt Descriptor Table with 256 entries.
@@ -547,13 +544,11 @@ impl<F> fmt::Debug for IdtEntry<F> {
 const_assert_eq!(mem::size_of::<IdtEntry<HandlerFunc>>(), 8);

 /// A handler function for an interrupt or an exception without error code.
-pub type HandlerFunc = extern "x86-interrupt" fn(&mut ExceptionStackFrame);
+pub type HandlerFunc = fn();
 /// A handler function for an exception that pushes an error code.
-pub type HandlerFuncWithErrCode =
-    extern "x86-interrupt" fn(&mut ExceptionStackFrame, error_code: u32);
+pub type HandlerFuncWithErrCode = fn(error_code: u32);
 /// A page fault handler function that pushes a page fault error code.
-pub type PageFaultHandlerFunc =
-    extern "x86-interrupt" fn(&mut ExceptionStackFrame, error_code: PageFaultErrorCode);
+pub type PageFaultHandlerFunc = fn(error_code: u32);

 impl<F> IdtEntry<F> {
     /// Creates a non-present IDT entry (but sets the must-be-one bits).
@@ -589,65 +584,36 @@ impl<F> IdtEntry<F> {
     /// Set a task gate for the IDT entry and sets the present bit.
     ///
-    /// For the code selector field, this function uses the code segment selector currently
-    /// active in the CPU.
-    pub fn set_handler_task_gate_addr(&mut self, addr: u32) {
+    /// # Unsafety
+    ///
+    /// `tss_selector` must point to a valid TSS, which will remain present.
+    /// The TSS' `eip` should point to the handler function.
+    /// The TSS' `esp` and `esp0` should point to a usable stack for the handler function.
+    pub unsafe fn set_handler_task_gate(&mut self, tss_selector: SegmentSelector) {
         self.pointer_low = 0;
         self.pointer_high = 0;
-
-        let stack = get_kernel_memory().get_page();
-
-        // Load tss segment with addr in IP.
-        let mut tss = AlignedTssStruct::new(TssStruct::new());
-        //tss.ss0 = SegmentSelector(3 << 3);
-        tss.esp0 = (stack.addr() + PAGE_SIZE) as u32;
-        tss.esp = (stack.addr() + PAGE_SIZE) as u32;
-        tss.eip = addr;
-
-        let tss = Box::leak(tss);
-
-        self.gdt_selector = gdt::push_task_segment(tss);
+        self.gdt_selector = tss_selector;

         self.options.set_present_task(true);
     }
 }

-macro_rules! impl_set_handler_fn {
-    ($h:ty) => {
-        impl IdtEntry<$h> {
-            /// Set an interrupt gate function for the IDT entry and sets the present bit.
-            ///
-            /// For the code selector field, this function uses the code segment selector currently
-            /// active in the CPU.
-            ///
-            /// The function returns a mutable reference to the entry's options that allows
-            /// further customization.
-            #[allow(clippy::fn_to_numeric_cast)] // it **is** a u32
-            pub fn set_handler_fn(&mut self, handler: $h) -> &mut EntryOptions {
-                unsafe {
-                    self.set_interrupt_gate_addr(handler as u32)
-                }
-            }
-
-            /// Set a task gate function for the IDT entry and sets the present bit.
-            ///
-            /// For the code selector field, this function uses the code segment selector currently
-            /// active in the CPU.
-            ///
-            /// The function returns a mutable reference to the entry's options that allows
-            /// further customization.
-            #[allow(clippy::fn_to_numeric_cast)] // it **is** a u32
-            pub fn set_task_fn(&mut self, handler: $h) {
-                self.set_handler_task_gate_addr(handler as u32)
-            }
-        }
-    }
-}
-
-impl_set_handler_fn!(HandlerFunc);
-impl_set_handler_fn!(HandlerFuncWithErrCode);
-impl_set_handler_fn!(PageFaultHandlerFunc);
-
+impl<F> IdtEntry<F> {
+    /// Set an interrupt gate function for the IDT entry and sets the present bit.
+    ///
+    /// For the code selector field, this function uses the code segment selector currently
+    /// active in the CPU.
+    ///
+    /// The function returns a mutable reference to the entry's options that allows
+    /// further customization.
+    #[allow(clippy::fn_to_numeric_cast)] // it **is** a u32
+    pub fn set_handler_fn(&mut self, handler_asm_wrapper: extern "C" fn()) -> &mut EntryOptions {
+        unsafe {
+            self.set_interrupt_gate_addr(handler_asm_wrapper as u32)
+        }
+    }
+}

 /// Represents the type of an IDT descriptor (called a gate).
 ///
 /// Technically, this represents a subset of [SystemDescriptorTypes].
@@ -765,10 +731,10 @@ impl fmt::Debug for ExceptionStackFrame {
         let mut s = f.debug_struct("ExceptionStackFrame");

         s.field("instruction_pointer", &self.instruction_pointer);
-        s.field("code_segment", &self.code_segment);
+        s.field("code_segment", &Hex(self.code_segment));
         s.field("cpu_flags", &Hex(self.cpu_flags));
         s.field("stack_pointer", &self.stack_pointer);
-        s.field("stack_segment", &self.stack_segment);
+        s.field("stack_segment", &Hex(self.stack_segment));
         s.finish()
     }
 }
diff --git a/kernel/src/interrupts/irq.rs b/kernel/src/interrupts/irq.rs
deleted file mode 100644
index eb64bef05..000000000
--- a/kernel/src/interrupts/irq.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-//! IRQ Handling
-//!
-//! IRQs are asynchronous interruptions coming from an external source,
-//! generally a device. Each platform has its own IRQ handlers. The API exposed
-//! by this module consists solely of an IRQ_HANDLERS array containing function
-//! pointers for all the IRQs, redirecting them to the generic IRQ management
-//! defined in the event module. It is expected that these pointer will then be
-//! inserted in an architecture-specific interrupt table (such as i386's IDT).
-
-use crate::i386::structures::idt::ExceptionStackFrame;
-
-macro_rules! irq_handler {
-    ($irq:expr, $name:ident) => {{
-        #[allow(clippy::missing_docs_in_private_items)]
-        extern "x86-interrupt" fn $name(_stack_frame: &mut ExceptionStackFrame) {
-            // pic::get().acknowledge($irq);
-            crate::i386::interrupt::acknowledge($irq);
-            crate::event::dispatch_event($irq);
-        }
-        $name
-    }}
-}
-
-/// Array of interrupt handlers. The position in the array defines the IRQ this
-/// handler is targeting. See the module documentation for more information.
-pub static IRQ_HANDLERS : [extern "x86-interrupt" fn(stack_frame: &mut ExceptionStackFrame); 17] = [
-    irq_handler!(0, pit_handler),
-    irq_handler!(1, keyboard_handler),
-    irq_handler!(2, cascade_handler),
-    irq_handler!(3, serial2_handler),
-    irq_handler!(4, serial1_handler),
-    irq_handler!(5, sound_handler),
-    irq_handler!(6, floppy_handler),
-    irq_handler!(7, parallel1_handler),
-    irq_handler!(8, rtc_handler),
-    irq_handler!(9, acpi_handler),
-    irq_handler!(10, irq10_handler),
-    irq_handler!(11, irq11_handler),
-    irq_handler!(12, mouse_handler),
-    irq_handler!(13, irq13_handler),
-    irq_handler!(14, primary_ata_handler),
-    irq_handler!(15, secondary_ata_handler),
-    irq_handler!(16, hpet_handler),
-];
diff --git a/kernel/src/interrupts/mod.rs b/kernel/src/interrupts/mod.rs
index c0e4d1311..77b45b072 100644
--- a/kernel/src/interrupts/mod.rs
+++ b/kernel/src/interrupts/mod.rs
@@ -1,27 +1,55 @@
-//! Interrupt handling.
+//! i386 exceptions + irq + syscall handling
+//!
+//! # Macros
+//!
+//! This module defines the following macros to handle exceptions and interrupts:
+//!
+//! * [`trap_gate_asm`]\: low-level asm wrapper.
+//! * [`generate_trap_gate_handler`]\: high-level rust wrapper.
+//! * [`irq_handler`]\: irq handler generator.
+//!
+//! # Exceptions
 //!
 //! All exceptions are considered unrecoverable errors, and kill the process that issued it.
 //!
 //! Feature `panic-on-exception` makes the kernel stop and panic when a thread generates
 //! an exception. This is useful for debugging.
+//!
+//! # IRQs
+//!
+//! Interrupts are handled like exceptions, whose handler dispatches the event for the irq line.
+//! See [`irq_handler`].
+//!
+//! # Syscalls
+//!
+//! Syscalls are handled as if they were exceptions, but instead of killing the process the handler
+//! calls [syscall_interrupt_dispatcher].
+//!
+//! [syscall_interrupt_dispatcher]: self::interrupts::syscall_interrupt_dispatcher

-use crate::i386::structures::idt::{ExceptionStackFrame, PageFaultErrorCode, Idt};
+use crate::i386::structures::idt::{PageFaultErrorCode, Idt};
 use crate::i386::instructions::interrupts::sti;
 use crate::mem::VirtualAddress;
 use crate::paging::kernel_memory::get_kernel_memory;
-use crate::i386::{TssStruct, PrivilegeLevel};
-use crate::i386::gdt;
-use crate::scheduler::get_current_thread;
+use crate::i386::PrivilegeLevel;
+use crate::scheduler::{get_current_thread, get_current_process};
 use crate::process::{ProcessStruct, ThreadState};
-use crate::sync::SpinLockIRQ;
+use crate::sync::{SpinLock, SpinLockIRQ};
 use core::sync::atomic::Ordering;
-use core::fmt::Arguments;
-use crate::sync::SpinLock;
 use crate::scheduler;
+use crate::i386::gdt::GdtIndex;
+use crate::i386::gdt::DOUBLE_FAULT_TASK;
+use crate::panic::{kernel_panic, PanicOrigin};
+use crate::i386::structures::gdt::SegmentSelector;
+use crate::i386::registers::eflags::EFlags;
+use crate::mem::{UserSpacePtr, UserSpacePtrMut};
+use crate::error::UserspaceError;
+use crate::interrupts::syscalls::*;
+use bit_field::BitArray;
+use sunrise_libkern::{nr, SYSCALL_NAMES};

-mod irq;
-mod syscalls;
+pub mod syscalls;

 /// Checks if our thread was killed, in which case unschedule ourselves.
 ///
@@ -38,314 +66,783 @@ pub fn check_thread_killed() {
     }
 }

-/// Panics with an informative message.
-fn panic_on_exception(exception_string: Arguments<'_>, exception_stack_frame: &ExceptionStackFrame) -> ! {
-    unsafe {
-        // safe: we're not passing a stackdump_source
-        //       so it will use our current kernel stack, which is safe.
-        crate::do_panic(
-            format_args!("{} in {:?}: {:?}",
-                exception_string,
-                scheduler::try_get_current_process().as_ref().map(|p| &p.name),
-                exception_stack_frame),
-            None,

+/// Represents a register backup.
+///
+/// The exception wrapper constructs this structure before calling the exception handler,
+/// and saves it to the ThreadStruct for debug purposes.
+///
+/// When the exception handler returns, the wrapper pops it before returning to
+/// userspace, allowing precise control over register state.
+/// The only exception being `.esp`, which will not be reloaded into `esp`, see [trap_gate_asm].
+#[repr(C)]
+#[derive(Debug, Clone, Default)]
+#[allow(clippy::missing_docs_in_private_items)]
+#[allow(missing_docs)]
+pub struct UserspaceHardwareContext {
+    pub esp: usize,
+    pub ebp: usize,
+    pub edi: usize,
+    pub esi: usize,
+    pub edx: usize,
+    pub ecx: usize,
+    pub ebx: usize,
+    pub eax: usize,
+    // pushed by cpu:
+    pub errcode: usize,
+    pub eip: usize,
+    pub cs: usize,
+    pub eflags: usize,
+}
+
+impl core::fmt::Display for UserspaceHardwareContext {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> Result<(), core::fmt::Error> {
+        writeln!(f, "EIP={:#010x} ESP={:#010x} EBP={:#010x}\n\
+                     EAX={:#010x} EBX={:#010x} ECX={:#010x} EDX={:#010x}\n\
+                     ESI={:#010x} EDI={:#010X}\n\
+                     EFLAGS={:?}\n\
+                     CS={:?}",
+                self.eip, self.esp, self.ebp,
+                self.eax, self.ebx, self.ecx, self.edx,
+                self.esi, self.edi,
+                EFlags::from_bits_truncate(self.eflags as u32),
+                SegmentSelector(self.cs as u16),
+        )
+    }
+}

-/// Divide by zero interruption handler. Kills the process unconditionally.
+// gonna write constants in the code, cause not enough registers.
+// just check we aren't hard-coding the wrong values.
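+// (Selector arithmetic reminder: (GDT index << 3) | RPL. KTls sits at GDT index 3,
+// so (3 << 3) | 0b00 == 0x18, an RPL0 kernel selector; the two user TLS selectors
+// below are the same arithmetic with RPL3.)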
+const_assert_eq!((GdtIndex::KTls as u16) << 3 | 0b00, 0x18);
+const_assert_eq!((GdtIndex::UTlsRegion as u16) << 3 | 0b11, 0x3B);
+const_assert_eq!((GdtIndex::UTlsElf as u16) << 3 | 0b11, 0x43);
+
+/// The exception/syscall handler asm wrapper.
+///
+/// When the cpu handles a Trap/Interrupt gate, it:
+///
+/// 1. decides if it must switch stacks. If it does, we configured it to switch to the current
+///    thread's kernel stack. It will then push userspace `ss` and `esp`.
+/// 2. pushes `eflags`, `cs`, `eip`.
+/// 3. optionally pushes an errorcode, depending on the exception.
+///
+/// This is just enough for the cpu to restore the context on `iret` and optionally switch back to
+/// the userspace stack.
+///
+/// On those cpu-pushed registers, we push the rest of the hardware context, so we can restore it
+/// at the end of the isr. By doing so, we're constructing a [UserspaceHardwareContext] on the stack,
+/// but whose `eflags`, `cs` and `eip` fields point to words that were actually pushed by the cpu.
+///
+/// We then call the isr, passing it a pointer to this structure. The isr is free to modify the
+/// backed-up registers, including the cpu-pushed ones, and those modifications will be popped into
+/// the registers at the end of the isr, effectively changing context after we `iret`.
+///
+/// ## Diagram
+///
+/// ```txt
+///     Privilege changed             Privilege unchanged
+///  (e.g. int, syscall)           (e.g. kernel fault,
+///                                 int during kernel)
+///
+///                                    Page fault
+/// +----------------+               +----------------+
+/// |       SS       |               |                |
+/// +----------------+               +----------------+
+/// |       ESP      |               |                |
+/// +----------------+               +----------------+
+/// |     EFLAGS     | <+         +> |     EFLAGS     | <-+    <-+
+/// +----------------+  |         |  +----------------+   |      |
+/// |       CS       |  |         |  |       CS       |   |      |
+/// +----------------+  |         |  +----------------+   |      | Registers pushed by CPU
+/// |       EIP      |  |         |  |       EIP      |   |      |
+/// +----------------+  |         |  +----------------+   |      |
+/// |   Error code   |  |         |  |   Error code   | <-+      |
+/// +****************+  |         |  +****************+          |
+/// |   Pushed eax   |  |         |  |   Pushed eax   |          |
+/// +----------------+  |         |  +----------------+          |
+/// |   Pushed ebx   |  |         |  |   Pushed ebx   |          |
+/// +----------------+  |         |  +----------------+          | struct UserspaceHardwareContext
+/// |   Pushed ecx   |  |         |  |   Pushed ecx   |          | passed as an argument
+/// +----------------+  |         |  +----------------+          |
+/// |   Pushed edx   |  |         |  |   Pushed edx   |          |
+/// +----------------+  |         |  +----------------+          |
+/// |   Pushed esi   |  |         |  |   Pushed esi   |          |
+/// +----------------+  |         |  +----------------+          |
+/// |   Pushed edi   |  |         |  |   Pushed edi   |          |
+/// +----------------+  |         |  +----------------+          |
+/// |   Pushed ebp   |  |         |  |   Pushed ebp   |          |
+/// +----------------+  |         |  +----------------+          |
+/// | Pushed esp cpy |  |         |  | Pushed esp cpy |        <-+
+/// +----------------+  |         |  +----------------+
+/// | Pushed arg ptr | -+         +- | Pushed arg ptr |
+/// +----------------+               +----------------+
+/// ```
+///
+/// ##### ESP
+///
+/// The only register that can't be modified by the isr is the `esp` register.
+///
+/// Because this register is only pushed by the cpu when Privilege changed, we must take extra
+/// precautions when reading/writing it from the stack, if we don't want to page fault.
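+///
+/// In pseudo-code, what the asm below does to recover it is roughly (illustrative
+/// sketch, not actual code):
+///
+/// ```rust
+/// let esp = if pushed_cs & 0b11 != 0 { pushed_esp } else { esp_at_push_time + 0x2C };
+/// ```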
+///
+/// When reading it we use the pushed `cs` to determine if we did change privilege, in which case
+/// we proceed to read it, otherwise we can assume we're running on the same stack,
+/// and deduce it from our current `esp` value.
+///
+/// If the isr modifies `esp` and we're in the Privilege Unchanged situation, there is no way
+/// for us to make the cpu use this `esp` after we `iret`, that is, to make the change effective.
+/// For this reason we never bother to copy the `esp` from the UserspaceHardwareContext back to the stack.
+///
+/// ## Usage
+///
+/// This macro is intended to be inserted in an `asm!()` block, like this:
+///
+/// ```rust
+/// extern "C" fn my_isr_function(userspace_context: &mut UserspaceHardwareContext) {
+///     // irq handling here
+/// }
+///
+/// unsafe {
+///     asm!(trap_gate_asm!(has_errorcode: false)
+///     :: "i"(my_isr_function as *const u8) :: "volatile", "intel");
+/// }
+/// ```
+///
+/// Because `asm!()` expects a literal, `trap_gate_asm` needs to be a macro.
+///
+/// ## Error code
+///
+/// Some exceptions push an additional errcode on the stack and some don't.
+///
+/// When one is pushed by the cpu, the isr is still expected to pop it before calling `iret`.
+///
+/// Because we want to handle both cases in a similar way, for exceptions that are errorcode-less
+/// we push a fake error code on the stack as if the cpu did it, and handle everything else in
+/// one code path.
+///
+/// When returning from the exception, the isr will unconditionally pop the errcode,
+/// with no regard for whether it was real or not, and call `iret`.
+///
+/// [UserspaceHardwareContext]: crate::interrupts::UserspaceHardwareContext
+#[macro_export] // for docs
+macro_rules! trap_gate_asm {
+    (has_errorcode: true) => { "
+        // Direction flag will be restored on return when iret pops EFLAGS
+        cld

-    let thread = get_current_thread();
-    error!("Divide Error Exception in {:#?}", thread);
-    ProcessStruct::kill_process(thread.process.clone());
-    }

+        // Construct UserspaceHardwareContext structure
+        push eax
+        push ebx
+        push ecx
+        push edx
+        push esi
+        push edi
+        push ebp

-    check_thread_killed();
-}

+        // Are we in the privilege change state or unchanged ? Look at pushed CS
+        mov eax, [esp + 0x24] // cs is 9 registers away at that time * 4 bytes / reg
+        and eax, 0x3
+        jz 1f

-/// Debug interruption handler. Kills the process unconditionally.
-///
-/// If the panic-on-exception feature is enabled, this will also panic the kernel.
-extern "x86-interrupt" fn debug_handler(stack_frame: &mut ExceptionStackFrame) {
-    {
-    if cfg!(feature = "panic-on-exception") {
-        panic_on_exception(format_args!("Debug Exception"), stack_frame);
-    }

+        0: // if priv changed
+        // copy the esp pushed by cpu
+        mov eax, [esp + 0x2C] // esp is 11 registers away at that time * 4 bytes / reg
+        push eax

-    let thread = get_current_thread();
-    error!("Debug Exception in {:#?}", thread);
-    ProcessStruct::kill_process(thread.process.clone());
-    }

+        // Load kernel tls segment
+        mov ax, 0x18
+        mov gs, ax

-    check_thread_killed();
-}

+        jmp 2f
+        1: // else if priv unchanged
+        // cpu did not push an esp, we are still running on the same stack: compute it
+        mov eax, esp
+        add eax, 0x2C // old esp is 11 registers away at that time * 4 bytes / reg
+        push eax
+        2: // endif

-/// Non maskable interruption handler. Unconditionally panics the kernel.
-extern "x86-interrupt" fn non_maskable_interrupt_handler(stack_frame: &mut ExceptionStackFrame) { - // unconditionally panic - panic_on_exception(format_args!("An unexpected non-maskable (but still kinda maskable) interrupt occured"), stack_frame); -} + // Great, registers are now fully backed up -/// Breakpoint interruption handler. Does nothing. -extern "x86-interrupt" fn breakpoint_handler(_stack_frame: &mut ExceptionStackFrame) { - // don't do anything -} + // Call some rust code, passing it a pointer to the UserspaceHardwareContext + call $0 -/// Overflow interruption handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn overflow_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Overflow Exception"), stack_frame); - } + // Handler finished, returning - let thread = get_current_thread(); - error!("Overflow Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } + // Check whether we're returning to the same privilege level + mov eax, [esp + 0x2C] // cs is 11 registers away at that time * 4 bytes / reg + and eax, 0x3 + jz 4f + 3: // if changing priv + // Load userspace tls segment + mov ax, 0x43 + mov gs, ax + jmp 5f + 4: // else if not changing priv + 5: // endif + + // Restore registers. + add esp, 0x8 // pop and ignore the pushed arg ptr and esp cpy + pop ebp + pop edi + pop esi + pop edx + pop ecx + pop ebx + pop eax + add esp, 0x4 // pop the errcode pushed by cpu before iret - check_thread_killed(); + // Return from the interrupt + iretd + " }; + (has_errorcode: false) => { + concat!(" + push 0x0 // push a fake errcode", + trap_gate_asm!(has_errorcode: true) + ) + }; } -/// Bound range exceeded interruption handler. Kills the process unconditionally. +/// Generates a trap/interrupt gate isr. /// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn bound_range_exceeded_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("BOUND Range Exceeded Exception"), stack_frame); - } +/// # Goal +/// +/// This macro generates a handler for a trap/interrupt gate that will: +/// +/// 1. save userspace hardware context in the [ThreadStruct] +/// 2. check boilerplate conditions like if the kernel generated the instruction, or if "panic-on-exception" is on. +/// 3. call a function to handle the interrupt +/// 4. check if the current process was killed, in which case unschedule instead ourselves of returning +/// 5. restore the userspace context +/// 6. `iret` +/// +/// This macro is designed to be modular, the idea being that every exception does pretty much the same thing, +/// but in a slightly different way. Because of this we want the step 2 and 3 to be parameterizable. +/// +/// The way we do this is defining a few standard strategies for step 2 and 3, letting the user choose +/// which one it wants, and also letting the user override those strategies if they do not fit its use case. +/// +/// The macro uses [`trap_gate_asm`] as a the low-level asm handler. +/// +/// # Usage +/// +/// You are expected to use this macro in the following way: +/// +/// ```rust +/// generate_trap_gate_handler!(name: "BOUND Range Exceeded Exception", // name of this interrupt, used for logging and when panicking. 
+///                            has_errcode: false, // whether the cpu pushes an error code on the stack for this interrupt.
+///                            wrapper_asm_fnname: bound_range_exceeded_exception_asm_wrapper, // name for the raw asm function this macro will generate. You can then put this function's address in the IDT.
+///                            wrapper_rust_fnname: bound_range_exceeded_exception_rust_wrapper, // name for the high-level rust handler this macro will generate.
+///                            kernel_fault_strategy: panic, // what to do if we were in kernelspace when this interruption happened.
+///                            user_fault_strategy: panic, // what to do if we were in userspace when this interruption happened, and feature "panic-on-exception" is enabled.
+///                            handler_strategy: kill // what to do for this interrupt otherwise
+///);
+/// ```
+///
+/// * The possible values for `kernel_fault_strategy` and `user_fault_strategy` are:
+///     * `panic`: causes a kernel panic.
+///     * `ignore`: don't do anything for this condition.
+///     * `my_handler_func`: calls `my_handler_func` to handle this condition. Useful if you want to override a standard strategy.
+/// * The possible values for `handler_strategy` are:
+///     * `panic`: causes a kernel panic.
+///     * `ignore`: don't do anything for this interrupt.
+///     * `kill`: kills the process in which this interrupt originated.
+///     * `my_handler_func`: calls `my_handler_func` to handle this interrupt. Useful if you want to override a standard strategy.
+///
+/// When providing a custom function as strategy, the function must be of signature:
+///
+/// ```
+/// fn my_handler_func(exception_name: &'static str, hwcontext: &mut UserspaceHardwareContext, has_errcode: bool)
+/// ```
+///
+/// The [UserspaceHardwareContext] saved by the wrapper is passed by mut reference so the handler can modify it.
+/// Those modifications will be effective as soon as we `iret`.
+///
+/// # Generates
+///
+/// This will generate some code along the lines of:
+///
+/// ```
+/// #[naked]
+/// extern "C" fn $wrapper_asm_fnname() {
+///     unsafe {
+///         asm!(trap_gate_asm!(has_errorcode: $has_errcode)
+///         :: "s"($wrapper_rust_fnname as extern "C" fn (&mut UserspaceHardwareContext)) : "memory" : "volatile", "intel");
+///     }
+/// }
+///
+/// extern "C" fn $wrapper_rust_fnname(userspace_context: &mut UserspaceHardwareContext) {
+///
+///     if coming from Ring == 0 {
+///
+///         kernel_panic(&PanicOrigin::KernelFault {                                 //
+///             exception_message: format_args!("{}, exception errcode: {:?}",       //
+///                 $exception_name,                                                 // kernel_fault_strategy
+///                 userspace_context.errcode),                                      // (here: panic)
+///             kernel_hardware_context: userspace_context.clone()                   //
+///         });                                                                      //
+///
+///     } else {
+///
+///         // we come from userspace, backup the hardware context in the thread struct
+///         {
+///             *get_current_thread().userspace_hwcontext.lock() = *userspace_context
+///         }
+///
+///         if cfg!(feature = "panic-on-exception") {
+///
+///             kernel_panic(&PanicOrigin::UserspaceFault {                          //
+///                 exception_message: format_args!("{}, exception errcode: {:?}",   //
("{}, exception errcode: {:?}", // +/// $exception_name, // user_fault_strategy +/// userspace_context.errcode), // (here: panic) +/// userspace_hardware_context: userspace_context.clone() // +/// }); // +/// } +/// +/// } +/// +/// // do the handler +/// { +/// let thread = get_current_thread(); // +/// error!("{}, errorcode: {}, in {:#?}", // handler_strategy +/// $exception_name, $hwcontext.errcode, thread); // (here: kill) +/// ProcessStruct::kill_process(thread.process.clone()); // +/// } +/// +/// // if we're returning to userspace, check we haven't been killed +/// if comming from Ring == 3 { +/// check_thread_killed(); +/// } +/// } +/// ``` +/// +/// [ThreadStruct]: crate::process::ThreadStruct +/// [UserspaceHardwareContext]: crate::interrupts::UserspaceHardwareContext +#[macro_export] // for docs +macro_rules! generate_trap_gate_handler { - let thread = get_current_thread(); - error!("BOUND Range Exceeded Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } + // __gen rules are meant to be called recursively. - check_thread_killed(); -} + /* standard strategies */ -/// Invalid opcode interruption handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn invalid_opcode_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Invalid opcode Exception"), stack_frame); + // if cs == 0 { + (__gen kernel_fault; name: $exception_name:literal, $hwcontext:ident, errcode: true, strategy: panic) => { + kernel_panic(&PanicOrigin::KernelFault { + exception_message: format_args!("{}, exception errcode: {:?}", + $exception_name, + $hwcontext.errcode), + kernel_hardware_context: $hwcontext.clone() + }); + }; + + (__gen kernel_fault; name: $exception_name:literal, $hwcontext:ident, errcode: false, strategy: panic) => { + kernel_panic(&PanicOrigin::KernelFault { + exception_message: format_args!("{}", + $exception_name), + kernel_hardware_context: $hwcontext.clone() + }); + }; + // } + + // if cs == 3 && panic-on-exception { + (__gen user_fault; name: $exception_name:literal, $hwcontext:ident, errcode: true, strategy: panic) => { + kernel_panic(&PanicOrigin::UserspaceFault { + exception_message: format_args!("{}, exception errcode: {:?}", + $exception_name, + $hwcontext.errcode), + userspace_hardware_context: $hwcontext.clone() + }); + }; + + (__gen user_fault; name: $exception_name:literal, $hwcontext:ident, errcode: false, strategy: panic) => { + kernel_panic(&PanicOrigin::UserspaceFault { + exception_message: format_args!("{}", + $exception_name), + userspace_hardware_context: $hwcontext.clone() + }); + }; + // } + + // the handler + (__gen handler; name: $exception_name:literal, $hwcontext:ident, errcode: true, strategy: panic) => { + kernel_panic(&PanicOrigin::UserspaceFault { + exception_message: format_args!("Unexpected exception: {}, exception errcode: {:?}", + $exception_name, + $hwcontext.errcode), + userspace_hardware_context: $hwcontext.clone() + }); + }; + + (__gen handler; name: $exception_name:literal, $hwcontext:ident, errcode: false, strategy: panic) => { + kernel_panic(&PanicOrigin::KernelFault { + exception_message: format_args!("Unexpected exception: {}", + $exception_name), + kernel_hardware_context: $hwcontext.clone() + }); + }; + + (__gen handler; name: $exception_name:literal, $hwcontext:ident, errcode: true, strategy: kill) => { + { + let thread = 
+            let thread = get_current_thread();
+            error!("{}, errorcode: {}, in {:#?}", $exception_name, $hwcontext.errcode, thread);
+            ProcessStruct::kill_process(thread.process.clone());
+        }
+    };
+
+    (__gen handler; name: $exception_name:literal, $hwcontext:ident, errcode: false, strategy: kill) => {
+        {
+            let thread = get_current_thread();
+            error!("{}, in {:#?}", $exception_name, thread);
+            ProcessStruct::kill_process(thread.process.clone());
+        }
+    };
+    // end handler

+    // strategy: ignore, shared by all __gen rules
+    (__gen $_all:ident; name: $_exception_name:literal, $_hwcontext:ident, errcode: $_errcode:ident, strategy: ignore) => {
+        /* ignored */
+    };

+    // strategy: call external handler, shared by all __gen rules
+    //
+    // `handler: fn (&'static str, &mut UserspaceHardwareContext, bool)`
+    (__gen $_all:ident; name: $exception_name:literal, $hwcontext:ident, errcode: $errcode:ident, strategy: $fnname:ident) => {
+        $fnname($exception_name, $hwcontext, $errcode);
+    };

-/// Device not available interruption handler. Kills the process unconditionally.
-///
-/// If the panic-on-exception feature is enabled, this will also panic the kernel.
-extern "x86-interrupt" fn device_not_available_handler(stack_frame: &mut ExceptionStackFrame) {
-    {
-    if cfg!(feature = "panic-on-exception") {

+    /* ASM wrapper */
+
+    // Generates a naked function with asm that will call `$wrapper_rust_fnname`.
+    //
+    // Generic over `has_errorcode`.
+    (__gen asm_wrapper; $wrapper_asm_fnname:ident, $wrapper_rust_fnname:ident, $errcode:ident) => {
+        /// Auto generated function. See [generate_trap_gate_handler].
+        #[naked]
+        extern "C" fn $wrapper_asm_fnname() {
+            unsafe {
+                asm!(trap_gate_asm!(has_errorcode: $errcode)
+                :: "s"($wrapper_rust_fnname as extern "C" fn (&mut UserspaceHardwareContext)) : "memory" : "volatile", "intel");
+            }
+        }
+    };

-    let thread = get_current_thread();
-    error!("Device Not Available Exception in {:#?}", thread);
-    ProcessStruct::kill_process(thread.process.clone());
-    }

-    check_thread_killed();
-}

+    /* The full wrapper */
+
+    // The rule called to generate an exception handler.
+    (
+        name: $exception_name:literal,
+        has_errcode: $has_errcode:ident,
+        wrapper_asm_fnname: $wrapper_asm_fnname:ident,
+        wrapper_rust_fnname: $wrapper_rust_fnname:ident,
+        kernel_fault_strategy: $kernel_fault_strategy:ident,
+        user_fault_strategy: $user_fault_strategy:ident,
+        handler_strategy: $handler_strategy:ident
+    ) => {

-/// Double fault handler. Panics the kernel unconditionally.
-fn double_fault_handler() {
-    // Get the Main TSS so I can recover some information about what happened.
-    unsafe {
-        // Safety: gdt::MAIN_TASK should always point to a valid TssStruct.
-        if let Some(tss_main) = (gdt::MAIN_TASK.addr() as *const TssStruct).as_ref() {
-
-            // safe: we're in an exception handler, nobody can modify the faulty thread's stack.
-            crate::do_panic(format_args!("Double fault!
- EIP={:#010x} CR3={:#010x} - EAX={:#010x} EBX={:#010x} ECX={:#010x} EDX={:#010x} - ESI={:#010x} EDI={:#010X} ESP={:#010x} EBP={:#010x}", - tss_main.eip, tss_main.cr3, - tss_main.eax, tss_main.ebx, tss_main.ecx, tss_main.edx, - tss_main.esi, tss_main.edi, tss_main.esp, tss_main.ebp), - Some(crate::stack::StackDumpSource::new( - tss_main.esp as usize, tss_main.ebp as usize, tss_main.eip as usize - ))); - } else { - // safe: we're not passing a stackdump_source - // so it will use our current stack, which is safe. - crate::do_panic(format_args!("Doudble fault! Cannot get main TSS, good luck"), None) + generate_trap_gate_handler!(__gen asm_wrapper; $wrapper_asm_fnname, $wrapper_rust_fnname, $has_errcode); + + /// Auto generated function. See [generate_trap_gate_handler]. + extern "C" fn $wrapper_rust_fnname(userspace_context: &mut UserspaceHardwareContext) { + + use crate::i386::structures::gdt::SegmentSelector; + + + if let PrivilegeLevel::Ring0 = SegmentSelector(userspace_context.cs as u16).rpl() { + generate_trap_gate_handler!(__gen kernel_fault; name: $exception_name, userspace_context, errcode: $has_errcode, strategy: $kernel_fault_strategy); + } else { + // we come from userspace, backup the hardware context in the thread struct + { + *get_current_thread().userspace_hwcontext.lock() = userspace_context.clone(); + // don't leave an Arc in case we're killed in the handler. + } + + if cfg!(feature = "panic-on-exception") { + generate_trap_gate_handler!(__gen user_fault; name: $exception_name, userspace_context, errcode: $has_errcode, strategy: $user_fault_strategy); + } + } + + // call the handler + generate_trap_gate_handler!(__gen handler; name: $exception_name, userspace_context, errcode: $has_errcode, strategy: $handler_strategy); + + // if we're returning to userspace, check we haven't been killed + if let PrivilegeLevel::Ring3 = SegmentSelector(userspace_context.cs as u16).rpl() { + check_thread_killed(); + } } - } + }; } -/// Invalid tss interruption handler. Panics the kernel unconditionally. -extern "x86-interrupt" fn invalid_tss_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - // inconditionally panic - panic_on_exception(format_args!("Invalid TSS Exception: error code {:?}", errcode), stack_frame); -} +/* */ +/* Generate the wrappers */ +/* */ -/// Segment not present interruption handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. 
-extern "x86-interrupt" fn segment_not_present_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Segment Not Present: error code: {:?}", errcode), stack_frame); - } +generate_trap_gate_handler!(name: "Divide Error Exception", + has_errcode: false, + wrapper_asm_fnname: divide_by_zero_exception_asm_wrapper, + wrapper_rust_fnname: divide_by_zero_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - let thread = get_current_thread(); - error!("Segment Not Present in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +generate_trap_gate_handler!(name: "Debug Exception", + has_errcode: false, + wrapper_asm_fnname: debug_exception_asm_wrapper, + wrapper_rust_fnname: debug_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: panic +); - check_thread_killed(); -} +generate_trap_gate_handler!(name: "An unexpected non-maskable (but still kinda maskable) interrupt occurred", + has_errcode: false, + wrapper_asm_fnname: nmi_exception_asm_wrapper, + wrapper_rust_fnname: nmi_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: panic +); -/// Stack segment fault handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn stack_segment_fault_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Stack Fault Exception: error code: {:?}", errcode), stack_frame); - } +generate_trap_gate_handler!(name: "Breakpoint Exception", + has_errcode: false, + wrapper_asm_fnname: breakpoint_exception_asm_wrapper, + wrapper_rust_fnname: breakpoint_exception_rust_wrapper, + kernel_fault_strategy: ignore, + user_fault_strategy: ignore, + handler_strategy: panic +); - let thread = get_current_thread(); - error!("Exception : Stack Fault Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +generate_trap_gate_handler!(name: "Overflow Exception", + has_errcode: false, + wrapper_asm_fnname: overflow_exception_asm_wrapper, + wrapper_rust_fnname: overflow_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - check_thread_killed(); -} +generate_trap_gate_handler!(name: "BOUND Range Exceeded Exception", + has_errcode: false, + wrapper_asm_fnname: bound_range_exceeded_exception_asm_wrapper, + wrapper_rust_fnname: bound_range_exceeded_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); -/// General protection fault handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. 
-extern "x86-interrupt" fn general_protection_fault_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("General Protection Fault Exception: error code: {:?}", errcode), stack_frame); - } +generate_trap_gate_handler!(name: "Invalid opcode Exception", + has_errcode: false, + wrapper_asm_fnname: invalid_opcode_exception_asm_wrapper, + wrapper_rust_fnname: invalid_opcode_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - let thread = get_current_thread(); - error!("Exception : General Protection Fault Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +generate_trap_gate_handler!(name: "Device Not Available Exception", + has_errcode: false, + wrapper_asm_fnname: device_not_available_exception_asm_wrapper, + wrapper_rust_fnname: device_not_available_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - check_thread_killed(); +/// Double fault handler. Panics the kernel unconditionally. +/// +/// This one is called via a Task Gate, we don't generate a wrapper for it. +fn double_fault_handler() { + kernel_panic(&PanicOrigin::DoubleFault); } -/// Page fault handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn page_fault_handler(stack_frame: &mut ExceptionStackFrame, errcode: PageFaultErrorCode) { - { - let cause_address = crate::paging::read_cr2(); +generate_trap_gate_handler!(name: "Invalid TSS Exception", + has_errcode: true, + wrapper_asm_fnname: invalid_tss_exception_asm_wrapper, + wrapper_rust_fnname: invalid_tss_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: panic +); - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Page Fault accessing {:?}, error: {:?}", cause_address, errcode), stack_frame); - } +generate_trap_gate_handler!(name: "Segment Not Present Exception", + has_errcode: true, + wrapper_asm_fnname: segment_not_present_exception_asm_wrapper, + wrapper_rust_fnname: segment_not_present_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - let thread = get_current_thread(); - error!("Exception : Page Fault accessing {:?}, error: {:?} in {:#?}", cause_address, errcode, thread); - ProcessStruct::kill_process(thread.process.clone()); - } +generate_trap_gate_handler!(name: "Stack Fault Exception", + has_errcode: true, + wrapper_asm_fnname: stack_fault_exception_asm_wrapper, + wrapper_rust_fnname: stack_fault_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - check_thread_killed(); -} +generate_trap_gate_handler!(name: "General Protection Fault Exception", + has_errcode: true, + wrapper_asm_fnname: general_protection_fault_exception_asm_wrapper, + wrapper_rust_fnname: general_protection_fault_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); -/// X87 floating point interruption handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. 
-extern "x86-interrupt" fn x87_floating_point_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("x87 FPU floating-point error"), stack_frame); - } +generate_trap_gate_handler!(name: "Page Fault Exception", + has_errcode: true, + wrapper_asm_fnname: page_fault_exception_asm_wrapper, + wrapper_rust_fnname: page_fault_exception_rust_wrapper, + kernel_fault_strategy: kernel_page_fault_panic, + user_fault_strategy: user_page_fault_panic, + handler_strategy: user_page_fault_handler +); - let thread = get_current_thread(); - error!("x87 FPU floating-point error in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +/// Overriding the default panic strategy so we can display cr2 +fn kernel_page_fault_panic(_exception_name: &'static str, hwcontext: &mut UserspaceHardwareContext, _has_errcode: bool) { + let errcode = PageFaultErrorCode::from_bits_truncate(hwcontext.errcode as u32); + let cause_address = crate::paging::read_cr2(); - check_thread_killed(); + kernel_panic(&PanicOrigin::KernelFault { + exception_message: format_args!("Page Fault accessing {:?}, exception errcode: {:?}", + cause_address, + errcode), + kernel_hardware_context: hwcontext.clone() + }); } -/// Alignment check interruption handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn alignment_check_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Alignment Check Exception: error code: {:?}", errcode), stack_frame); - } - - let thread = get_current_thread(); - error!("Alignment Check Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +/// Overriding the default panic strategy so we can display cr2 +fn user_page_fault_panic(_exception_name: &'static str, hwcontext: &mut UserspaceHardwareContext, _has_errcode: bool) { + let errcode = PageFaultErrorCode::from_bits_truncate(hwcontext.errcode as u32); + let cause_address = crate::paging::read_cr2(); - check_thread_killed(); + kernel_panic(&PanicOrigin::UserspaceFault { + exception_message: format_args!("Page Fault accessing {:?}, exception errcode: {:?}", + cause_address, + errcode), + userspace_hardware_context: hwcontext.clone() + }); } -/// Machine check interruption handler. Panics the kernel unconditionally. -extern "x86-interrupt" fn machine_check_handler(stack_frame: &mut ExceptionStackFrame) { - // unconditionally panic - panic_on_exception(format_args!("Machine-Check Exception"), stack_frame); +/// Overriding the default kill strategy so we can display cr2 +fn user_page_fault_handler(_exception_name: &'static str, hwcontext: &mut UserspaceHardwareContext, _has_errcode: bool) { + let errcode = PageFaultErrorCode::from_bits_truncate(hwcontext.errcode as u32); + let cause_address = crate::paging::read_cr2(); + + let thread = get_current_thread(); + error!("Page Fault accessing {:?}, exception errcode: {:?} in {:#?}", cause_address, errcode, thread); + ProcessStruct::kill_process(thread.process.clone()); } -/// SIMD exception handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. 
-extern "x86-interrupt" fn simd_floating_point_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("SIMD Floating-Point Exception"), stack_frame); - } +generate_trap_gate_handler!(name: "x87 FPU floating-point error", + has_errcode: false, + wrapper_asm_fnname: x87_floating_point_exception_asm_wrapper, + wrapper_rust_fnname: x87_floating_point_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - let thread = get_current_thread(); - error!("SIMD Floating-Point Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); - } +generate_trap_gate_handler!(name: "Alignment Check Exception", + has_errcode: true, + wrapper_asm_fnname: alignment_check_exception_asm_wrapper, + wrapper_rust_fnname: alignment_check_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - check_thread_killed(); -} +generate_trap_gate_handler!(name: "Machine-Check Exception", + has_errcode: false, + wrapper_asm_fnname: machine_check_exception_asm_wrapper, + wrapper_rust_fnname: machinee_check_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: panic +); -/// Virtualization exception handler. Kills the process unconditionally. -/// -/// If the panic-on-exception feature is enabled, this will also panic the kernel. -extern "x86-interrupt" fn virtualization_handler(stack_frame: &mut ExceptionStackFrame) { - { - if cfg!(feature = "panic-on-exception") { - panic_on_exception(format_args!("Virtualization Exception"), stack_frame); - } +generate_trap_gate_handler!(name: "SIMD Floating-Point Exception", + has_errcode: false, + wrapper_asm_fnname: simd_floating_point_exception_asm_wrapper, + wrapper_rust_fnname: simd_floating_point_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); + +generate_trap_gate_handler!(name: "Virtualization Exception", + has_errcode: false, + wrapper_asm_fnname: virtualization_exception_asm_wrapper, + wrapper_rust_fnname: virtualization_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: kill +); - let thread = get_current_thread(); - error!("Virtualization Exception in {:#?}", thread); - ProcessStruct::kill_process(thread.process.clone()); +generate_trap_gate_handler!(name: "Security Exception", + has_errcode: true, + wrapper_asm_fnname: security_exception_asm_wrapper, + wrapper_rust_fnname: security_exception_rust_wrapper, + kernel_fault_strategy: panic, + user_fault_strategy: panic, + handler_strategy: panic +); + +generate_trap_gate_handler!(name: "Syscall Interrupt", + has_errcode: false, + wrapper_asm_fnname: syscall_interrupt_asm_wrapper, + wrapper_rust_fnname: syscall_interrupt_rust_wrapper, + kernel_fault_strategy: panic, // you aren't expected to syscall from the kernel + user_fault_strategy: ignore, // don't worry it's fine ;) + handler_strategy: syscall_interrupt_dispatcher +); + +impl UserspaceHardwareContext { + /// Update the Registers with the passed result. + fn apply0(&mut self, ret: Result<(), UserspaceError>) { + self.apply3(ret.map(|_| (0, 0, 0))) } - check_thread_killed(); -} + /// Update the Registers with the passed result. + fn apply1(&mut self, ret: Result) { + self.apply3(ret.map(|v| (v, 0, 0))) + } -/// Security exception handler. Panics the kernel unconditionally. 
-extern "x86-interrupt" fn security_exception_handler(stack_frame: &mut ExceptionStackFrame, errcode: u32) { - // unconditionally panic - panic_on_exception(format_args!("Unexpected Security Exception: error code {:?}", errcode), stack_frame); + /// Update the Registers with the passed result. + fn apply2(&mut self, ret: Result<(usize, usize), UserspaceError>) { + self.apply3(ret.map(|(v0, v1)| (v0, v1, 0))) + } + + /// Update the Registers with the passed result. + fn apply3(&mut self, ret: Result<(usize, usize, usize), UserspaceError>) { + self.apply4(ret.map(|(v0, v1, v2)| (v0, v1, v2, 0))) + } + + /// Update the Registers with the passed result. + fn apply4(&mut self, ret: Result<(usize, usize, usize, usize), UserspaceError>) { + match ret { + Ok((v0, v1, v2, v3)) => { + self.eax = 0; + self.ebx = v0; + self.ecx = v1; + self.edx = v2; + self.esi = v3; + self.edi = 0; + self.ebp = 0; + }, + Err(err) => { + self.eax = err.make_ret() as usize; + self.ebx = 0; + self.ecx = 0; + self.edx = 0; + self.esi = 0; + self.edi = 0; + self.ebp = 0; + } + } + } } /// This is the function called on int 0x80. @@ -381,36 +878,163 @@ extern "x86-interrupt" fn security_exception_handler(stack_frame: &mut Exception /// /// We do *NOT* restore registers before returning, as they all are used for parameter passing. /// It is the caller's job to save the one it needs. -#[naked] -extern "C" fn syscall_handler() { - unsafe { - asm!(" - cld // direction flag will be restored on return when iret pops EFLAGS - // Construct Registers structure - see syscalls for more info - push ebp - push edi - push esi - push edx - push ecx - push ebx - push eax - // Push pointer to Registers structure as argument - push esp - call $0 - // Restore registers. - mov ebx, [esp + 0x08] - mov ecx, [esp + 0x0C] - mov edx, [esp + 0x10] - mov esi, [esp + 0x14] - mov edi, [esp + 0x18] - mov ebp, [esp + 0x1C] - mov eax, [esp + 0x04] - add esp, 0x20 - iretd - " :: "i"(syscalls::syscall_handler_inner as *const u8) :: "volatile", "intel" ); +/// +/// Dispatches to the various syscall handling functions based on `hwcontext.eax`, +/// and updates the hwcontext with the correct return values. +// TODO: Missing argument slot for SVCs on i386 backend +// BODY: Our i386 SVC ABI is currently fairly different from the ABI used by +// BODY: Horizon/NX. This is for two reasons: +// BODY: +// BODY: 1. We are missing one argument slot compared to the official SVCs, so +// BODY: we changed the ABI to work around it. +// BODY: +// BODY: 2. The Horizon ABI "skipping" over some register is an optimization for +// BODY: ARM, but doesn't help on i386. +// BODY: +// BODY: That being said, there is a way for us to recover the missing SVC slot. +// BODY: We are currently "wasting" x0 for the syscall number. We could avoid +// BODY: this by instead using different IDT entries for the different syscalls. +// BODY: This is actually more in line with what the Horizon/NX kernel is doing +// BODY: anyways. +// BODY: +// BODY: Once we've regained this missing slot, we'll be able to make our ABI +// BODY: match the Horizon/NX 32-bit ABI. While the "skipping over" doesn't help +// BODY: our performances, it doesn't really hurt it either, and having a uniform +// BODY: ABI across platforms would make for lower maintenance. 
+fn syscall_interrupt_dispatcher(_exception_name: &'static str, hwcontext: &mut UserspaceHardwareContext, _has_errcode: bool) { + + let (syscall_nr, x0, x1, x2, x3, x4, x5) = (hwcontext.eax, hwcontext.ebx, hwcontext.ecx, hwcontext.edx, hwcontext.esi, hwcontext.edi, hwcontext.ebp); + let syscall_name = SYSCALL_NAMES.get(syscall_nr).unwrap_or(&"Unknown"); + + debug!("Handling syscall {} - x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}", + syscall_name, x0, x1, x2, x3, x4, x5); + + let allowed = get_current_process().capabilities.syscall_mask.get_bit(syscall_nr); + + if cfg!(feature = "no-security-check") && !allowed { + let curproc = get_current_process(); + error!("Process {} attempted to use unauthorized syscall {} ({:#04x})", + curproc.name, syscall_name, syscall_nr); + } + + let allowed = cfg!(feature = "no-security-check") || allowed; + + match (allowed, syscall_nr) { + // Horizon-inspired syscalls! + (true, nr::SetHeapSize) => hwcontext.apply1(set_heap_size(x0)), + (true, nr::QueryMemory) => hwcontext.apply1(query_memory(UserSpacePtrMut(x0 as _), x1, x2)), + (true, nr::ExitProcess) => hwcontext.apply0(exit_process()), + (true, nr::CreateThread) => hwcontext.apply1(create_thread(x0, x1, x2, x3 as _, x4 as _)), + (true, nr::StartThread) => hwcontext.apply0(start_thread(x0 as _)), + (true, nr::ExitThread) => hwcontext.apply0(exit_thread()), + (true, nr::SleepThread) => hwcontext.apply0(sleep_thread(x0)), + (true, nr::MapSharedMemory) => hwcontext.apply0(map_shared_memory(x0 as _, x1 as _, x2 as _, x3 as _)), + (true, nr::UnmapSharedMemory) => hwcontext.apply0(unmap_shared_memory(x0 as _, x1 as _, x2 as _)), + (true, nr::CloseHandle) => hwcontext.apply0(close_handle(x0 as _)), + (true, nr::WaitSynchronization) => hwcontext.apply1(wait_synchronization(UserSpacePtr::from_raw_parts(x0 as _, x1), x2)), + (true, nr::ConnectToNamedPort) => hwcontext.apply1(connect_to_named_port(UserSpacePtr(x0 as _))), + (true, nr::SendSyncRequestWithUserBuffer) => hwcontext.apply0(send_sync_request_with_user_buffer(UserSpacePtrMut::from_raw_parts_mut(x0 as _, x1), x2 as _)), + (true, nr::OutputDebugString) => hwcontext.apply0(output_debug_string(UserSpacePtr::from_raw_parts(x0 as _, x1), x2, UserSpacePtr::from_raw_parts(x3 as _, x4))), + (true, nr::CreateSession) => hwcontext.apply2(create_session(x0 != 0, x1 as _)), + (true, nr::AcceptSession) => hwcontext.apply1(accept_session(x0 as _)), + (true, nr::ReplyAndReceiveWithUserBuffer) => hwcontext.apply1(reply_and_receive_with_user_buffer(UserSpacePtrMut::from_raw_parts_mut(x0 as _, x1), UserSpacePtr::from_raw_parts(x2 as _, x3), x4 as _, x5)), + (true, nr::CreateSharedMemory) => hwcontext.apply1(create_shared_memory(x0 as _, x1 as _, x2 as _)), + (true, nr::CreateInterruptEvent) => hwcontext.apply1(create_interrupt_event(x0, x1 as u32)), + (true, nr::QueryPhysicalAddress) => hwcontext.apply4(query_physical_address(x0 as _)), + (true, nr::CreatePort) => hwcontext.apply2(create_port(x0 as _, x1 != 0, UserSpacePtr(x2 as _))), + (true, nr::ManageNamedPort) => hwcontext.apply1(manage_named_port(UserSpacePtr(x0 as _), x1 as _)), + (true, nr::ConnectToPort) => hwcontext.apply1(connect_to_port(x0 as _)), + + // sunrise extensions + (true, nr::MapFramebuffer) => hwcontext.apply4(map_framebuffer()), + (true, nr::MapMmioRegion) => hwcontext.apply0(map_mmio_region(x0, x1, x2, x3 != 0)), + (true, nr::SetThreadArea) => hwcontext.apply0(set_thread_area(x0)), + + // Unknown/unauthorized syscall. + (false, _) => { + // Attempted to call unauthorized SVC. 
Horizon invokes usermode + // exception handling in some cases. Let's just kill the process for + // now. + let curproc = get_current_process(); + error!("Process {} attempted to use unauthorized syscall {} ({:#04x}), killing", + curproc.name, syscall_name, syscall_nr); + ProcessStruct::kill_process(curproc); + }, + _ => { + let curproc = get_current_process(); + error!("Process {} attempted to use unknown syscall {} ({:#04x}), killing", + curproc.name, syscall_name, syscall_nr); + ProcessStruct::kill_process(curproc); + } } } +/// Generates irq handlers. +/// +/// For each irq number it is given, this macro will generate an irq handler that: +/// +/// 1. acknowledges the irq +/// 2. dispatches the event for this irq line +/// +/// It uses [`generate_trap_gate_handler`] internally to generate the asm and low-level rust wrappers. +/// You must give it an ident for both of those functions that will be passed on to `generate_trap_gate_handler`, +/// and a third for the name of the generated irq handler function. +/// +/// This macro will also generate the `IRQ_HANDLERS` array and fill it with function pointers to the +/// raw asm handlers for each irq, so you can easily copy it into the IDT. +#[macro_export] // for docs +macro_rules! irq_handler { + ( $($irq_nbr:expr, $handler_name:ident, $asm_wrapper_name:ident, $rust_wrapper_name:ident ; )* ) => { + + $( + /// Auto generated irq handler. See [`irq_handler`]. + fn $handler_name(_exception_name: &'static str, _hwcontext: &mut UserspaceHardwareContext, _has_errcode: bool) { + crate::i386::interrupt::acknowledge($irq_nbr); + crate::event::dispatch_event($irq_nbr); + } + + generate_trap_gate_handler!(name: "Irq handler", + has_errcode: false, + wrapper_asm_fnname: $asm_wrapper_name, + wrapper_rust_fnname: $rust_wrapper_name, + kernel_fault_strategy: ignore, // irqs can happen while we're in kernel mode, don't worry, it's fine ;) + user_fault_strategy: ignore, // don't worry it's fine ;) + handler_strategy: $handler_name + ); + )* + + + /// Array of interrupt handlers. + /// + /// The position in the array defines the IRQ this handler is targeting. See [`irq_handler`]. 
+ static IRQ_HANDLERS : [extern "C" fn(); 17] = [
+ $(
+ $asm_wrapper_name,
+ )*
+ ];
+ }
+}
+
+irq_handler!(
+ 0, pit_handler, pit_handler_asm_wrapper, pit_handler_rust_wrapper;
+ 1, keyboard_handler, keyboard_handler_asm_wrapper, keyboard_handler_rust_wrapper;
+ 2, cascade_handler, cascade_handler_asm_wrapper, cascade_handler_rust_wrapper;
+ 3, serial2_handler, serial2_handler_asm_wrapper, serial2_handler_rust_wrapper;
+ 4, serial1_handler, serial1_handler_asm_wrapper, serial1_handler_rust_wrapper;
+ 5, sound_handler, sound_handler_asm_wrapper, sound_handler_rust_wrapper;
+ 6, floppy_handler, floppy_handler_asm_wrapper, floppy_handler_rust_wrapper;
+ 7, parallel1_handler, parallel1_handler_asm_wrapper, parallel1_handler_rust_wrapper;
+ 8, rtc_handler, rtc_handler_asm_wrapper, rtc_handler_rust_wrapper;
+ 9, acpi_handler, acpi_handler_asm_wrapper, acpi_handler_rust_wrapper;
+ 10, irq10_handler, irq10_handler_asm_wrapper, irq10_handler_rust_wrapper;
+ 11, irq11_handler, irq11_handler_asm_wrapper, irq11_handler_rust_wrapper;
+ 12, mouse_handler, mouse_handler_asm_wrapper, mouse_handler_rust_wrapper;
+ 13, irq13_handler, irq13_handler_asm_wrapper, irq13_handler_rust_wrapper;
+ 14, primary_ata_handler, primary_ata_handler_asm_wrapper, primary_ata_handler_rust_wrapper;
+ 15, secondary_ata_handler, secondary_ata_handler_asm_wrapper, secondary_ata_handler_rust_wrapper;
+ 16, hpet_handler, hpet_handler_asm_wrapper, hpet_handler_rust_wrapper;
+);
+
lazy_static! {
/// IDT address. Initialized in `init()`.
static ref IDT: SpinLock<Option<VirtualAddress>> = SpinLock::new(None);
@@ -431,34 +1055,35 @@ pub unsafe fn init() {
let idt = page.addr() as *mut u8 as *mut Idt;
unsafe {
(*idt).init();
- (*idt).divide_by_zero.set_handler_fn(divide_by_zero_handler);
- (*idt).debug.set_handler_fn(debug_handler);
- (*idt).non_maskable_interrupt.set_handler_fn(non_maskable_interrupt_handler);
- (*idt).breakpoint.set_handler_fn(breakpoint_handler);
- (*idt).overflow.set_handler_fn(overflow_handler);
- (*idt).bound_range_exceeded.set_handler_fn(bound_range_exceeded_handler);
- (*idt).invalid_opcode.set_handler_fn(invalid_opcode_handler);
- (*idt).device_not_available.set_handler_fn(device_not_available_handler);
- (*idt).double_fault.set_handler_task_gate_addr(double_fault_handler as u32);
+ (*idt).divide_by_zero.set_handler_fn(divide_by_zero_exception_asm_wrapper);
+ (*idt).debug.set_handler_fn(debug_exception_asm_wrapper);
+ (*idt).non_maskable_interrupt.set_handler_fn(nmi_exception_asm_wrapper);
+ (*idt).breakpoint.set_handler_fn(breakpoint_exception_asm_wrapper);
+ (*idt).overflow.set_handler_fn(overflow_exception_asm_wrapper);
+ (*idt).bound_range_exceeded.set_handler_fn(bound_range_exceeded_exception_asm_wrapper);
+ (*idt).invalid_opcode.set_handler_fn(invalid_opcode_exception_asm_wrapper);
+ (*idt).device_not_available.set_handler_fn(device_not_available_exception_asm_wrapper);
+ DOUBLE_FAULT_TASK.lock().set_ip(double_fault_handler as u32);
+ (*idt).double_fault.set_handler_task_gate(GdtIndex::FTSS.selector());
// coprocessor_segment_overrun
- (*idt).invalid_tss.set_handler_fn(invalid_tss_handler);
- (*idt).segment_not_present.set_handler_fn(segment_not_present_handler);
- (*idt).stack_segment_fault.set_handler_fn(stack_segment_fault_handler);
- (*idt).general_protection_fault.set_handler_fn(general_protection_fault_handler);
- (*idt).page_fault.set_handler_fn(page_fault_handler);
- (*idt).x87_floating_point.set_handler_fn(x87_floating_point_handler);
- (*idt).alignment_check.set_handler_fn(alignment_check_handler);
- (*idt).machine_check.set_handler_fn(machine_check_handler);
- (*idt).simd_floating_point.set_handler_fn(simd_floating_point_handler);
- (*idt).virtualization.set_handler_fn(virtualization_handler);
- (*idt).security_exception.set_handler_fn(security_exception_handler);
-
- for (i, handler) in irq::IRQ_HANDLERS.iter().enumerate() {
- (*idt).interrupts[i].set_handler_fn(*handler);
+ (*idt).invalid_tss.set_handler_fn(invalid_tss_exception_asm_wrapper);
+ (*idt).segment_not_present.set_handler_fn(segment_not_present_exception_asm_wrapper);
+ (*idt).stack_segment_fault.set_handler_fn(stack_fault_exception_asm_wrapper);
+ (*idt).general_protection_fault.set_handler_fn(general_protection_fault_exception_asm_wrapper);
+ (*idt).page_fault.set_handler_fn(page_fault_exception_asm_wrapper);
+ (*idt).x87_floating_point.set_handler_fn(x87_floating_point_exception_asm_wrapper);
+ (*idt).alignment_check.set_handler_fn(alignment_check_exception_asm_wrapper);
+ (*idt).machine_check.set_handler_fn(machine_check_exception_asm_wrapper);
+ (*idt).simd_floating_point.set_handler_fn(simd_floating_point_exception_asm_wrapper);
+ (*idt).virtualization.set_handler_fn(virtualization_exception_asm_wrapper);
+ (*idt).security_exception.set_handler_fn(security_exception_asm_wrapper);
+
+ for (i, handler) in IRQ_HANDLERS.iter().enumerate() {
+ (*idt).interrupts[i].set_interrupt_gate_addr(*handler as u32);
}
// Add entry for syscalls
- let syscall_int = (*idt)[0x80].set_interrupt_gate_addr(syscall_handler as u32);
+ let syscall_int = (*idt)[0x80].set_interrupt_gate_addr(syscall_interrupt_asm_wrapper as u32);
syscall_int.set_privilege_level(PrivilegeLevel::Ring3);
syscall_int.disable_interrupts(false);
}
diff --git a/kernel/src/interrupts/syscalls.rs b/kernel/src/interrupts/syscalls.rs
index aff39727a..236ea3d87 100644
--- a/kernel/src/interrupts/syscalls.rs
+++ b/kernel/src/interrupts/syscalls.rs
@@ -14,13 +14,13 @@ use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use crate::ipc;
-use super::check_thread_killed;
use crate::error::{UserspaceError, KernelError};
use crate::sync::RwLock;
use crate::timer;
use failure::Backtrace;
-use sunrise_libkern::{nr, SYSCALL_NAMES, MemoryInfo, MemoryAttributes, MemoryPermissions, MemoryType};
+use sunrise_libkern::{MemoryInfo, MemoryAttributes, MemoryPermissions, MemoryType};
use bit_field::BitArray;
+use crate::i386::gdt::{GDT, GdtIndex};
/// Resize the heap of a process, just like a brk.
/// It can both expand, and shrink the heap.
@@ -36,7 +36,7 @@ use bit_field::BitArray;
/// * `new_size` must be [PAGE_SIZE] aligned.
///
/// [PAGE_SIZE]: crate::paging::PAGE_SIZE
-fn set_heap_size(new_size: usize) -> Result<usize, UserspaceError> {
+pub fn set_heap_size(new_size: usize) -> Result<usize, UserspaceError> {
let p = get_current_process();
let mut pmemory = p.pmemory.lock();
let heap_addr = pmemory.resize_heap(new_size)?;
@@ -44,7 +44,7 @@
}
/// Maps the vga frame buffer mmio in userspace memory
-fn map_framebuffer() -> Result<(usize, usize, usize, usize), UserspaceError> {
+pub fn map_framebuffer() -> Result<(usize, usize, usize, usize), UserspaceError> {
let tag = i386::multiboot::get_boot_information().framebuffer_tag()
.expect("Framebuffer to be provided");
let framebuffer_size = tag.bpp as usize
@@ -81,7 +81,7 @@ fn map_framebuffer() -> Result<(usize, usize, usize, usize), UserspaceError> {
/// # Error
///
/// NoSuchEntry: IRQ above 0x3FF or outside the IRQ access mask was given.
-fn create_interrupt_event(irq_num: usize, _flag: u32) -> Result<usize, UserspaceError> {
+pub fn create_interrupt_event(irq_num: usize, _flag: u32) -> Result<usize, UserspaceError> {
// TODO: Properly handle flags in create_interrupt_event.
// BODY: The flags in create_interrupt_event configure the triggering of the
// BODY: event. If it is false, the IRQ is active HIGH level sensitive. If it
@@ -135,7 +135,7 @@ fn create_interrupt_event(irq_num: usize, _flag: u32) -> Result<usize, UserspaceError> {
-fn query_physical_address(virtual_address: usize) -> Result<(usize, usize, usize, usize), UserspaceError> {
+pub fn query_physical_address(virtual_address: usize) -> Result<(usize, usize, usize, usize), UserspaceError> {
let virtual_address = VirtualAddress(virtual_address);
let proc = scheduler::get_current_process();
let mem = proc.pmemory.lock();
@@ -168,7 +168,7 @@ fn query_physical_address(virtual_address: usize) -> Result<(usize, usize, usize
///
/// - Timeout: the timeout was reached without a signal occuring on the given handles.
/// - InvalidHandle: A handle in the handle table does not exist.
-fn wait_synchronization(handles_ptr: UserSpacePtr<[u32]>, timeout_ns: usize) -> Result<usize, UserspaceError> {
+pub fn wait_synchronization(handles_ptr: UserSpacePtr<[u32]>, timeout_ns: usize) -> Result<usize, UserspaceError> {
// A list of underlying handles to wait for...
let mut handle_arr = Vec::new();
let proc = scheduler::get_current_process();
@@ -212,7 +212,7 @@ fn wait_synchronization(handles_ptr: UserSpacePtr<[u32]>, timeout_ns: usize) ->
}
/// Print the passed string to the serial port.
-fn output_debug_string(msg: UserSpacePtr<[u8]>, level: usize, target: UserSpacePtr<[u8]>) -> Result<(), UserspaceError> {
+pub fn output_debug_string(msg: UserSpacePtr<[u8]>, level: usize, target: UserSpacePtr<[u8]>) -> Result<(), UserspaceError> {
let level = match level {
00..20 => log::Level::Error,
20..40 => log::Level::Warn,
@@ -226,7 +226,7 @@ fn output_debug_string(msg: UserSpacePtr<[u8]>, level: usize, target: UserSpaceP
}
/// Kills our own process.
-fn exit_process() -> Result<(), UserspaceError> {
+pub fn exit_process() -> Result<(), UserspaceError> {
ProcessStruct::kill_process(get_current_process());
Ok(())
}
@@ -241,7 +241,7 @@ fn exit_process() -> Result<(), UserspaceError> {
///
/// - InvalidHandle: The passed handle does not exist, or is not a ClientPort.
/// - PortRemoteDead: All associated ServerPort handles are closed
-fn connect_to_port(handle: u32) -> Result<usize, UserspaceError> {
+pub fn connect_to_port(handle: u32) -> Result<usize, UserspaceError> {
let curproc = scheduler::get_current_process();
let clientport = curproc.phandles.lock().get_handle(handle)?.as_client_port()?;
let clientsess = clientport.connect()?;
@@ -250,7 +250,7 @@ fn connect_to_port(handle: u32) -> Result<usize, UserspaceError> {
}
/// Kills our own thread.
-fn exit_thread() -> Result<(), UserspaceError> {
+pub fn exit_thread() -> Result<(), UserspaceError> {
ThreadStruct::kill(get_current_thread());
Ok(())
}
@@ -269,9 +269,9 @@ fn exit_thread() -> Result<(), UserspaceError> {
/// # Returns
///
/// A thread_handle to the created thread.
-fn create_thread(ip: usize, arg: usize, sp: usize, _priority: u32, _processor_id: u32) -> Result<usize, UserspaceError> {
+pub fn create_thread(ip: usize, arg: usize, sp: usize, _priority: u32, _processor_id: u32) -> Result<usize, UserspaceError> {
let cur_proc = get_current_process();
- let thread = ThreadStruct::new(&cur_proc, VirtualAddress(ip), VirtualAddress(sp), arg)?;
+ let thread = ThreadStruct::new(&cur_proc, VirtualAddress(ip), VirtualAddress(sp), Some(arg))?;
let handle = Handle::Thread(thread);
let mut handles_table = cur_proc.phandles.lock();
Ok(handles_table.add_handle(Arc::new(handle)) as usize)
@@ -284,7 +284,7 @@ fn create_thread(ip: usize, arg: usize, sp: usize, _priority: u32, _processor_id
/// * `InvalidHandle` if the handle is not a thread_handle,
/// * `ProcessAlreadyStarted` if the thread has already started,
#[allow(clippy::unit_arg)]
-fn start_thread(thread_handle: u32) -> Result<(), UserspaceError> {
+pub fn start_thread(thread_handle: u32) -> Result<(), UserspaceError> {
let cur_proc = get_current_process();
let handles_table = cur_proc.phandles.lock();
let thread = handles_table.get_handle(thread_handle)?.as_thread_handle()?;
@@ -303,7 +303,7 @@ fn start_thread(thread_handle: u32) -> Result<(), UserspaceError> {
/// - ExceedingMaximum: Name is bigger than 12 character, or is missing a \0.
/// - NoSuchEntry: No named port were registered with this name.
/// - PortRemoteDead: All associated ServerPort handles are closed.
-fn connect_to_named_port(name: UserSpacePtr<[u8; 12]>) -> Result<usize, UserspaceError> {
+pub fn connect_to_named_port(name: UserSpacePtr<[u8; 12]>) -> Result<usize, UserspaceError> {
let session = ipc::connect_to_named_port(*name)?;
let curproc = scheduler::get_current_process();
let hnd = curproc.phandles.lock().add_handle(Arc::new(Handle::ClientSession(session)));
@@ -321,7 +321,7 @@ fn connect_to_named_port(name: UserSpacePtr<[u8; 12]>) -> Result<usize, UserspaceError> {
-fn manage_named_port(name_ptr: UserSpacePtr<[u8; 12]>, max_sessions: u32) -> Result<usize, UserspaceError> {
+pub fn manage_named_port(name_ptr: UserSpacePtr<[u8; 12]>, max_sessions: u32) -> Result<usize, UserspaceError> {
let server = ipc::create_named_port(*name_ptr, max_sessions)?;
let curproc = scheduler::get_current_process();
let hnd = curproc.phandles.lock().add_handle(Arc::new(Handle::ServerPort(server)));
@@ -338,7 +338,7 @@ fn manage_named_port(name_ptr: UserSpacePtr<[u8; 12]>, max_sessions: u32) -> Res
/// # Error
///
/// - InvalidHandle: Handles does not exist or is not a ServerPort.
-fn accept_session(porthandle: u32) -> Result<usize, UserspaceError> {
+pub fn accept_session(porthandle: u32) -> Result<usize, UserspaceError> {
let curproc = scheduler::get_current_process();
let handle = curproc.phandles.lock().get_handle(porthandle)?;
let port = match *handle {
@@ -358,7 +358,7 @@ fn accept_session(porthandle: u32) -> Result<usize, UserspaceError> {
/// # Error
///
/// - PortRemoteDead: All ServerSession associated with this handle are closed.
-fn send_sync_request_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handle: u32) -> Result<(), UserspaceError> {
+pub fn send_sync_request_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handle: u32) -> Result<(), UserspaceError> {
let proc = scheduler::get_current_process();
let sess = proc.phandles.lock().get_handle(handle)?.as_client_session()?;
sess.send_request(buf)
@@ -379,7 +379,7 @@ fn send_sync_request_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handle: u32) -
/// session has been closed, if one that appears earlier in the list has an
/// incoming message, it will take priority and a result code of 0x0 will be
/// returned.
-fn reply_and_receive_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handles: UserSpacePtr<[u32]>, reply_target: u32, timeout: usize) -> Result<usize, UserspaceError> {
+pub fn reply_and_receive_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handles: UserSpacePtr<[u32]>, reply_target: u32, timeout: usize) -> Result<usize, UserspaceError> {
let proc = scheduler::get_current_process();
if reply_target != 0 {
// get session
@@ -398,7 +398,7 @@ fn reply_and_receive_with_user_buffer(buf: UserSpacePtrMut<[u8]>, handles: UserS
/// Closed the passed handle.
///
/// Does not accept 0xFFFF8001 or 0xFFFF8000 as handles.
-fn close_handle(handle: u32) -> Result<(), UserspaceError> {
+pub fn close_handle(handle: u32) -> Result<(), UserspaceError> {
let proc = scheduler::get_current_process();
proc.phandles.lock().delete_handle(handle)?;
Ok(())
}
@@ -411,7 +411,7 @@ fn close_handle(handle: u32) -> Result<(), UserspaceError> {
/// - 0 Yielding without core migration
/// - -1 Yielding with core migration
/// - -2 Yielding to any other thread
-fn sleep_thread(nanos: usize) -> Result<(), UserspaceError> {
+pub fn sleep_thread(nanos: usize) -> Result<(), UserspaceError> {
if nanos == 0 {
scheduler::schedule();
Ok(())
@@ -422,7 +422,7 @@ fn sleep_thread(nanos: usize) -> Result<(), UserspaceError> {
/// Create a new Port pair. Those ports are linked to each-other: The server will
/// receive connections from the client.
-fn create_port(max_sessions: u32, _is_light: bool, _name_ptr: UserSpacePtr<[u8; 12]>) -> Result<(usize, usize), UserspaceError>{
+pub fn create_port(max_sessions: u32, _is_light: bool, _name_ptr: UserSpacePtr<[u8; 12]>) -> Result<(usize, usize), UserspaceError>{
let (server, client) = ipc::port::new(max_sessions);
let curproc = scheduler::get_current_process();
let serverhnd = curproc.phandles.lock().add_handle(Arc::new(Handle::ServerPort(server)));
@@ -436,7 +436,7 @@ fn create_port(max_sessions: u32, _is_light: bool, _name_ptr: UserSpacePtr<[u8;
///
/// Other perm can be used to enforce permission 1, 3, or 0x10000000 if don't
/// care.
-fn create_shared_memory(size: u32, _myperm: u32, _otherperm: u32) -> Result<usize, UserspaceError> {
+pub fn create_shared_memory(size: u32, _myperm: u32, _otherperm: u32) -> Result<usize, UserspaceError> {
let frames = FrameAllocator::allocate_frames_fragmented(size as usize)?;
let handle = Arc::new(Handle::SharedMemory(Arc::new(RwLock::new(frames))));
let curproc = get_current_process();
@@ -450,7 +450,7 @@ fn create_shared_memory(size: u32, _myperm: u32, _otherperm: u32) -> Result<usize, UserspaceError> {
-fn map_shared_memory(handle: u32, addr: usize, size: usize, perm: u32) -> Result<(), UserspaceError> {
+pub fn map_shared_memory(handle: u32, addr: usize, size: usize, perm: u32) -> Result<(), UserspaceError> {
let perm = MemoryPermissions::from_bits(perm).ok_or(UserspaceError::InvalidMemPerms)?;
let curproc = get_current_process();
let mem = curproc.phandles.lock().get_handle(handle)?.as_shared_memory()?;
@@ -470,7 +470,7 @@ fn map_shared_memory(handle: u32, addr: usize, size: usize, perm: u32) -> Result
///
/// - InvalidAddress: address is not the start of a shared mapping
/// - InvalidSize: Size is not the same as the mapping size.
-fn unmap_shared_memory(handle: u32, addr: usize, size: usize) -> Result<(), UserspaceError> {
+pub fn unmap_shared_memory(handle: u32, addr: usize, size: usize) -> Result<(), UserspaceError> {
let curproc = get_current_process();
let hmem = curproc.phandles.lock().get_handle(handle)?.as_shared_memory()?;
let addr = VirtualAddress(addr);
@@ -508,7 +508,7 @@ fn unmap_shared_memory(handle: u32, addr: usize, size: usize) -> Result<(), User
/// mapping that contains the provided address. Writes the output to the
/// given userspace pointer to a MemoryInfo structure.
#[inline(never)]
-fn query_memory(mut meminfo: UserSpacePtrMut<MemoryInfo>, _unk: usize, addr: usize) -> Result<usize, UserspaceError> {
+pub fn query_memory(mut meminfo: UserSpacePtrMut<MemoryInfo>, _unk: usize, addr: usize) -> Result<usize, UserspaceError> {
let curproc = scheduler::get_current_process();
let memlock = curproc.pmemory.lock();
let qmem = memlock.query_memory(VirtualAddress(addr));
@@ -539,7 +539,7 @@ fn query_memory(mut meminfo: UserSpacePtrMut<MemoryInfo>, _unk: usize, addr: usi
///
/// - A handle to a ServerSession
/// - A handle to a ClientSession
-fn create_session(_is_light: bool, _unk: usize) -> Result<(usize, usize), UserspaceError> {
+pub fn create_session(_is_light: bool, _unk: usize) -> Result<(usize, usize), UserspaceError> {
let (server, client) = ipc::session::new();
let curproc = scheduler::get_current_process();
let serverhnd = curproc.phandles.lock().add_handle(Arc::new(Handle::ServerSession(server)));
@@ -570,162 +570,40 @@ pub fn map_mmio_region(physical_address: usize, size: usize, virtual_address: us
Ok(())
}
-impl Registers {
- /// Update the Registers with the passed result.
- fn apply0(&mut self, ret: Result<(), UserspaceError>) {
- self.apply3(ret.map(|_| (0, 0, 0)))
- }
-
- /// Update the Registers with the passed result.
- fn apply1(&mut self, ret: Result<usize, UserspaceError>) {
- self.apply3(ret.map(|v| (v, 0, 0)))
- }
-
- /// Update the Registers with the passed result.
- fn apply2(&mut self, ret: Result<(usize, usize), UserspaceError>) {
- self.apply3(ret.map(|(v0, v1)| (v0, v1, 0)))
- }
-
- /// Update the Registers with the passed result.
- fn apply3(&mut self, ret: Result<(usize, usize, usize), UserspaceError>) {
- self.apply4(ret.map(|(v0, v1, v2)| (v0, v1, v2, 0)))
- }
-
- /// Update the Registers with the passed result.
- fn apply4(&mut self, ret: Result<(usize, usize, usize, usize), UserspaceError>) {
- match ret {
- Ok((v0, v1, v2, v3)) => {
- self.eax = 0;
- self.ebx = v0;
- self.ecx = v1;
- self.edx = v2;
- self.esi = v3;
- self.edi = 0;
- self.ebp = 0;
- },
- Err(err) => {
- self.eax = err.make_ret() as _;
- self.ebx = 0;
- self.ecx = 0;
- self.edx = 0;
- self.esi = 0;
- self.edi = 0;
- self.ebp = 0;
- }
- }
- }
-}
-
-/// Represents a register backup. The syscall wrapper constructs this structure
-/// before calling syscall_handler_inner, and then pops it before returning to
-/// userspace, allowing precise control over register state.
-#[repr(C)]
-#[derive(Debug)]
-#[allow(clippy::missing_docs_in_private_items)]
-pub struct Registers {
- eax: usize,
- ebx: usize,
- ecx: usize,
- edx: usize,
- esi: usize,
- edi: usize,
- ebp: usize,
-}
-
-
-// TODO: Missing argument slot for SVCs on i386 backend
-// BODY: Our i386 SVC ABI is currently fairly different from the ABI used by
-// BODY: Horizon/NX. This is for two reasons:
-// BODY:
-// BODY: 1. We are missing one argument slot compared to the official SVCs, so
-// BODY: we changed the ABI to work around it.
-// BODY:
-// BODY: 2. The Horizon ABI "skipping" over some register is an optimization for
-// BODY: ARM, but doesn't help on i386.
-// BODY:
-// BODY: That being said, there is a way for us to recover the missing SVC slot.
-// BODY: We are currently "wasting" x0 for the syscall number. We could avoid
-// BODY: this by instead using different IDT entries for the different syscalls.
-// BODY: This is actually more in line with what the Horizon/NX kernel is doing
-// BODY: anyways.
-// BODY: -// BODY: Once we've regained this missing slot, we'll be able to make our ABI -// BODY: match the Horizon/NX 32-bit ABI. While the "skipping over" doesn't help -// BODY: our performances, it doesn't really hurt it either, and having a uniform -// BODY: ABI across platforms would make for lower maintenance. -/// Syscall dispatcher. Dispatches to the various syscall handling functions -/// based on registers.eax, and updates the registers struct with the correct -/// return values. -pub extern fn syscall_handler_inner(registers: &mut Registers) { - - let (syscall_nr, x0, x1, x2, x3, x4, x5) = (registers.eax, registers.ebx, registers.ecx, registers.edx, registers.esi, registers.edi, registers.ebp); - let syscall_name = SYSCALL_NAMES.get(syscall_nr).unwrap_or(&"Unknown"); - - debug!("Handling syscall {} - x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}", - syscall_name, x0, x1, x2, x3, x4, x5); - - let allowed = get_current_process().capabilities.syscall_mask.get_bit(syscall_nr); - - if cfg!(feature = "no-security-check") && !allowed { - let curproc = get_current_process(); - error!("Process {} attempted to use unauthorized syscall {} ({:#04x})", - curproc.name, syscall_name, syscall_nr); - } - - let allowed = cfg!(feature = "no-security-check") || allowed; - - match (allowed, syscall_nr) { - // Horizon-inspired syscalls! - (true, nr::SetHeapSize) => registers.apply1(set_heap_size(x0)), - (true, nr::QueryMemory) => registers.apply1(query_memory(UserSpacePtrMut(x0 as _), x1, x2)), - (true, nr::ExitProcess) => registers.apply0(exit_process()), - (true, nr::CreateThread) => registers.apply1(create_thread(x0, x1, x2, x3 as _, x4 as _)), - (true, nr::StartThread) => registers.apply0(start_thread(x0 as _)), - (true, nr::ExitThread) => registers.apply0(exit_thread()), - (true, nr::SleepThread) => registers.apply0(sleep_thread(x0)), - (true, nr::MapSharedMemory) => registers.apply0(map_shared_memory(x0 as _, x1 as _, x2 as _, x3 as _)), - (true, nr::UnmapSharedMemory) => registers.apply0(unmap_shared_memory(x0 as _, x1 as _, x2 as _)), - (true, nr::CloseHandle) => registers.apply0(close_handle(x0 as _)), - (true, nr::WaitSynchronization) => registers.apply1(wait_synchronization(UserSpacePtr::from_raw_parts(x0 as _, x1), x2)), - (true, nr::ConnectToNamedPort) => registers.apply1(connect_to_named_port(UserSpacePtr(x0 as _))), - (true, nr::SendSyncRequestWithUserBuffer) => registers.apply0(send_sync_request_with_user_buffer(UserSpacePtrMut::from_raw_parts_mut(x0 as _, x1), x2 as _)), - (true, nr::OutputDebugString) => registers.apply0(output_debug_string(UserSpacePtr::from_raw_parts(x0 as _, x1), x2, UserSpacePtr::from_raw_parts(x3 as _, x4))), - (true, nr::CreateSession) => registers.apply2(create_session(x0 != 0, x1 as _)), - (true, nr::AcceptSession) => registers.apply1(accept_session(x0 as _)), - (true, nr::ReplyAndReceiveWithUserBuffer) => registers.apply1(reply_and_receive_with_user_buffer(UserSpacePtrMut::from_raw_parts_mut(x0 as _, x1), UserSpacePtr::from_raw_parts(x2 as _, x3), x4 as _, x5)), - (true, nr::CreateSharedMemory) => registers.apply1(create_shared_memory(x0 as _, x1 as _, x2 as _)), - (true, nr::CreateInterruptEvent) => registers.apply1(create_interrupt_event(x0, x1 as u32)), - (true, nr::QueryPhysicalAddress) => registers.apply4(query_physical_address(x0 as _)), - (true, nr::CreatePort) => registers.apply2(create_port(x0 as _, x1 != 0, UserSpacePtr(x2 as _))), - (true, nr::ManageNamedPort) => registers.apply1(manage_named_port(UserSpacePtr(x0 as _), x1 as _)), - (true, 
nr::ConnectToPort) => registers.apply1(connect_to_port(x0 as _)),
-
- // sunrise extensions
- (true, nr::MapFramebuffer) => registers.apply4(map_framebuffer()),
- (true, nr::MapMmioRegion) => registers.apply0(map_mmio_region(x0, x1, x2, x3 != 0)),
-
- // Unknown/unauthorized syscall.
- (false, _) => {
- // Attempted to call unauthorized SVC. Horizon invokes usermode
- // exception handling in some cases. Let's just kill the process for
- // now.
- let curproc = get_current_process();
- error!("Process {} attempted to use unauthorized syscall {} ({:#04x}), killing",
- curproc.name, syscall_name, syscall_nr);
- ProcessStruct::kill_process(curproc);
- },
- _ => {
- let curproc = get_current_process();
- error!("Process {} attempted to use unknown syscall {} ({:#04x}), killing",
- curproc.name, syscall_name, syscall_nr);
- ProcessStruct::kill_process(curproc);
- }
- }
-
- debug!("Returning from syscall {} - x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}, x6: {}",
- syscall_name, registers.eax, registers.ebx, registers.ecx, registers.edx, registers.esi, registers.edi, registers.ebp);
-
- // Effectively kill the thread at syscall boundary
- check_thread_killed();
+/// Set thread local area pointer.
+///
+/// Akin to `set_thread_area` on Linux, this syscall sets the `gs` segment selector's base address
+/// to the address passed as argument.
+///
+/// The user will likely want to make it point to its ELF thread local storage, as `gs:0` is expected
+/// to contain the thread pointer `tp`.
+///
+/// Unlike Linux, you only have **one** user-controlled segment, found in `gs`, and you can only set its address.
+///
+/// The limit will always be set to `0xFFFFFFFF`, and adding this offset to a non-zero base address
+/// means that the resulting address will "wrap around" the address space, and end up **under**
+/// the base address.
+/// You can use this property to implement thread local storage variant II - gnu model,
+/// as thread local variables are expected to be found "below" `gs:0`, with "negative" offsets such as
+/// `gs:0xFFFFFFFC`.
+///
+/// ## x86_64
+///
+/// ![same, but different, but still same](https://media.giphy.com/media/C6JQPEUsZUyVq/giphy.gif)
+///
+/// `fs` is used instead of `gs`, because reasons.
+///
+/// # Errors
+///
+/// * The whole initial design of TLS on x86 should be considered an error.
+/// * No returned error otherwise.
+pub fn set_thread_area(segment_base_address: usize) -> Result<(), UserspaceError> {
+ let segment_base_address = VirtualAddress(segment_base_address);
+ let mut gdt = GDT.r#try().expect("GDT not initialized").lock();
+ gdt.table[GdtIndex::UTlsElf as usize].set_base(segment_base_address.addr() as u32);
+ gdt.commit(None, None, None, None, None, None);
+ // store it in the thread struct.
+ let thread = get_current_thread();
+ *thread.tls_elf.lock() = segment_base_address;
+ Ok(())
}
-
diff --git a/kernel/src/main.rs b/kernel/src/main.rs
index 6fbb46279..1f69c2402 100644
--- a/kernel/src/main.rs
+++ b/kernel/src/main.rs
@@ -74,6 +74,8 @@ pub mod ipc;
pub mod elf_loader;
pub mod utils;
pub mod checks;
+pub mod cpu_locals;
+pub mod panic;
#[cfg(target_os = "none")]
// Make rust happy about rust_oom being no_mangle...
@@ -92,6 +94,7 @@ use crate::paging::{PAGE_SIZE, MappingAccessRights};
use crate::mem::VirtualAddress;
use crate::process::{ProcessStruct, ThreadStruct};
use sunrise_libkern::MemoryType;
+use crate::cpu_locals::init_cpu_locals;
/// Forces a double fault by stack overflowing.
///
@@ -151,7 +154,7 @@ fn main() {
(VirtualAddress(ep), stack + 5 * PAGE_SIZE)
};
- let thread = ThreadStruct::new(&proc, ep, sp, 0)
+ let thread = ThreadStruct::new(&proc, ep, sp, None)
.expect("failed creating thread for service");
ThreadStruct::start(thread)
.expect("failed starting thread for service");
@@ -229,6 +232,9 @@ pub extern "C" fn common_start(multiboot_info_addr: usize) -> ! {
info!("Start ACPI detection");
unsafe { i386::acpi::init(); }
+ info!("Allocating cpu_locals");
+ init_cpu_locals(1);
+
info!("Enabling interrupts");
unsafe { interrupts::init(); }
@@ -257,97 +263,13 @@ pub extern "C" fn common_start(multiboot_info_addr: usize) -> ! {
#[cfg(target_os = "none")]
#[lang = "eh_personality"] #[no_mangle] pub extern fn eh_personality() {}
-/// The kernel panic function.
-///
-/// Executed on a `panic!`, but can also be called directly.
-/// Will print some useful debugging information, and never return.
-///
-/// This function will print a stack dump, from `stackdump_source`.
-/// If `None` is passed, it will dump the current KernelStack instead, this is the default for a panic!.
-/// It is usefull being able to debug another stack that our own, especially when we double-faulted.
-///
-/// # Safety
-///
-/// When a `stackdump_source` is passed, this function cannot check the requirements of
-/// [dump_stack], it is the caller's job to do it.
-///
-/// Note that if `None` is passed, this function is safe.
-///
-/// [dump_stack]: crate::stack::dump_stack
-unsafe fn do_panic(msg: core::fmt::Arguments<'_>, stackdump_source: Option<StackDumpSource>) -> ! {
-
- // Disable interrupts forever!
- unsafe { sync::permanently_disable_interrupts(); }
- // Don't deadlock in the logger
- unsafe { SerialLogger.force_unlock(); }
-
- //todo: force unlock the KernelMemory lock
- // and also the process memory lock for userspace stack dumping (only if panic-on-excetpion ?).
-
- use crate::devices::rs232::SerialLogger;
-
- let _ = writeln!(SerialLogger, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\
- ! Panic! at the disco\n\
- ! {}\n\
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
- msg);
-
- // Parse the ELF to get the symbol table.
- // We must not fail, so this means a lot of Option checking :/
- use xmas_elf::symbol_table::Entry32;
- use xmas_elf::sections::SectionData;
- use xmas_elf::ElfFile;
- use crate::elf_loader::MappedGrubModule;
-
- let mapped_kernel_elf = i386::multiboot::try_get_boot_information()
- .and_then(|info| info.module_tags().nth(0))
- .and_then(|module| elf_loader::map_grub_module(module).ok());
-
- /// Gets the symbol table of a mapped module.
- fn get_symbols<'a>(mapped_kernel_elf: &'a Option<MappedGrubModule<'a>>) -> Option<(&'a ElfFile<'a>, &'a[Entry32])> {
- let module = mapped_kernel_elf.as_ref()?;
- let elf = module.elf.as_ref().ok()?;
- let data = elf.find_section_by_name(".symtab")?
- .get_data(elf).ok()?;
- let st = match data {
- SectionData::SymbolTable32(st) => st,
- _ => return None
- };
- Some((elf, st))
- }
-
- let elf_and_st = get_symbols(&mapped_kernel_elf);
-
- if elf_and_st.is_none() {
- let _ = writeln!(SerialLogger, "Panic handler: Failed to get kernel elf symbols");
- }
-
- // Then print the stack
- if let Some(sds) = stackdump_source {
- unsafe {
- // this is unsafe, caller must check safety
- crate::stack::dump_stack(&sds, elf_and_st)
- }
- } else {
- crate::stack::KernelStack::dump_current_stack(elf_and_st)
- }
-
- let _ = writeln!(SerialLogger, "Thread : {:#x?}", scheduler::try_get_current_thread());
-
- let _ = writeln!(SerialLogger, "!!!!!!!!!!!!!!!END PANIC!!!!!!!!!!!!!!");
-
- loop { unsafe { asm!("HLT"); } }
-}
-
/// Function called on `panic!` invocation.
///
/// Kernel panics.
#[cfg(target_os = "none")] #[panic_handler] #[no_mangle]
pub extern fn panic_fmt(p: &::core::panic::PanicInfo<'_>) -> ! {
- unsafe {
- // safe: we're not passing a stackdump_source
- // so it will use our current stack, which is safe.
- do_panic(format_args!("{}", p), None);
- }
+ panic::kernel_panic(&panic::PanicOrigin::KernelAssert {
+ panic_message: format_args!("{}", p)
+ });
}
diff --git a/kernel/src/paging/arch/i386/table.rs b/kernel/src/paging/arch/i386/table.rs
index 86a94bd7b..ae78e7ca6 100644
--- a/kernel/src/paging/arch/i386/table.rs
+++ b/kernel/src/paging/arch/i386/table.rs
@@ -299,6 +299,11 @@ impl InactiveHierarchyTrait for InactiveHierarchy {
// Copy the kernel space tables
self.copy_active_kernel_space();
super::swap_cr3(self.directory_physical_address);
+ // Update the cr3 DOUBLE_FAULT_TSS will switch to when we double fault
+ // DOUBLE_FAULT_TASK should only be locked during init and update, and switch_to is not re-entrant.
+ crate::i386::gdt::DOUBLE_FAULT_TASK
+ .try_lock().expect("Cannot update DOUBLE_FAULT_TASK's cr3")
+ .cr3 = self.directory_physical_address.addr() as u32;
}
fn copy_active_kernel_space(&mut self) {
diff --git a/kernel/src/paging/arch/mod.rs b/kernel/src/paging/arch/mod.rs
index 3267277b7..add70092f 100644
--- a/kernel/src/paging/arch/mod.rs
+++ b/kernel/src/paging/arch/mod.rs
@@ -7,5 +7,5 @@ pub use self::i386::table::{ActiveHierarchy, InactiveHierarchy};
pub use self::i386::entry::I386Entry as Entry;
pub use self::i386::entry::I386EntryFlags as EntryFlags;
pub use self::i386::is_paging_on;
-pub use self::i386::read_cr2; // todo give access to this in an arch-independent way
+pub use self::i386::{read_cr2, read_cr3}; // TODO: expose current page directory's address in an arch-independent way.
pub use self::i386::lands::{KernelLand, UserLand, RecursiveTablesLand};
diff --git a/kernel/src/paging/mod.rs b/kernel/src/paging/mod.rs
index e013a23b0..fb8bb0798 100644
--- a/kernel/src/paging/mod.rs
+++ b/kernel/src/paging/mod.rs
@@ -25,7 +25,7 @@
mod hierarchical_table;
mod arch;
mod bookkeeping;
-pub use self::arch::{PAGE_SIZE, read_cr2, InactiveHierarchy};
+pub use self::arch::{PAGE_SIZE, read_cr2, read_cr3, InactiveHierarchy};
pub use self::hierarchical_table::PageState;
pub use self::hierarchical_table::{InactiveHierarchyTrait};
use sunrise_libkern;
diff --git a/kernel/src/panic.rs b/kernel/src/panic.rs
new file mode 100644
index 000000000..d54d2b87f
--- /dev/null
+++ b/kernel/src/panic.rs
@@ -0,0 +1,255 @@
+//! Kernel panic
+//!
+//! ![minor mistake marvin](https://github.com/sunriseos/SunriseOS/blob/master/kernel/res/kernel_panic_doc.jpg)
+
+use crate::sync;
+use crate::interrupts::UserspaceHardwareContext;
+use tinybmp::Bmp;
+use crate::interrupts::syscalls::map_framebuffer;
+use crate::devices::rs232::SerialLogger;
+use crate::i386::gdt::MAIN_TASK;
+use crate::scheduler::try_get_current_thread;
+use core::fmt::Write;
+use crate::i386::registers::eflags::EFlags;
+
+/// Reason for a kernel panic. Must be passed to [kernel_panic].
+#[allow(missing_debug_implementations)] // want to display it? pass it to kernel_panic()!
+pub enum PanicOrigin<'a> {
+ /// The kernel failed an assertion.
+ ///
+ /// This is a case when we make a call to `panic!()`, `assert!()`, make an out of bound access, etc.
+ KernelAssert {
+ /// Formatted string passed to `panic!()`.
+ panic_message: core::fmt::Arguments<'a>
+ },
+ /// CPU Exception occurred while we were in kernel, e.g. page fault.
+ ///
+ /// This means there's a serious bug in the kernel.
+ KernelFault {
+ /// Formatted string of the exception name, and optional cpu error code.
+ exception_message: core::fmt::Arguments<'a>,
+ /// Kernel registers state before exception.
+ kernel_hardware_context: UserspaceHardwareContext
+ },
+ /// Kernel Faulted, and then the fault handler faulted too.
+ ///
+ /// You fucked up on some quality level.
+ ///
+ /// Registers state before the second fault can be retrieved from the MAIN_TASK tss.
+ DoubleFault,
+ /// Userspace exception.
+ ///
+ /// Normally this isn't a panic, the kernel should kill the faulty process,
+ /// display an error message, and keep on going.
+ ///
+ /// But if the feature panic-on-exception is enabled, we make the kernel panic to help debugging
+ /// sessions.
+ UserspaceFault {
+ /// Formatted string of the exception name, and optional cpu error code.
+ exception_message: core::fmt::Arguments<'a>,
+ /// Userspace registers state before exception.
+ userspace_hardware_context: UserspaceHardwareContext,
+ },
+}
+
+/// The kernel panic function.
+///
+/// Executed on a `panic!`, but can also be called directly.
+///
+/// Will print some useful debugging information, and never return.
+///
+/// Takes a panic origin, so we can personalize the kernel panic message.
+pub fn kernel_panic(panic_origin: &PanicOrigin) -> ! {
+
+ // TODO: permanently_disable_interrupts shouldn't be unsafe.
+ // BODY: disabling interrupts doesn't break any safety guidelines, and is perfectly safe as far as rustc is concerned.
+ // Disable interrupts forever!
+ unsafe { sync::permanently_disable_interrupts(); }
+ // Don't deadlock in the logger
+ unsafe {
+ // safe: All CPUs are halted at this point, and interrupts are stopped.
+ // Any code relying on locked mutex will not run anymore, so unlocking mutexes is fine now.
+ SerialLogger.force_unlock();
+ }
+
+ // Get the process we were running, and its name. Gonna be quite useful.
+ let current_thread = try_get_current_thread();
+ let current_process = current_thread.as_ref().map(|t| t.process.clone());
+ let current_process_name = current_process.as_ref().map(|p| &p.name);
+
+ //todo: force unlock the KernelMemory lock
+ // and also the process memory lock for userspace stack dumping (only if panic-on-exception ?).
+
+ // display the panic header: summary of what happened
+ let _ = writeln!(SerialLogger, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\
+ ! Panic! at the disco");
+
+ match panic_origin {
+ PanicOrigin::KernelAssert { panic_message: msg} => {
+ let _ = writeln!(SerialLogger, "! {}", msg);
+ }
+ PanicOrigin::KernelFault { exception_message: msg, ..} => {
+ let _ = writeln!(SerialLogger, "! Kernel Fault !\n\
+ ! {}", msg);
+ }
+ PanicOrigin::DoubleFault => {
+ let _ = writeln!(SerialLogger, "! Double Fault !\n\
+ ! Good luck.");
+ }
+ PanicOrigin::UserspaceFault { exception_message: msg, ..} => {
+ let _ = writeln!(SerialLogger, "! Userspace exception in {:?}.\n\
+ ! {}", current_process_name, msg);
+ }
+ }
+
+ let _ = writeln!(SerialLogger, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
+
+ // Add some nice ascii art to cheer up desperate developers
+ match panic_origin {
+ PanicOrigin::KernelFault { .. } => {
+ let _ = writeln!(SerialLogger, include_str!("../res/kernel_fault.txt"));
+ },
+ PanicOrigin::DoubleFault { .. } => {
+ let _ = writeln!(SerialLogger, include_str!("../res/double_fault.txt"));
+ }
+ _ => { /* You're not desperate enough */ }
+ }
+
+ // Show the name of the process we were running
+ // should show thread id in the future.
+ let _ = writeln!(SerialLogger, "Process: {:?}", current_process_name);
+
+ // Show hardware context
+ match panic_origin {
+ PanicOrigin::KernelAssert { .. } => { /* You shouldn't need it */ },
+ PanicOrigin::KernelFault { kernel_hardware_context: registers, .. } => {
+ let _ = writeln!(SerialLogger, "Kernel registers before fault:\n{}", registers);
+ },
+ PanicOrigin::UserspaceFault { userspace_hardware_context: registers, .. } => {
+ let _ = writeln!(SerialLogger, "Userspace registers before fault:\n{}", registers);
+ },
+ PanicOrigin::DoubleFault => {
+ // Get the Main TSS so I can recover some information about what happened.
+ if let Some(tss_main) = MAIN_TASK.try_lock() {
+ let _ = writeln!(SerialLogger, "Kernel registers before double fault:\n\
+ EIP={:#010x} CR3={:#010x}\n\
+ EAX={:#010x} EBX={:#010x} ECX={:#010x} EDX={:#010x}\n\
+ ESI={:#010x} EDI={:#010x} ESP={:#010x} EBP={:#010x}\n\
+ EFLAGS={:?}",
+ tss_main.tss.eip, tss_main.tss.cr3,
+ tss_main.tss.eax, tss_main.tss.ebx, tss_main.tss.ecx, tss_main.tss.edx,
+ tss_main.tss.esi, tss_main.tss.edi, tss_main.tss.esp, tss_main.tss.ebp,
+ EFlags::from_bits_truncate(tss_main.tss.eflags));
+ } else {
+ let _ = writeln!(SerialLogger, "Kernel registers before double fault: Cannot get main TSS, good luck");
+ }
+ }
+ }
+
+ // display the full thread struct
+ if let Some(t) = &current_thread {
+ let _ = writeln!(SerialLogger, "Current thread: {:#?}", t);
+ }
+
+ // display a stack dump
+
+ // Parse the ELF to get the symbol table.
+ // We must not fail, so this means a lot of Option checking :/
+ use xmas_elf::symbol_table::Entry32;
+ use xmas_elf::sections::SectionData;
+ use xmas_elf::ElfFile;
+ use crate::elf_loader::MappedGrubModule;
+
+ let mapped_kernel_elf = crate::i386::multiboot::try_get_boot_information()
+ .and_then(|info| info.module_tags().nth(0))
+ .and_then(|module| crate::elf_loader::map_grub_module(module).ok());
+
+ /// Gets the symbol table of a mapped module.
+ fn get_symbols<'a>(mapped_kernel_elf: &'a Option<MappedGrubModule<'a>>) -> Option<(&'a ElfFile<'a>, &'a[Entry32])> {
+ let module = mapped_kernel_elf.as_ref()?;
+ let elf = module.elf.as_ref().ok()?;
+ let data = elf.find_section_by_name(".symtab")?
+ .get_data(elf).ok()?;
+ let st = match data {
+ SectionData::SymbolTable32(st) => st,
+ _ => return None
+ };
+ Some((elf, st))
+ }
+
+ let elf_and_st = get_symbols(&mapped_kernel_elf);
+
+ if elf_and_st.is_none() {
+ let _ = writeln!(SerialLogger, "Panic handler: Failed to get kernel elf symbols");
+ }
+
+ // todo: Kernel Stack dump update
+ // body: Update the kernel stack dump functions to be compatible with the new and improved
+ // body: kernel panic.
+ // body:
+ // body: Now that we know the origin (userspace or kernelspace) in the panic, this should
+ // body: be easy, and we can finally have userspace stack dumps that actually work.
+ let stackdump_source = None;
+
+ // Then print the stack
+ if let Some(sds) = stackdump_source {
+ unsafe {
+ // this is unsafe, caller must check safety
+ crate::stack::dump_stack(&sds, elf_and_st)
+ }
+ } else {
+ crate::stack::KernelStack::dump_current_stack(elf_and_st)
+ }
+
+ // Display the infamous "Blue Screen Of Death"
+ display_bsod();
+
+ let _ = writeln!(SerialLogger, "!!!!!!!!!!!!!!!END PANIC!!!!!!!!!!!!!!");
+
+ loop { unsafe { asm!("HLT"); } }
+}
+
+
+/// The "Blue Screen Of Death"
+///
+/// Stored as an uncompressed BMP, so we don't have to do decompression in the panic handler,
+/// and just blit it on the screen instead.
+///
+/// See [display_bsod].
+static BSOD_BMP: &[u8; 1192016] = include_bytes!("../res/bsod.bmp");
+
+/// Display the infamous "Blue Screen Of Death"
+///
+/// When the kernel panics, we blit an image to the screen to inform the user that the kernel
+/// has panicked.
+///
+/// This function attempts to map the framebuffer, and copies [BSOD_BMP] to it.
+///
+/// It is designed to fail silently if mapping the framebuffer or parsing the BMP failed, as it
+/// should only be called from the panic handler, and the last thing we want at that time
+/// is more error handling.
+///
+/// Note that this function will write to the framebuffer with no regards to whether it was already
+/// mapped by another process.
+/// This is OK since we're panicking, and all processes should be halted by now.
+fn display_bsod() {
+ if let Ok((fb_addr, fb_width, fb_height, fb_bpp)) = map_framebuffer() {
+ let fb = unsafe { core::slice::from_raw_parts_mut(fb_addr as *mut u8, fb_width * fb_height * fb_bpp) };
+ if let Ok(bmp) = Bmp::from_slice(BSOD_BMP) {
+ let fb_row_len = fb_width * fb_bpp / 8;
+ let bmp_row_len = bmp.width() * bmp.bpp() / 8 +
+ /* bmp row padded to 4 bytes */ (32 - (bmp.width() * bmp.bpp()) % 32) / 8;
+ for (fb_row, bmp_row) in fb.chunks_exact_mut(fb_row_len)
+ .zip(bmp.image_data().rchunks_exact(bmp_row_len as usize)) {
+ for (fb_px, bmp_px) in fb_row.chunks_exact_mut(fb_bpp / 8)
+ .zip(bmp_row.chunks_exact(bmp.bpp() as usize / 8)) {
+ // bmp has GRB encoding apparently
+ fb_px[0] = bmp_px[1];
+ fb_px[1] = bmp_px[2];
+ fb_px[2] = bmp_px[0];
+ fb_px[3] = 0;
+ }
+ }
+ }
+ }
+}
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index a5f984757..41a046e4c 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -20,9 +20,12 @@
use failure::Backtrace;
use crate::frame_allocator::PhysicalMemRegion;
use crate::sync::RwLock;
+pub mod thread_local_storage;
mod capabilities;
pub use self::capabilities::ProcessCapabilities;
use crate::paging::{InactiveHierarchy, InactiveHierarchyTrait};
+use self::thread_local_storage::TLSManager;
+use crate::interrupts::UserspaceHardwareContext;
/// The struct representing a process. There's one for every process.
///
@@ -53,6 +56,9 @@ pub struct ProcessStruct {
/// Permissions of this process.
pub capabilities: ProcessCapabilities,
+ /// Tracks used and free allocated Thread Local Storage regions of this process.
+ pub tls_manager: Mutex<TLSManager>,
+
/// An array of the created but not yet started threads.
///
/// When we create a thread, we return a handle to userspace containing a weak reference to the thread,
@@ -108,8 +114,22 @@ pub struct ThreadStruct {
/// The currently running process is indirectly kept alive by the `CURRENT_THREAD` global in scheduler.
pub process: Arc<ProcessStruct>,
- /// Argument passed to the entrypoint on first schedule.
- pub arg: usize
+ /// Pointer to the Thread Local Storage region of this thread.
+ ///
+ /// * x86_32: loaded in the `fs` segment selector.
+ /// * x86_64: loaded in the `gs` segment selector.
+ pub tls_region: VirtualAddress,
+
+ /// Userspace's ELF `Thread Pointer`.
+ ///
+ /// * x86_32: loaded in the `gs` segment selector.
+ /// * x86_64: loaded in the `fs` segment selector.
+ pub tls_elf: Mutex<VirtualAddress>,
+
+ /// Userspace hardware context of this thread.
+ ///
+ /// Registers are backed up every time we enter the kernel via a syscall/exception, for debug purposes.
+ pub userspace_hwcontext: SpinLock<UserspaceHardwareContext>,
}
/// A handle to a userspace-accessible resource.
@@ -433,6 +453,7 @@ impl ProcessStruct {
threads: SpinLockIRQ::new(Vec::new()),
phandles: SpinLockIRQ::new(HandleTable::default()),
killed: AtomicBool::new(false),
+ tls_manager: Mutex::new(TLSManager::default()),
thread_maternity: SpinLock::new(Vec::new()),
capabilities
}
@@ -480,6 +501,7 @@ impl ProcessStruct {
phandles: SpinLockIRQ::new(HandleTable::default()),
killed: AtomicBool::new(false),
thread_maternity: SpinLock::new(Vec::new()),
+ tls_manager: Mutex::new(TLSManager::default()),
capabilities: ProcessCapabilities::default(),
}
)
@@ -531,7 +553,19 @@ impl ThreadStruct {
///
/// The thread's only strong reference is stored in the process' maternity,
/// and we return only a weak to it, that can directly be put in a thread_handle.
- pub fn new(belonging_process: &Arc<ProcessStruct>, ep: VirtualAddress, stack: VirtualAddress, arg: usize) -> Result<Weak<ThreadStruct>, KernelError> {
+ ///
+ /// ##### Argument
+ ///
+ /// * When creating a new thread from `svcCreateThread` you should pass `Some(thread_entry_arg)`.
+ /// This should be the argument provided by the userspace, and will be passed to the thread
+ /// when it starts.
+ /// * When creating the first thread of a process ("main thread") you should pass `None`.
+ /// This function will recognise this condition, automatically push a handle to the created
+ /// thread in the process' handle table, and this handle will be given as an argument to
+ /// the thread itself when it starts, so that the main thread can know its thread handle.
+    pub fn new(belonging_process: &Arc<ProcessStruct>, ep: VirtualAddress, stack: VirtualAddress, arg: Option<usize>) -> Result<Weak<ThreadStruct>, KernelError> {
+        // get its process memory
+        let mut pmemory = belonging_process.pmemory.lock();
 
         // allocate its kernel stack
         let kstack = KernelStack::allocate_stack()?;
@@ -542,21 +576,37 @@
         // the state of the process, Stopped
         let state = ThreadStateAtomic::new(ThreadState::Stopped);
 
+        // allocate its thread local storage region
+        let tls = belonging_process.tls_manager.lock().allocate_tls(&mut pmemory)?;
+
         let t = Arc::new(
             ThreadStruct {
                 state,
                 kstack,
                 hwcontext : empty_hwcontext,
                 process: Arc::clone(belonging_process),
-                arg
+                tls_region: tls,
+                tls_elf: Mutex::new(VirtualAddress(0x00000000)),
+                userspace_hwcontext: SpinLock::new(UserspaceHardwareContext::default()),
             }
         );
 
+        // if we're creating the main thread, push a handle to it in the process' handle table,
+        // and give it to the thread as an argument.
+        let arg = match arg {
+            Some(arg) => arg,
+            None => {
+                debug_assert!(belonging_process.threads.lock().is_empty() &&
+                    belonging_process.thread_maternity.lock().is_empty(), "Argument shouldn't be None");
+                belonging_process.phandles.lock().add_handle(Arc::new(Handle::Thread(Arc::downgrade(&t)))) as usize
+            }
+        };
+
         // prepare the thread's stack for its first schedule-in
         unsafe {
             // Safety: We just created the ThreadStruct, and own the only reference
             // to it, so we *know* it never has been scheduled, and cannot be.
-            prepare_for_first_schedule(&t, ep.addr(), stack.addr());
+            prepare_for_first_schedule(&t, ep.addr(), arg, stack.addr());
         }
 
         // make a weak copy that we will return
@@ -614,13 +664,22 @@ impl ThreadStruct {
         // the saved esp will be overwritten on schedule-out anyway
         let hwcontext = SpinLockIRQ::new(ThreadHardwareContext::default());
 
+        // create our thread local storage region
+        let tls = {
+            let mut pmemory = process.pmemory.lock();
+            let mut tls_manager = process.tls_manager.lock();
+            tls_manager.allocate_tls(&mut pmemory).expect("Failed to allocate TLS for first thread")
+        };
+
         let t = Arc::new(
             ThreadStruct {
                 state,
                 kstack,
                 hwcontext,
                 process: Arc::clone(&process),
-                arg: 0
+                tls_region: tls,
+                tls_elf: Mutex::new(VirtualAddress(0x00000000)),
+                userspace_hwcontext: SpinLock::new(UserspaceHardwareContext::default()),
             }
         );
 
@@ -683,7 +742,14 @@ impl ThreadStruct {
 }
 
 impl Drop for ThreadStruct {
+    /// Late thread death notifications:
+    ///
+    /// * notifies our process that our TLS can be re-used.
     fn drop(&mut self) {
+        unsafe {
+            // safe: we're being dropped, our TLS will not be reused by us.
+            self.process.tls_manager.lock().free_tls(self.tls_region);
+        }
         // todo this should be a debug !
         info!("💀 Dropped a thread : {}", self.process.name)
     }
diff --git a/kernel/src/process/thread_local_storage.rs b/kernel/src/process/thread_local_storage.rs
new file mode 100644
index 000000000..78b46bd50
--- /dev/null
+++ b/kernel/src/process/thread_local_storage.rs
@@ -0,0 +1,186 @@
+//! TLS manager
+//!
+//! # Abstract
+//!
+//! For each thread of a process, the kernel allocates a 0x200-bytes "Thread Local Storage"
+//! memory region in UserLand. In this region resides the 0x100-bytes IPC command buffer,
+//! which is used by the user for passing IPC arguments, and a pointer to the user-controlled
+//! "thread context", which will likely be used for holding userspace thread local variables.
+//!
+//! Each thread in a process has its own private TLS, and from userspace its address can be found out
+//! at any time by reading an architecture-specific register (aarch64 uses `tpidrro_el0`, x86 uses the
+//! `fs` segment selector).
+//!
+//! # Location
+//!
+//! The TLS content is defined by the [TLS] structure. It is a 0x200-bytes memory area that lives
+//! in UserLand so it can be accessed and modified by the user.
+//! The user is allowed to access and modify the TLS of other threads of its process if it
+//! manages to find the location of their TLS, but this is not advised, as it serves little purpose.
+//!
+//! Kernel-side, each thread holds a raw pointer to its TLS (`*mut TLS`) in its [ThreadStruct].
+//! This pointer is used by the kernel to get the thread's `ipc_command_buffer` address,
+//! and is restored as part of hardware context on every context-switch.
+//!
+//! # Allocation
+//!
+//! Each process holds a [TLSManager] in its ProcessStruct, which manages the TLSs for this process,
+//! keeps track of which ones are in-use and which ones are free, and tries to re-use free TLSs when
+//! spawning a thread.
+//!
+//! When a thread is being created, it asks its process's `TLSManager` via [allocate_tls] to get a pointer
+//! to its TLS, and saves it in the `ThreadStruct`.
+//!
+//! When a thread dies, it notifies its process's `TLSManager` via [free_tls], so its TLS can be re-used.
+//!
+//! TLSs are only 0x200 bytes, so the `TLSManager` groups them together to fit inside a page,
+//! and will allocate a new page every time it is full and cannot satisfy a TLS allocation.
+//!
+//! [TLS]: sunrise_libkern::TLS
+//! [TLSManager]: thread_local_storage::TLSManager
+//! [allocate_tls]: thread_local_storage::TLSManager::allocate_tls
+//! [free_tls]: thread_local_storage::TLSManager::free_tls
+
+use crate::VirtualAddress;
+use crate::PAGE_SIZE;
+use crate::paging::process_memory::ProcessMemory;
+use crate::paging::MappingAccessRights;
+use crate::error::KernelError;
+use sunrise_libutils::bit_array_first_zero;
+use sunrise_libkern::{MemoryType, TLS};
+use core::mem::size_of;
+use bit_field::BitArray;
+use alloc::vec::Vec;
+
+/// Manages a page containing 8 TLS
+///
+/// A TLS being only 0x200 bytes, the kernel aggregates the TLSs of a same process in groups of 8
+/// so that they fit in one page.
+///
+/// # Memory leak
+///
+/// Dropping this struct will leak the page, until the process is killed and all its memory is freed.
+/// See [TLSManager] for more on this topic.
+#[derive(Debug)]
+struct TLSPage {
+    /// Address of the page, in UserLand.
+    page_address: VirtualAddress,
+    /// Bitmap indicating if the TLS is in use (`1`) or free (`0`).
+    usage: [u8; PAGE_SIZE / size_of::<TLS>() / 8]
+}
+
+impl TLSPage {
+
+    /// Allocates a new page holding 8 TLS.
+    ///
+    /// The page is user read-write, and its memory type is `ThreadLocal`.
+    ///
+    /// # Error
+    ///
+    /// Fails if the allocation fails.
+    fn new(pmemory: &mut ProcessMemory) -> Result<TLSPage, KernelError> {
+        let addr = pmemory.find_available_space(PAGE_SIZE)?;
+        pmemory.create_regular_mapping(addr, PAGE_SIZE, MemoryType::ThreadLocal, MappingAccessRights::u_rw())?;
+        Ok(TLSPage {
+            page_address: addr,
+            usage: [0u8; PAGE_SIZE / size_of::<TLS>() / 8]
+        })
+    }
+
+    /// Finds an available slot in the TLSPage, marks it allocated, and gives back a pointer to it.
+    ///
+    /// If no slot was available, this function returns `None`.
+    ///
+    /// The returned TLS still has to be bzeroed, as it may contain the data of a previous thread.
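+    ///
+    /// For example, with `size_of::<TLS>() == 0x200`, finding the first free bit at index 3 in a
+    /// page at `0xDEAD0000` yields the TLS at `0xDEAD0000 + 3 * 0x200 == 0xDEAD0600`
+    /// (addresses purely illustrative).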
+    fn allocate_tls(&mut self) -> Option<VirtualAddress> {
+        let index = bit_array_first_zero(&self.usage)?;
+        self.usage.set_bit(index, true);
+        Some(self.page_address + index * size_of::<TLS>())
+    }
+
+    /// Marks a TLS in this TLSPage as free so it can be used by the next spawned thread.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `address` does not fall in this TLSPage, is not a valid offset, or is already marked free.
+    fn free_tls(&mut self, address: VirtualAddress) {
+        debug_assert!(address.floor() == self.page_address, "Freed TLS ptr is outside of TLSPage.");
+        debug_assert!(address.addr() % size_of::<TLS>() == 0, "Freed TLS ptr is not TLS size aligned.");
+        let index = (address - self.page_address) / size_of::<TLS>();
+        debug_assert!(self.usage.get_bit(index), "Freed TLS was not marked occupied");
+        self.usage.set_bit(index, false);
+    }
+}
+
+// size_of::<TLS>() is expected to divide PAGE_SIZE evenly.
+const_assert_eq!(PAGE_SIZE % size_of::<TLS>(), 0);
+
+/// TLS allocator
+///
+/// Each process holds a `TLSManager` in its [ProcessStruct].
+///
+/// When a thread is being created, we ask the `TLSManager` to allocate a TLS for it, and when
+/// it dies we give it back to the manager so it can be re-used the next time this process spawns a thread.
+///
+/// When all of its TLSs are occupied, the `TLSManager` will expand its memory by allocating a new page.
+///
+/// # Memory leak
+///
+/// The `TLSManager` will never free the pages it manages, and they are leaked when the `TLSManager` is dropped.
+/// They will become available again after the process dies and its [ProcessMemory] is freed.
+///
+/// A `TLSManager` will always be dropped at the process's death, at the same time as the `ProcessMemory`.
+/// This prevents a dependency in the order in which the `TLSManager` and the `ProcessMemory` are dropped.
+///
+/// [ProcessStruct]: crate::process::ProcessStruct
+#[derive(Debug, Default)]
+pub struct TLSManager {
+    /// Vec of tracked pages. When all slots are occupied, we allocate a new page.
+    tls_pages: Vec<TLSPage>
+}
+
+impl TLSManager {
+    /// Allocates a new TLS.
+    ///
+    /// This function will try to re-use free TLSs, and will only allocate when all TLSs are in use.
+    ///
+    /// The returned TLS still has to be bzeroed, as it may contain the data of a previous thread.
+    ///
+    /// # Error
+    ///
+    /// Fails if the allocation fails.
+    pub fn allocate_tls(&mut self, pmemory: &mut ProcessMemory) -> Result<VirtualAddress, KernelError> {
+        for tls_page in &mut self.tls_pages {
+            if let Some(tls) = tls_page.allocate_tls() {
+                return Ok(tls);
+            }
+        }
+        // no free slot, we need to allocate a new page.
+        let mut new_tls_page = TLSPage::new(pmemory)?;
+        let tls = new_tls_page.allocate_tls().expect("Empty TLSPage can't allocate");
+        self.tls_pages.push(new_tls_page);
+        Ok(tls)
+    }
+
+
+    /// Mark this TLS as free, so it can be re-used by future spawned threads.
+    ///
+    /// # Unsafety
+    ///
+    /// The TLS will be reassigned, so it must never be used again after calling this function.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the TLS is not managed by this TLSManager, doesn't have a valid offset, or is already marked free.
+    pub unsafe fn free_tls(&mut self, tls: VirtualAddress) {
+        // round down ptr to find out which page it belongs to.
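+        // e.g. a TLS at 0xDEAD0600 falls in the page mapped at 0xDEAD0000 (addresses illustrative).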
+        let tls_page_ptr = tls.floor();
+        for tls_page in &mut self.tls_pages {
+            if tls_page.page_address == tls_page_ptr {
+                tls_page.free_tls(tls);
+                return;
+            }
+        }
+        panic!("Freed TLS {:?} is not in TLSManager.", tls);
+    }
+}
diff --git a/kernel/src/scheduler.rs b/kernel/src/scheduler.rs
index 4a21e7bac..ef46b615f 100644
--- a/kernel/src/scheduler.rs
+++ b/kernel/src/scheduler.rs
@@ -9,6 +9,9 @@ use crate::i386::process_switch::process_switch;
 use crate::sync::{Lock, SpinLockIRQ, SpinLockIRQGuard};
 use core::sync::atomic::Ordering;
 use crate::error::{UserspaceError};
+use sunrise_libkern::TLS;
+use core::cell::RefCell;
+use crate::cpu_locals::ARE_CPU_LOCALS_INITIALIZED_YET;
 
 /// An Arc to the currently running thread.
 ///
@@ -25,16 +28,18 @@ use crate::error::{UserspaceError};
 ///
 /// Setting this value should be done through set_current_thread, otherwise Bad Things:tm:
 /// will happen.
-static mut CURRENT_THREAD: Option<Arc<ThreadStruct>> = None;
+#[thread_local] // this is a cpu_local
+static CURRENT_THREAD: RefCell<Option<Arc<ThreadStruct>>> = RefCell::new(None);
 
 /// Gets the current ThreadStruct, incrementing its refcount.
 /// Will return None if we're in an early boot state, and it has not yet been initialized.
 pub fn try_get_current_thread() -> Option<Arc<ThreadStruct>> {
-    unsafe {
-        // Safe because modifications only happens in the schedule() function,
-        // and outside of that function, seen from a thread' perspective,
-        // CURRENT_THREAD will always have the same value
-        CURRENT_THREAD.clone()
+    // if cpu_locals haven't been initialized, accessing gs:0 will triple fault,
+    // so don't even remotely try to access it.
+    if !ARE_CPU_LOCALS_INITIALIZED_YET.load(Ordering::Relaxed) {
+        None
+    } else {
+        CURRENT_THREAD.borrow().clone()
     }
 }
 
@@ -56,15 +61,28 @@ pub fn get_current_process() -> Arc<ProcessStruct> {
 /// Sets the current ThreadStruct.
 ///
+/// Note that if `CURRENT_THREAD` was the last reference to the current thread, this is where it will
+/// be dropped.
+///
 /// Setting the current thread should *always* go through this function, and never
-/// by setting CURRENT_PROCESS directly. This function uses mem::replace to ensure
+/// by setting [`CURRENT_THREAD`] directly. This function uses mem::replace to ensure
 /// that the ThreadStruct's Drop is run with CURRENT_THREAD set to the *new* value.
 ///
 /// The passed function will be executed after setting the CURRENT_THREAD, but before
 /// setting it back to the RUNNING state.
+///
+/// # Unsafety
+///
+/// Interrupts must be disabled when calling this function. It will mutably borrow [`CURRENT_THREAD`],
+/// so no interrupt may come on top of us and try to access it while we hold the mutable borrow,
+/// otherwise the kernel will panic.
 #[allow(clippy::needless_pass_by_value)] // more readable
 unsafe fn set_current_thread<R, F: FnOnce() -> R>(t: Arc<ThreadStruct>, f: F) -> R {
-    mem::replace(&mut CURRENT_THREAD, Some(t.clone()));
+    let old_thread = {
+        mem::replace(&mut *CURRENT_THREAD.borrow_mut(), Some(t.clone()))
+    };
+    // drop RefMut first, then old thread.
+    drop(old_thread);
 
     let r = f();
 
@@ -109,7 +127,7 @@ pub fn add_to_schedule_queue(thread: Arc<ThreadStruct>) {
 
 /// Checks if a thread is already either in the schedule queue or currently running.
 pub fn is_in_schedule_queue(queue: &SpinLockIRQGuard<'_, Vec<Arc<ThreadStruct>>>, thread: &Arc<ThreadStruct>) -> bool {
-    unsafe { CURRENT_THREAD.iter() }.filter(|v| {
+    CURRENT_THREAD.borrow().iter().filter(|v| {
         v.state.load(Ordering::SeqCst) != ThreadState::Stopped
     }).chain(queue.iter()).any(|elem| Arc::ptr_eq(thread, elem))
 }
@@ -274,7 +292,7 @@ where
         let whoami = if !Arc::ptr_eq(&process_b, &proc) {
             unsafe {
-                // safety: interrupts are off
+                // safety: interrupts are disabled by the interrupt_lock.
                 process_switch(process_b, proc)
             }
         } else {
@@ -287,7 +305,10 @@ where
             // replace CURRENT_THREAD with ourself.
             // If previously running thread had deleted all other references to itself, this
            // is where its drop actually happens
-            unsafe { set_current_thread(whoami.clone(), || lock.lock()) }
+            unsafe {
+                // safety: interrupts are disabled by the interrupt_lock.
+                set_current_thread(whoami.clone(), || lock.lock())
+            }
         }
     };
     break retguard;
@@ -303,16 +324,34 @@ where
 ///
 /// The passed function should take care to change the protection level, and ensure it cleans up all
 /// the registers before calling the EIP, in order to avoid leaking information to userspace.
-pub fn scheduler_first_schedule<F: FnOnce()>(current_thread: Arc<ThreadStruct>, jump_to_entrypoint: F) {
+///
+/// # Unsafety
+///
+/// Interrupts must be off when calling this function. It will set [`CURRENT_THREAD`] and then
+/// turn them back on; as we are running a new thread, no SpinLockIRQ is held.
+pub unsafe fn scheduler_first_schedule<F: FnOnce()>(current_thread: Arc<ThreadStruct>, jump_to_entrypoint: F) {
     // replace CURRENT_THREAD with ourself.
     // If previously running thread had deleted all other references to itself, this
     // is where its drop actually happens
-    unsafe { set_current_thread(current_thread, || ()) };
+    unsafe {
+        // safety: interrupts are off
+        set_current_thread(current_thread, || ())
+    };
 
     unsafe {
         // this is a new process, no SpinLockIRQ is held
         crate::i386::instructions::interrupts::sti();
     }
 
+    // memset the TLS, to clear previous owner's data.
+    // we do it here so we don't have to CrossProcessMap it earlier.
+    unsafe {
+        // safe: we manage this memory, ptr is aligned, 0 is valid for every field of the TLS,
+        //       and TLS contains no padding bytes.
+        let tls_ptr = get_current_thread().tls_region.addr() as *mut TLS;
+        core::ptr::write_bytes(tls_ptr, 0u8, 1);
+        (*tls_ptr).ptr_self = tls_ptr
+    }
+
     jump_to_entrypoint()
 }
diff --git a/libkern/Cargo.toml b/libkern/Cargo.toml
index 78ce9ef99..e96fb39c9 100644
--- a/libkern/Cargo.toml
+++ b/libkern/Cargo.toml
@@ -13,3 +13,7 @@ bitfield = "0.13"
 [dependencies.lazy_static]
 features = ["spin_no_std"]
 version = "1.3.0"
+
+[dependencies.static_assertions]
+version = "0.3.1"
+features = ["nightly"]
diff --git a/libkern/src/lib.rs b/libkern/src/lib.rs
index 23ffba5dc..54a9a0557 100644
--- a/libkern/src/lib.rs
+++ b/libkern/src/lib.rs
@@ -28,6 +28,8 @@ pub mod error;
 
 use core::fmt;
 use bitfield::bitfield;
+use static_assertions::assert_eq_size;
+use core::mem::size_of;
 
 bitfield! {
     /// Represents the current state of a memory region: why is it allocated, and
@@ -306,6 +308,50 @@ pub struct MemoryInfo {
     pub device_ref_count: u32,
 }
 
+/// Buffer used for Inter Process Communication.
+/// Kernel reads, interprets, and copies data from/to it.
+///
+/// Found in the [TLS] of every thread.
+pub type IpcBuffer = [u8; 0x100];
+
+/// Thread Local Storage region.
+///
+/// The kernel allocates one for every thread, and makes a register point (indirectly) to it
+/// so that the userspace can access it at any time.
+///
+/// * x86_32: Stored at `fs:0x00..fs:0x200`.
+/// * x86_64: Stored at `gs:0x00..gs:0x200`.
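+///
+/// As a rough layout sketch (offsets for x86_32, where `usize` and `*mut TLS` are 4 bytes;
+/// derived from the field sizes below):
+///
+/// ```text
+/// 0x000 ptr_self
+/// 0x004 _reserved0
+/// 0x010 ipc_command_buffer
+/// 0x110 _reserved1
+/// 0x1FC ptr_thread_context
+/// ```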
+#[repr(C, align(16))]
+pub struct TLS {
+    /// Pointer pointing to this TLS region (i.e. pointing to itself). Set by the kernel.
+    ///
+    /// x86 uses segmentation for accessing the TLS, and it has no way to translate `fs:0x0`
+    /// to an address in the flat segmentation model that every other segment uses.
+    ///
+    /// This pointer serves as a translation.
+    pub ptr_self: *mut TLS,
+    /// reserved or unknown.
+    _reserved0: [u8; 16 - size_of::<*mut TLS>()],
+    /// Buffer used for IPC. Kernel reads, interprets, and copies data from/to it.
+    pub ipc_command_buffer: IpcBuffer,
+    /// reserved or unknown.
+    _reserved1: [u8; 0x200 - 16 - size_of::<IpcBuffer>() - size_of::<usize>()],
+    /// User controlled pointer to thread context. Not observed by the kernel.
+    pub ptr_thread_context: usize,
+}
+
+impl fmt::Debug for TLS {
+    /// Debug on TLS displays only the address of the IPC command buffer, and `ptr_thread_context`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        f.debug_struct("TLS")
+            .field("ipc_command_buffer_address", &(&self.ipc_command_buffer as *const u8))
+            .field("ptr_thread_context", &(self.ptr_thread_context as *const u8))
+            .finish()
+    }
+}
+
+assert_eq_size!(TLS, [u8; 0x200]);
+
 macro_rules! syscalls {
     (
         static $byname:ident;
@@ -466,8 +512,9 @@ syscalls! {
         MapFramebuffer = 0x80,
         StartProcessEntrypoint = 0x81,
         MapMmioRegion = 0x82,
+        SetThreadArea = 0x83,
         ---
         // Add SVCs before this line.
-        MaxSvc = 0x82
+        MaxSvc = 0x83
     }
 }
diff --git a/libuser/src/crt0/mod.rs b/libuser/src/crt0/mod.rs
index 9c5bf3701..a835f9182 100644
--- a/libuser/src/crt0/mod.rs
+++ b/libuser/src/crt0/mod.rs
@@ -2,9 +2,7 @@
 //! This module is a minimal RT0 handling the entry point of the application.
 //! It handles relocation, cleans the bss and then finally calls start_main.
 
-use core::ptr;
-
-mod relocation;
+pub mod relocation;
 
 /// Executable entrypoint. Handle relocations and calls real_start.
 #[cfg(target_os = "none")]
@@ -23,6 +21,10 @@ pub unsafe extern fn start() {
     _start_shim:
         pop eax
 
+        // Save our thread handle passed by the kernel
+        // `esi` is callee-saved
+        mov esi, ecx
+
         // Save eip_pos address
         mov ecx, eax
 
@@ -42,6 +44,10 @@ pub unsafe extern fn start() {
         push ebx
         call clean_bss
 
+        // Init TLS
+        push esi
+        call init_main_thread
+
         call real_start
     ");
}
 
@@ -59,5 +65,5 @@ pub unsafe extern fn clean_bss(module_header: *const relocation::ModuleHeader) {
     let bss_end_address = module_header_address.add(module_header.bss_end_off as usize) as *mut u8;
     let count = bss_end_address as usize - bss_start_address as usize;
 
-    ptr::write_bytes(bss_start_address, 0, count);
-}
\ No newline at end of file
+    core::ptr::write_bytes(bss_start_address, 0, count);
+}
diff --git a/libuser/src/crt0/relocation.rs b/libuser/src/crt0/relocation.rs
index c7fcc7bd0..bf13240b0 100644
--- a/libuser/src/crt0/relocation.rs
+++ b/libuser/src/crt0/relocation.rs
@@ -52,6 +52,12 @@ impl ModuleHeader {
     pub const MAGIC: u32 = 0x30444F4D;
 }
 
+extern "C" {
+    /// After relocations have been performed, you can access the module_header in Rust code
+    /// through this symbol.
+    pub static module_header: ModuleHeader;
+}
+
 /// A simple definition of an ELF Dynamic section entry.
 #[repr(C)]
 #[derive(Debug)]
@@ -124,15 +130,15 @@ const R_386_RELATIVE: usize = 8;
 #[cfg(target_os = "none")]
 #[no_mangle]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe extern fn relocate_self(aslr_base: *mut u8, module_header: *const ModuleHeader) -> u32 {
-    let module_header_address = module_header as *const u8;
-    let module_header = &(*module_header);
+pub unsafe extern fn relocate_self(aslr_base: *mut u8, module_headr: *const ModuleHeader) -> u32 {
+    let module_header_address = module_headr as *const u8;
+    let module_headr = &(*module_headr);
 
-    if module_header.magic != ModuleHeader::MAGIC {
+    if module_headr.magic != ModuleHeader::MAGIC {
         return 1;
     }
 
-    let mut dynamic = module_header_address.add(module_header.dynamic_off as usize) as *const ElfDyn;
+    let mut dynamic = module_header_address.add(module_headr.dynamic_off as usize) as *const ElfDyn;
 
     let mut rela_offset = None;
     let mut rela_entry_size = 0;
diff --git a/libuser/src/lib.rs b/libuser/src/lib.rs
index b2fae7ba0..38ddb616b 100644
--- a/libuser/src/lib.rs
+++ b/libuser/src/lib.rs
@@ -43,6 +43,8 @@ pub mod syscalls;
 pub mod mem;
 pub mod types;
 pub mod ipc;
+pub mod threads;
+pub mod thread_local_storage;
 
 #[gen_ipc(path = "../../ipcdefs/sm.id", prefix = "sunrise_libuser")]
 pub mod sm {}
diff --git a/libuser/src/syscalls.rs b/libuser/src/syscalls.rs
index 6b3fc0482..16e261df6 100644
--- a/libuser/src/syscalls.rs
+++ b/libuser/src/syscalls.rs
@@ -145,7 +145,11 @@ pub fn exit_process() -> ! {
 }
 
 /// Creates a thread in the current process.
-pub fn create_thread(ip: extern fn() -> !, arg: usize, sp: *const u8, priority: u32, processor_id: u32) -> Result<Thread, KernelError> {
+///
+/// # Unsafety
+///
+/// `sp` must be a valid pointer to a stack that is uniquely owned, as the thread will write to it.
+pub unsafe fn create_thread(ip: extern "fastcall" fn(usize) -> !, arg: usize, sp: *const u8, priority: u32, processor_id: u32) -> Result<Thread, KernelError> {
     unsafe {
         let (out_handle, ..) = syscall(nr::CreateThread, ip as usize, arg, sp as _, priority as _, processor_id as _, 0)?;
         Ok(Thread(Handle::new(out_handle as _)))
@@ -439,3 +443,42 @@ pub fn map_mmio_region(physical_address: usize, size: usize, virtual_address: us
         Ok(())
     }
 }
+
+/// Set thread local area pointer.
+///
+/// Akin to `set_thread_area` on Linux, this syscall sets the `gs` segment selector's base address
+/// to the address passed as argument.
+///
+/// The user will likely want to make it point to its ELF thread local storage, as `gs:0` is expected
+/// to contain the thread pointer `tp`.
+///
+/// Unlike Linux, you only have **one** user controlled segment, found in `gs`, and you can only set its address.
+///
+/// The limit will always be set to `0xFFFFFFFF`, and adding this offset to a non-zero base address
+/// means that the resulting address will "wrap around" the address space, and end-up **under**
+/// the base address.
+/// You can use this property to implement thread local storage variant II - gnu model,
+/// as thread local variables are expected to be found "below" `gs:0`, with "negative" offsets such as
+/// `gs:0xFFFFFFFC`.
+///
+/// ## x86_64
+///
+/// ![same, but different, but still same](https://media.giphy.com/media/C6JQPEUsZUyVq/giphy.gif)
+///
+/// `fs` is used instead of `gs`, because reasons.
+///
+/// # Safety
+///
+/// `address` should point to a valid TLS image, unique to the current thread.
+/// Setting `gs` to random data, malformed image, or shared image is UB.
+///
+/// # Errors
+///
+/// * The whole initial design of TLS on x86 should be considered an error.
+/// * No returned error otherwise.
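+///
+/// # Example
+///
+/// A minimal sketch (illustrative; `tcb_addr` is a hypothetical address of a valid,
+/// thread-unique TLS image whose first word contains its own address):
+///
+/// ```
+/// unsafe { set_thread_area(tcb_addr).expect("SetThreadArea failed") };
+/// ```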
+pub unsafe fn set_thread_area(address: usize) -> Result<(), KernelError> {
+    unsafe {
+        syscall(nr::SetThreadArea, address, 0, 0, 0, 0, 0)?;
+        Ok(())
+    }
+}
diff --git a/libuser/src/thread_local_storage.rs b/libuser/src/thread_local_storage.rs
new file mode 100644
index 000000000..ae96d24d9
--- /dev/null
+++ b/libuser/src/thread_local_storage.rs
@@ -0,0 +1,337 @@
+//! Thread Local Storage on x86
+//!
+//! # Usage
+//!
+//! You declare a thread-local using the [#\[thread_local\] attribute]:
+//!
+//! ```
+//! #[thread_local]
+//! static MY_THREAD_LOCAL: core::cell::Cell<u32> = core::cell::Cell::new(42);
+//! ```
+//!
+//! and access it as if it was a regular static, only that each thread will have its own view of
+//! the static.
+//!
+//! The compiler is responsible for generating code that will access the right address, provided
+//! we configured TLS correctly.
+//!
+//! ##### Early startup
+//!
+//! Note that you can't access a thread-local static before [`init_main_thread`] is called, because
+//! the thread-local area for the main thread isn't initialized yet, and this will likely result in
+//! a page fault or UB.
+//!
+//! # Inner workings
+//!
+//! We implement the TLS according to conventions laid out by [Ulrich Drepper's paper on TLS] which
+//! are followed by LLVM and most compilers.
+//!
+//! Since we're running on i386, we're following variant II.
+//!
+//! Each thread's `gs` segment points to a thread local memory area where thread-local statics live.
+//! thread-local statics are simply accessed through an offset from `gs`.
+//!
+//! The linker is in charge of creating an ELF segment of type `PT_TLS` where an initialization image
+//! for thread local regions can be found, and is meant to be copy-pasted for every thread we create.
+//!
+//! ##### on SunriseOS
+//!
+//! On Sunrise, the area where `gs` points to is per-thread and user-controlled; we set it at the
+//! startup of every thread with the [`set_thread_area`] syscall.
+//!
+//! The TLS initialisation image is supposed to be retrieved from our own program headers, which is
+//! a really weird design.
+//! Since we don't have access to our program headers, we instead use the linker to expose the following
+//! symbols:
+//!
+//! * [`__tls_start__`], the start address of our TLS initialisation image.
+//! * [`__tls_end__`], the end address of our TLS initialisation image.
+//! * [`__tls_align__`], the alignment the linker forced on the TLS segment.
+//!
+//! Those symbols are the addresses of the initialization image in our `.tdata`, so it can directly be copied.
+//!
+//! ##### dtv and `__tls_get_addr`
+//!
+//! Since we don't do dynamic loading (yet ?), we know our TLS model will be static (either
+//! Initial Exec or Local Exec).
+//! Those models always access thread-locals directly via `gs`, and always short-circuit the dtv.
+//!
+//! So we don't even bother allocating a dtv array at all. Neither do we define a `__tls_get_addr`
+//! function.
+//!
+//! This might change in the future when we will want to support dynamic loading.
+//!
+//! [`init_main_thread`]: crate::threads::init_main_thread
+//! [Ulrich Drepper's paper on TLS]: https://web.archive.org/web/20190710135250/https://akkadia.org/drepper/tls.pdf
+//! [`set_thread_area`]: crate::syscalls::set_thread_area
+//! [#\[thread_local\] attribute]: https://github.com/rust-lang/rust/issues/10310
+//! [`__tls_start__`]: self::thread_local_storage::__tls_start__
+//! [`__tls_end__`]: self::thread_local_storage::__tls_end__
+//! [`__tls_align__`]: self::thread_local_storage::__tls_align__
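+//!
+//! For instance, under the Initial Exec model, reading a thread-local static on x86_32 typically
+//! compiles down to a single `gs`-relative load such as `mov eax, gs:[0xFFFFFFFC]`
+//! (the offset shown is purely illustrative).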
+
+use crate::syscalls;
+use sunrise_libutils::div_ceil;
+use alloc::alloc::{alloc_zeroed, dealloc, Layout};
+use core::mem::{align_of, size_of};
+use core::fmt::Debug;
+
+extern "C" {
+    /// The address of the start of the TLS initialisation image in our `.tdata`.
+    ///
+    /// Because we don't want to read our own `PT_TLS` program header,
+    /// the linker provides a symbol for the start of the init image.
+    ///
+    /// This is an **absolute symbol**, which means its "address" is actually its value,
+    /// i.e. to get a pointer do:
+    ///
+    /// ```
+    /// let tls_start: *const u8 = unsafe { &__tls_start__ as *const u8 };
+    /// ```
+    static __tls_start__: u8;
+    /// The address of the end of the TLS initialisation image in our `.tdata`.
+    ///
+    /// Because we don't want to read our own `PT_TLS` program header,
+    /// the linker provides a symbol for the end of the init image.
+    ///
+    /// This is an **absolute symbol**, which means its "address" is actually its value,
+    /// i.e. to get a pointer do:
+    ///
+    /// ```
+    /// let tls_end: *const u8 = unsafe { &__tls_end__ as *const u8 };
+    /// ```
+    static __tls_end__: u8;
+    /// The alignment of the TLS segment.
+    ///
+    /// Because we don't want to read our own `PT_TLS` program header,
+    /// the linker provides a symbol for the alignment it used.
+    ///
+    /// This is an **absolute symbol**, which means its "address" is actually its value,
+    /// i.e. to get its value do:
+    ///
+    /// ```
+    /// let tls_align = unsafe { &__tls_align__ as *const _ as usize };
+    /// ```
+    static __tls_align__: usize;
+}
+
+/// The Thread Local Storage manager for a thread
+///
+/// We allocate one for every thread we create, and store it in the thread's context.
+/// When it is dropped, all allocated memory is freed.
+#[derive(Debug)]
+pub struct TlsElf {
+    /// The array of static module blocks + TCB
+    static_region: ThreadLocalStaticRegion,
+    // no dtv, no dynamics regions for now
+}
+
+impl TlsElf {
+    /// Allocates and initializes the static region, including TCB.
+    ///
+    /// Finds out the location of the initialization image from linker defined symbols.
+    pub fn allocate() -> Self {
+        // copy tls static area
+        let static_block_start = unsafe {
+            // safe: set by linker
+            (&__tls_start__ as *const u8)
+        };
+        let static_block_len = unsafe {
+            // safe: set by the linker
+            (&__tls_end__ as *const u8 as usize) - (&__tls_start__ as *const u8 as usize)
+        };
+        let static_block = unsafe {
+            // safe: - the initialization image will never be accessed mutably,
+            //       - it lives in our .data so its lifetime is &'static,
+            //       - u8 is POD and always aligned,
+            //       => creating a const slice is ok.
+            core::slice::from_raw_parts(static_block_start, static_block_len)
+        };
+        let static_block_align = unsafe {
+            // safe: set by the linker
+            &__tls_align__ as *const _ as usize
+        };
+        let tls_static_region = ThreadLocalStaticRegion::allocate(
+            static_block,
+            static_block_len,
+            static_block_align);
+
+        TlsElf {
+            static_region: tls_static_region
+        }
+    }
+
+    /// Calls [`syscalls::set_thread_area`] with the address of this TlsElf's [`ThreadControlBlock`].
+    ///
+    /// # Safety
+    ///
+    /// The TlsElf should not be enabled for any other thread.
+    /// Having a TLS shared by multiple threads is UB.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the syscall returned an error, as this is unrecoverable.
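+    ///
+    /// # Example
+    ///
+    /// A typical call sequence, as done at thread startup (sketch):
+    ///
+    /// ```
+    /// let tls = TlsElf::allocate();
+    /// // safe: `tls` was just allocated, and is enabled for no other thread.
+    /// unsafe { tls.enable_for_current_thread() };
+    /// ```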
+    pub unsafe fn enable_for_current_thread(&self) {
+        unsafe {
+            // safe: TlsElf is RAII so self is a valid well-formed TLS region.
+            //       However, we cannot guarantee that it's not used by anybody else,
+            //       so propagate this constraint.
+            syscalls::set_thread_area(self.static_region.tcb() as *const _ as usize)
+                .expect("Cannot set thread TLS pointer");
+        }
+    }
+}
+
+/// The `round` function, as defined in section 3.0:
+///
+/// ```text
+/// round(x,y) = y * ⌈x/y⌉
+/// ```
+///
+/// Just a poorly-named `align_up`. For example, `tls_align_up(0x205, 0x10) == 0x210`.
+fn tls_align_up(x: usize, y: usize) -> usize {
+    y * div_ceil(x, y)
+}
+
+/// Elf TLS TCB
+///
+/// Variant II leaves the specification of the ThreadControlBlock (TCB) to the implementor,
+/// with the only requirement that the first word in the TCB, pointed to by `tp`, contains its own
+/// address, i.e. is a pointer to itself (GNU variant).
+///
+/// We don't need to store anything else in the TCB, it's just the self pointer.
+#[repr(C)]
+#[derive(Debug)]
+struct ThreadControlBlock {
+    /// Pointer containing its own address.
+    tp_self_ptr: *const ThreadControlBlock,
+}
+
+/// Represents an allocated thread local static region.
+///
+/// Because TLS regions have a really specific layout, we don't use Box and instead interact with
+/// the allocator directly. This type is the equivalent of a Box: it stores the pointer to the
+/// allocated memory, and deallocates it on Drop.
+struct ThreadLocalStaticRegion {
+    /// Pointer to the allocated memory
+    ptr: usize,
+    /// Layout of the allocated memory. Used when deallocating.
+    layout: Layout,
+    /// Offset of the TCB in this allocation.
+    tcb_offset: usize,
+}
+
+impl ThreadLocalStaticRegion {
+    /// Returns a pointer to the [ThreadControlBlock] in the allocated region.
+    /// All TLS arithmetic is done relative to this pointer.
+    ///
+    /// For TLS to work, the value stored at this address should be the address itself, i.e.
+    /// having a pointer pointing to itself.
+    fn tcb(&self) -> &ThreadControlBlock {
+        unsafe {
+            // safe: - guaranteed to be aligned, and still in the allocation,
+            //       - no one should ever have a mut reference to the ThreadControlBlock after its
+            //         initialisation.
+            &*((self.ptr + self.tcb_offset) as *const ThreadControlBlock)
+        }
+    }
+
+    /// Allocates a ThreadLocalStaticRegion.
+    ///
+    /// The region's content is copied from the TLS initialisation image described by `block_src`,
+    /// padded with 0s for `block_size`, to which is appended a [`ThreadControlBlock`].
+    ///
+    /// The ThreadLocalStaticRegion uses `PT_TLS`'s `p_align` field passed in `block_align`
+    /// to compute its layout and total size.
+    ///
+    /// ### Alignment
+    ///
+    /// ```text
+    ///
+    ///        V----------------------V tls_align_up(tls_size_1, align_1)
+    ///
+    ///                               +-- gs:0
+    ///                               |
+    ///        +----------------------|-- tlsoffset_1 = gs:0 - tls_align_up(tls_size_1, align_1)
+    ///        |                      |
+    ///        V                      V
+    ///
+    ///   j----------------~-----j---------j
+    ///   ... |  tls_size_1  | pad |   TCB   |
+    ///   j----------------~-----j---------j
+    ///
+    ///   ^       ^               ^
+    ///   |       |               |
+    ///   |       |               +-- TCB_align: Determines alignment of everything.
+    ///   |       |                   = max(align_of::<TCB>(), align_1). e.g. : 16.
+    ///   |       |
+    ///   |       +------------------------- TCB_align - n * align_1
+    ///   |                                  => still aligned to align_1 because TCB is aligned to align_1.
+    ///   |
+    ///   +------------------------------ alloc_align == TCB_align
+    ///                                   => &TCB = &alloc + tls_align_up(gs:0 - tls_offset_1, TCB_align)
+    ///
+    ///   ^---^ alloc_pad
+    ///
+    /// ```
+    #[allow(clippy::cast_ptr_alignment)]
+    fn allocate(block_src: &[u8], block_size: usize, block_align: usize) -> Self {
+        let tls_offset1 = tls_align_up(block_size, block_align);
+        let tcb_align = usize::max(align_of::<ThreadControlBlock>(), block_align);
+        let tcb_offset = tls_align_up(tls_offset1, tcb_align);
+        let alloc_pad_size = tcb_offset - tls_offset1;
+        let layout = Layout::from_size_align(
+            tcb_offset + size_of::<ThreadControlBlock>(),
+            tcb_align
+        ).unwrap();
+        let alloc = unsafe {
+            // safe: layout.size >= size_of::<ThreadControlBlock>() -> layout.size != 0
+            alloc_zeroed(layout)
+        };
+        assert!(!alloc.is_null(), "thread_locals: failed static area allocation");
+
+        unsafe {
+            // safe: everything is done within our allocation, u8 is always aligned.
+            // copy data
+            core::ptr::copy_nonoverlapping(
+                block_src as *const [u8] as *const u8,
+                alloc.add(alloc_pad_size),
+                block_src.len()
+            );
+            // .tbss + pad are already set to 0 by alloc_zeroed.
+            // write tcb
+            core::ptr::write(
+                alloc.add(tcb_offset) as *mut ThreadControlBlock,
+                ThreadControlBlock {
+                    tp_self_ptr: alloc.add(tcb_offset) as *const ThreadControlBlock
+                }
+            );
+        };
+        Self {
+            ptr: alloc as usize,
+            layout,
+            tcb_offset
+        }
+    }
+}
+
+impl Drop for ThreadLocalStaticRegion {
+    /// Dropping a ThreadLocalStaticRegion deallocates it.
+    fn drop(&mut self) {
+        unsafe {
+            // safe: - self.ptr is obviously allocated.
+            //       - self.layout is the same argument that was used for alloc.
+            dealloc(self.ptr as *mut u8, self.layout)
+        };
+    }
+}
+
+impl Debug for ThreadLocalStaticRegion {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> Result<(), core::fmt::Error> {
+        f.debug_struct("ThreadLocalStaticRegion")
+            .field("start_address", &self.ptr)
+            .field("tcb_address", &self.tcb())
+            .field("total_size", &self.layout.size())
+            .finish()
+    }
+}
diff --git a/libuser/src/threads.rs b/libuser/src/threads.rs
new file mode 100644
index 000000000..1e115826d
--- /dev/null
+++ b/libuser/src/threads.rs
@@ -0,0 +1,331 @@
+//! Low-level api to create threads and start them.
+//!
+//! This module defines the low-level representation of a thread, akin to pthread on Unix.
+//! You will want to abstract it in the libstd.
+//!
+//! # Threads on SunriseOS
+//!
+//! The sunrise kernel provides only three syscalls of interest relative to threads:
+//!
+//! * [`svcCreateThread`] : allocates kernel resources for a thread and returns a handle to it.
+//! * [`svcStartThread`] : starts a thread created by `svcCreateThread`.
+//! * [`svcExitThread`] : terminates the current thread.
+//!
+//! Note that it is impossible to terminate a thread other than our own.
+//!
+//! The first thread of a process (referred to later in this doc as the "main thread") gets the handle to
+//! its own thread in one of its registers when it is started by the kernel.
+//!
+//! ### TLS region
+//!
+//! Every thread possesses a small memory region called [Thread Local Storage region] which the kernel
+//! allocates, and puts its address in a read-only register so it can be accessed from the userspace.
+//!
+//! There lives the [IpcBuffer], and a userspace controlled pointer where the user can store a
+//! user-defined context. We use it to keep a pointer to a [ThreadContext] (see below).
+//!
+//! # Threads in libuser
+//!
+//! The main thread will always live for the entire life of the process.
+//! When its routine returns, it calls `svcExitProcess` and every other thread will be killed.
+//!
+//! It can create other threads, which are represented by the [`Thread`] struct.
+//! A `Thread` detaches (read "leak") the associated thread when it is dropped,
+//! which means that there is no longer any handle to the thread and no way to join on it.
+//!
+//! This is analogous to the way the libstd threads work.
+//!
+//! ### Thread context
+//!
+//! For every thread we create (and also for the main thread), we allocate a [ThreadContext]
+//! structure on the heap, which holds its stack, its thread handle so it will be able to use
+//! mutexes, the routine we want it to execute, and the argument to pass to it.
+//!
+//! ### Thread entry point
+//!
+//! We tell the kernel the entry of the thread is [`thread_trampoline`].
+//! This function will set up a valid environment for the routine (mainly handle ELF thread local variables),
+//! call the routine with its argument, and finally call `svcExitThread` when the routine has returned.
+//!
+//! [`svcCreateThread`]: crate::syscalls::create_thread
+//! [`svcStartThread`]: crate::syscalls::start_thread
+//! [`svcExitThread`]: crate::syscalls::exit_thread
+//! [Thread Local Storage region]: sunrise_libkern::TLS
+//! [IpcBuffer]: sunrise_libkern::IpcBuffer
+//! [ThreadContext]: self::threads::ThreadContext
+//! [`Thread`]: self::threads::Thread
+//! [`thread_trampoline`]: self::threads::thread_trampoline
+
+use crate::types::Thread as ThreadHandle;
+use crate::syscalls;
+use crate::error::Error;
+use crate::thread_local_storage::TlsElf;
+use sunrise_libkern::{TLS, IpcBuffer};
+use alloc::boxed::Box;
+use core::mem::ManuallyDrop;
+use core::fmt;
+use spin::Once;
+
+/// Size of a thread's stack, in bytes.
+const STACK_SIZE: usize = 0x8000;
+
+/// Structure holding the thread local context of a thread.
+/// Allocated at thread creation by the creator of the thread.
+#[repr(C)]
+pub struct ThreadContext {
+    /// Pointer to the function this thread should execute after
+    /// all its set-up in [thread_trampoline] is done.
+    entry_point: fn (usize) -> (),
+    /// The argument to call it with.
+    arg: usize,
+    /// The stack used by this thread.
+    ///
+    /// `None` for the main thread's stack, since it was not allocated by us
+    /// and will never be freed as it'll be the last thread alive.
+    ///
+    /// `Some` for every other thread.
+    stack: Option<Box<[u8; STACK_SIZE]>>,
+    /// The thread local storage of this thread.
+    ///
+    /// This is where `#[thread_local]` statics live.
+    tls_elf: Once<TlsElf>,
+    /// The ThreadHandle of this thread.
+    ///
+    /// The thread needs to be able to access its own ThreadHandle at anytime
+    /// to be able to use mutexes.
+    thread_handle: Once<ThreadHandle>,
+}
+
+impl fmt::Debug for ThreadContext {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        f.debug_struct("ThreadContext")
+            .field("entry_point", &self.entry_point)
+            .field("arg", &self.arg)
+            .field("stack_address", &(self.stack.as_ref().map(|v| v as *const _ as usize).unwrap_or(0)))
+            .field("tls", &self.tls_elf)
+            .field("thread_handle", &self.thread_handle)
+            .finish()
+    }
+}
+
+/// Context of the main thread. Instead of allocating it at startup, this one lives in the `.data`.
+///
+/// The handle of the main thread is stored to it at startup.
+///
+/// ## Mock values:
+///
+/// Because the main thread is started by the kernel and not libuser, we don't have control or
+/// even knowledge of most of the fields that should be in our context. Because of this, we choose
+/// to put mock values instead.
+/// This includes:
+///
+/// * `.entry_point`: unused, we are started by the kernel
+/// * `.arg`: unused
+/// * `.stack`: our stack is not allocated by us, and we don't know its size.
+static MAIN_THREAD_CONTEXT: ThreadContext = ThreadContext {
+    entry_point: |_| { },
+    arg: 0,
+    stack: None,
+    tls_elf: Once::new(), // will be initialised at startup.
+    thread_handle: Once::new(), // will be initialized at startup.
+};
+
+/// Get a pointer to this thread's [TLS] region pointed to by `fs`, translated to the flat-memory model.
+#[inline]
+fn get_my_tls_region() -> *mut TLS {
+    let mut tls: *mut TLS;
+    unsafe {
+        // get the address of the TLS region from fs:0x00 translated to the flat model
+        // safe: fs:0x00 is guaranteed by the kernel to hold a valid pointer to itself.
+        asm!("mov $0, fs:0x00" : "=r" (tls) ::: "intel");
+    }
+    tls
+}
+
+
+/// Get a reference to this thread's [ThreadContext], from the [TLS] region pointed to by `fs`.
+///
+/// # Panics
+///
+/// Panics if the thread context hasn't been initialized yet.
+/// This happens immediately in the startup of a thread, and relatively early for the main thread.
+pub fn get_my_thread_context() -> &'static ThreadContext {
+    // read the last bytes of TLS region and treat it as a pointer
+    let context_ptr = unsafe {
+        // safe: - get_my_tls returns a valid 0x200 aligned ptr,
+        //       - .ptr_thread_context is correctly aligned in the TLS region to usize.
+        (*get_my_tls_region()).ptr_thread_context as *const ThreadContext
+    };
+    // The TLS region is initially memset to 0 by the kernel.
+    // If the context_ptr is 0 it means it hasn't been written yet.
+    debug_assert!(!context_ptr.is_null(), "thread context not initialized yet");
+    // create a ref
+    unsafe {
+        // safe: the context will never be accessed mutably after its allocation,
+        //       it is guaranteed to be well-formed since we allocated it ourselves,
+        //       the thread context is never deallocated, so 'static is appropriate.
+        //       We will want to return an Arc in the future.
+        //       => creating a ref is safe.
+        &*(context_ptr)
+    }
+}
+
+/// Get a pointer to this thread's [IpcBuffer], from the [TLS] region pointed to by `fs`.
+///
+/// [IpcBuffer]: sunrise_libkern::IpcBuffer
+#[inline]
+pub fn get_my_ipc_buffer() -> *mut IpcBuffer {
+    unsafe {
+        // safe: just pointer arithmetic
+        &(*get_my_tls_region()).ipc_command_buffer as *const _ as *mut _
+    }
+}
+
+/// Libuser's representation of a thread.
+///
+/// This is the low-level representation of a thread, akin to `pthread_t` on Unix.
+///
+/// You can create and start a thread from its `Thread` structure.
+///
+/// A `Thread` detaches (read "leak resources of") the associated thread when it is dropped,
+/// which means that there is no longer any handle to the thread and no way to join on it.
+///
+/// Internally owns the [ThreadContext] for this thread, including its stack.
+#[derive(Debug)]
+pub struct Thread(ManuallyDrop<Box<ThreadContext>>);
+
+impl Thread {
+    /// Start this thread.
+    pub fn start(&self) -> Result<(), Error> {
+        syscalls::start_thread(&(*self.0).thread_handle.r#try().unwrap())
+            .map_err(|v| v.into())
+    }
+
+    /// Allocates resources for a thread. To start it, call [`start`].
+    ///
+    /// Allocates the stack, sets up the context and TLS, and calls `svcCreateThread`.
+    ///
+    /// [`start`]: Thread::start
+    // TODO: Libuser Thread stack guard
+    // BODY: Currently the stack of every non-main thread is allocated in the heap, and no page
+    // BODY: guard protects from stack-overflowing and rewriting all the heap.
+    // BODY:
+    // BODY: This is of course terrible for security, as with this, stack overflowing is U.B.
+    // BODY:
+    // BODY: The simpler way to fix this would be to continue allocating the stack on the heap,
+    // BODY: but remap the last page with no permissions with the yet unimplemented svcMapMemory syscall.
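+    ///
+    /// # Example
+    ///
+    /// Creating and starting a thread (sketch; `my_routine` stands for any `fn(usize)` of yours):
+    ///
+    /// ```
+    /// fn my_routine(arg: usize) { /* do work */ }
+    /// let t = Thread::create(my_routine, 0).expect("Failed to create thread");
+    /// t.start().expect("Failed to start thread");
+    /// ```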
+    pub fn create(entry: fn (usize) -> (), arg: usize) -> Result<Self, Error> {
+
+        let tls_elf = Once::new();
+        tls_elf.call_once(TlsElf::allocate);
+        // allocate a context
+        let context = ManuallyDrop::new(Box::new(ThreadContext {
+            entry_point: entry,
+            arg,
+            stack: Some(box [0u8; STACK_SIZE]),
+            tls_elf,
+            thread_handle: Once::new(), // will be rewritten in a second
+        }));
+        match unsafe {
+            // safe: sp is valid and points to memory only owned by the thread,
+            //       which is used exclusively for stack.
+            syscalls::create_thread(
+                thread_trampoline,
+                &**context as *const ThreadContext as usize,
+                (&**context.stack.as_ref().unwrap() as *const u8).wrapping_add(STACK_SIZE),
+                0,
+                0)
+        } {
+            Err(err) => {
+                error!("Failed to create thread {:?}: {}", &*context, err);
+                // dealloc the stack and context
+                drop(ManuallyDrop::into_inner(context));
+                Err(err.into())
+            }
+            Ok(thread_handle) => {
+                // finally, push the handle to the context.
+                context.thread_handle.call_once(|| { thread_handle });
+                debug!("Allocated new thread: {:?}", context);
+
+                Ok(Self(context))
+            }
+        }
+    }
+}
+
+/// Small stub executed by every thread but the main thread when they start.
+///
+/// Saves the pointer to their [ThreadContext] in their [TLS], performs copy of `.tdata` and `.tbss`,
+/// calls the routine this thread was meant to perform, and calls `svcExitThread` when it's finished.
+///
+/// # ABI
+///
+/// This function is the entry point of a thread, called directly by the kernel, with the
+/// argument passed by [Thread::create].
+/// It expects this argument to be the address of its `ThreadContext` so it can save it in its `TLS`.
+///
+/// The routine to call and its argument are expected to be found in this `ThreadContext`.
+extern "fastcall" fn thread_trampoline(thread_context_addr: usize) -> ! {
+    debug!("starting from new thread, context at address {:#010x}", thread_context_addr);
+    // first save the address of our context in our TLS region
+    unsafe {
+        // safe: - get_my_tls returns a valid 0x200 aligned ptr,
+        //       - .ptr_thread_context is correctly aligned in the TLS region to usize,
+        //       - we're a private fn, thread_context_addr is guaranteed by our caller to point to the context.
+        (*get_my_tls_region()).ptr_thread_context = thread_context_addr
+    };
+
+    // use get_my_thread_context to create a ref for us
+    let thread_context = get_my_thread_context();
+
+    // make gs point to our tls
+    unsafe {
+        // safe: this module guarantees that the TLS region is unique to this thread.
+        thread_context.tls_elf.r#try().unwrap().enable_for_current_thread();
+    }
+
+    // call the routine saved in the context, passing it the arg saved in the context
+    (thread_context.entry_point)(thread_context.arg);
+
+    debug!("exiting thread");
+    syscalls::exit_thread()
+}
+
+
+impl Drop for Thread {
+    fn drop(&mut self) {
+        // todo: Properly free resources after thread detach
+        // body: When detaching a thread, we should ensure that the associated resources (stack,
+        // body: handle, context, etc...) are properly freed before the Process exits. This can be
+        // body: done by adding the ThreadContext to a global Vec<> of ThreadContext that gets freed
+        // body: when the main thread (or the last thread alive?) exits.
+    }
+}
+
+/// Initialisation of the main thread's thread local structures:
+///
+/// When a main thread starts, the kernel puts the handle of its own thread in one of its registers.
+/// The main thread should perform relocations, and then call this function, which will:
+///
+/// * put the main thread's handle in [MAIN_THREAD_CONTEXT].
+/// * save a pointer to it in its [TLS].
+/// * perform copy of `.tdata` and `.tbss` for the main thread.
+#[no_mangle] // called from asm
+pub extern fn init_main_thread(handle: ThreadHandle) {
+    // save the handle in our context
+    MAIN_THREAD_CONTEXT.thread_handle.call_once(|| handle);
+    // save the address of our context in our TLS region
+    unsafe {
+        // safe: - get_my_tls returns a valid 0x200 aligned ptr,
+        //       - .ptr_thread_context is correctly aligned in the TLS region to usize,
+        (*get_my_tls_region()).ptr_thread_context = &MAIN_THREAD_CONTEXT as *const ThreadContext as usize
+    };
+
+    // allocate, enable elf TLS, and save it in our context
+    let tls_elf = TlsElf::allocate();
+    unsafe {
+        // safe: this module guarantees that the TLS region is unique to this thread.
+        tls_elf.enable_for_current_thread();
+    }
+    MAIN_THREAD_CONTEXT.tls_elf.call_once(move || tls_elf);
+}
diff --git a/libuser/src/types.rs b/libuser/src/types.rs
index 1e4563fc8..5922c78bf 100644
--- a/libuser/src/types.rs
+++ b/libuser/src/types.rs
@@ -220,7 +220,10 @@ impl ServerPort {
 
 /// A Thread. Created with the [create_thread syscall].
 ///
+/// See the [threads] module.
+///
 /// [create_thread syscall]: crate::syscalls::create_thread.
+/// [threads]: crate::threads
 #[repr(transparent)]
 #[derive(Debug)]
 pub struct Thread(pub Handle);
@@ -231,12 +234,6 @@ impl Thread {
     fn current() -> Thread {
         Thread(Handle::new(0xFFFF8000))
     }
-
-    /// Start the thread.
-    pub fn start(&self) -> Result<(), Error> {
-        syscalls::start_thread(self)
-            .map_err(|v| v.into())
-    }
 }
 
 /// A Process. Created with `create_process` syscall, or by calling
diff --git a/linker-scripts/kernel.ld b/linker-scripts/kernel.ld
index 3aaf0170f..2806dc77f 100644
--- a/linker-scripts/kernel.ld
+++ b/linker-scripts/kernel.ld
@@ -7,6 +7,7 @@ PHDRS
   rodata PT_LOAD ;
   data PT_LOAD ;
   dynamic PT_DYNAMIC ;
+  tls PT_TLS;
 }
 
 /* The kernel should be mapped in high memory */
@@ -43,6 +44,17 @@ SECTIONS {
     . = ALIGN(4K);
   } : data
 
+  /* Thread Local sections */
+
+  .tdata : {
+    *(.tdata .tdata.*)
+  } :tls :data
+
+  .tbss : {
+    *(.tbss .tbss.*)
+    *(.tcommon)
+  } :tls :data
+
   /DISCARD/ : {
     *(.comment*)
     *(.eh_frame*)
diff --git a/linker-scripts/userspace.ld b/linker-scripts/userspace.ld
index 3afedeb44..a2fe262ad 100644
--- a/linker-scripts/userspace.ld
+++ b/linker-scripts/userspace.ld
@@ -9,6 +9,7 @@ PHDRS
   data PT_LOAD FLAGS(6);
   bss PT_LOAD FLAGS(6);
   dynamic PT_DYNAMIC;
+  tls PT_TLS;
 }
 
 SECTIONS
@@ -130,14 +131,21 @@ SECTIONS
 
   /* Thread Local sections */
 
-  .tdata : {
-    *(.tdata .tdata.*)
-  } :data
+  /* Since we don't want our user to have to read its own program headers to find out the size and alignment,
+   * we force the alignment here and expose symbols so they can be deduced at runtime */
+  __tls_align__ = 16;
+
+  . = ALIGN(__tls_align__); /* we want no padding between __tls_start__ and .tdata */
+
+  __tls_start__ = .;
 
-  .tbss : {
+  .tdata : ALIGN(__tls_align__) {
+    *(.tdata .tdata.*)
     *(.tbss .tbss.*)
     *(.tcommon)
-  } :data
+  } :tls :data
+
+  __tls_end__ = .;
 
   /* BSS section */
   . = ALIGN(0x1000);
diff --git a/shell/src/main.rs b/shell/src/main.rs
index a10146aae..facc3a351 100644
--- a/shell/src/main.rs
+++ b/shell/src/main.rs
@@ -34,10 +34,10 @@ use crate::libuser::io;
 use crate::libuser::sm;
 use crate::libuser::window::{Window, Color};
 use crate::libuser::terminal::{Terminal, WindowSize};
+use crate::libuser::threads::Thread;
 
 use core::fmt::Write;
 use alloc::vec::Vec;
-use alloc::boxed::Box;
 use alloc::sync::Arc;
 use spin::Mutex;
@@ -114,64 +114,40 @@ fn test_threads(terminal: Terminal) -> Terminal {
         let terminal = unsafe { Arc::from_raw(terminal as *const Mutex<Terminal>) };
-        for _ in 0..10 {
+        let mut i = 0;
+        while i < 10 {
             if let Some(mut lock) = terminal.try_lock() {
                 let _ = writeln!(lock, "A");
+                i += 1;
             }
             let _ = libuser::syscalls::sleep_thread(0);
         }
     }
 
     #[doc(hidden)]
-    #[no_mangle]
-    fn thread_b(terminal: usize) -> ! {
+    fn thread_b(terminal: usize) {
         // Wrap in a block to forcibly call Arc destructor before exiting the thread.
         {
             let terminal = unsafe { Arc::from_raw(terminal as *const Mutex<Terminal>) };
-            for _ in 0..10 {
+            let mut i = 0;
+            while i < 10 {
                 if let Some(mut lock) = terminal.try_lock() {
                     let _ = writeln!(lock, "B");
+                    i += 1;
                }
                 let _ = libuser::syscalls::sleep_thread(0);
             }
         }
-        libuser::syscalls::exit_thread()
     }
 
-    /// Small wrapper around thread_b fixing the thread calling convention.
-    #[naked]
-    extern fn function_wrapper() {
-        unsafe {
-            asm!("
-            push eax
-            call thread_b
-            " :::: "intel");
-        }
-    }
-
-    /// Size of the test_threads stack.
-    const THREAD_STACK_SIZE: usize = 0x2000;
 
     let mut terminal = Arc::new(Mutex::new(terminal));
-    let stack = Box::new([0u8; THREAD_STACK_SIZE]);
-    let sp = (Box::into_raw(stack) as *const u8).wrapping_add(THREAD_STACK_SIZE);
-    let ip : extern fn() -> ! = unsafe {
-        // Safety: This is changing the return type from () to !. It's safe. It
-        // sucks though. This is, yet again, an instance of "naked functions are
-        // fucking horrible".
-        // Also, fun fact about the Rust Type System. Every function has its own
-        // type, that's zero-sized. Those usually get casted automatically into
-        // fn() pointers, but of course transmute is special. So we need to help
-        // it a bit.
- let fn_wrapper: extern fn() = function_wrapper; - core::mem::transmute(fn_wrapper) - }; - let thread_handle = libuser::syscalls::create_thread(ip, Arc::into_raw(terminal.clone()) as usize, sp, 0, 0) - .expect("svcCreateThread returned an error"); - thread_handle.start() - .expect("svcStartThread returned an error"); + + let t = Thread::create(thread_b, Arc::into_raw(terminal.clone()) as usize) + .expect("Failed to create thread B"); + t.start() + .expect("Failed to start thread B"); // thread is running b, run a meanwhile thread_a(Arc::into_raw(terminal.clone()) as usize); @@ -227,6 +203,7 @@ capabilities!(CAPABILITIES = Capabilities { libuser::syscalls::nr::CloseHandle, libuser::syscalls::nr::WaitSynchronization, libuser::syscalls::nr::OutputDebugString, + libuser::syscalls::nr::SetThreadArea, libuser::syscalls::nr::SetHeapSize, libuser::syscalls::nr::QueryMemory, diff --git a/sm/src/main.rs b/sm/src/main.rs index 1c9af0a8f..3a69595d4 100644 --- a/sm/src/main.rs +++ b/sm/src/main.rs @@ -145,6 +145,7 @@ capabilities!(CAPABILITIES = Capabilities { sunrise_libuser::syscalls::nr::CloseHandle, sunrise_libuser::syscalls::nr::WaitSynchronization, sunrise_libuser::syscalls::nr::OutputDebugString, + sunrise_libuser::syscalls::nr::SetThreadArea, sunrise_libuser::syscalls::nr::SetHeapSize, sunrise_libuser::syscalls::nr::ManageNamedPort, diff --git a/time/src/main.rs b/time/src/main.rs index 380a2f04b..19491bcec 100644 --- a/time/src/main.rs +++ b/time/src/main.rs @@ -43,6 +43,7 @@ capabilities!(CAPABILITIES = Capabilities { sunrise_libuser::syscalls::nr::CloseHandle, sunrise_libuser::syscalls::nr::WaitSynchronization, sunrise_libuser::syscalls::nr::OutputDebugString, + sunrise_libuser::syscalls::nr::SetThreadArea, sunrise_libuser::syscalls::nr::ReplyAndReceiveWithUserBuffer, sunrise_libuser::syscalls::nr::AcceptSession, @@ -265,4 +266,4 @@ fn main() { man.add_waitable_ref(&mut rtc); man.run(); -} \ No newline at end of file +} diff --git a/vi/src/main.rs b/vi/src/main.rs index 192ed2620..dfe596137 100644 --- a/vi/src/main.rs +++ b/vi/src/main.rs @@ -281,6 +281,7 @@ capabilities!(CAPABILITIES = Capabilities { sunrise_libuser::syscalls::nr::CloseHandle, sunrise_libuser::syscalls::nr::WaitSynchronization, sunrise_libuser::syscalls::nr::OutputDebugString, + sunrise_libuser::syscalls::nr::SetThreadArea, sunrise_libuser::syscalls::nr::ReplyAndReceiveWithUserBuffer, sunrise_libuser::syscalls::nr::AcceptSession, diff --git a/wall-clock/src/main.rs b/wall-clock/src/main.rs index c18666587..649e1266b 100644 --- a/wall-clock/src/main.rs +++ b/wall-clock/src/main.rs @@ -146,6 +146,7 @@ capabilities!(CAPABILITIES = Capabilities { sunrise_libuser::syscalls::nr::CloseHandle, sunrise_libuser::syscalls::nr::WaitSynchronization, sunrise_libuser::syscalls::nr::OutputDebugString, + sunrise_libuser::syscalls::nr::SetThreadArea, sunrise_libuser::syscalls::nr::ConnectToNamedPort, sunrise_libuser::syscalls::nr::SetHeapSize,