[kernel] Update syscall MSRs for all CPUs

Since SYSCALL/SYSRET rely on MSRs to control their function, split out
syscall_enable() into syscall_initialize() and syscall_enable(), the
latter being called on all CPUs. This affects not just syscalls but also
the kernel_to_user_trampoline.

Additionally, do away with the max syscalls, and just make a single page
of syscall pointers and name pointers. Max syscalls was fragile and
needed to be kept in sync in multiple places.
This commit is contained in:
Justin C. Miller
2021-02-10 01:15:32 -08:00
parent 70d6094f46
commit 872f178d94
5 changed files with 64 additions and 47 deletions

View File

@@ -11,6 +11,7 @@
#include "log.h" #include "log.h"
#include "msr.h" #include "msr.h"
#include "objects/vm_area.h" #include "objects/vm_area.h"
#include "syscall.h"
#include "tss.h" #include "tss.h"
cpu_data g_bsp_cpu_data; cpu_data g_bsp_cpu_data;
@@ -98,6 +99,8 @@ init_cpu(bool bsp)
tss->ist_stack(ist) = stack_top; tss->ist_stack(ist) = stack_top;
} }
// Set up the syscall MSRs
syscall_enable();
// Set up the page attributes table // Set up the page attributes table
uint64_t pat = rdmsr(msr::ia32_pat); uint64_t pat = rdmsr(msr::ia32_pat);

View File

@@ -149,6 +149,7 @@ kernel_main(args::header *header)
} }
} }
syscall_initialize();
device_manager &devices = device_manager::get(); device_manager &devices = device_manager::get();
devices.parse_acpi(header->acpi_table); devices.parse_acpi(header->acpi_table);
@@ -184,7 +185,6 @@ kernel_main(args::header *header)
} }
*/ */
syscall_enable();
scheduler *sched = new scheduler(devices.get_lapic()); scheduler *sched = new scheduler(devices.get_lapic());
// Skip program 0, which is the kernel itself // Skip program 0, which is the kernel itself

View File

@@ -10,11 +10,11 @@
extern "C" { extern "C" {
void syscall_invalid(uint64_t call); void syscall_invalid(uint64_t call);
void syscall_handler_prelude();
} }
uintptr_t syscall_registry[static_cast<unsigned>(syscall::MAX)]; uintptr_t syscall_registry[256] __attribute__((section(".syscall_registry")));
const char * syscall_names[static_cast<unsigned>(syscall::MAX)]; const char * syscall_names[256] __attribute__((section(".syscall_registry")));
static constexpr size_t num_syscalls = sizeof(syscall_registry) / sizeof(syscall_registry[0]);
void void
syscall_invalid(uint64_t call) syscall_invalid(uint64_t call)
@@ -23,13 +23,10 @@ syscall_invalid(uint64_t call)
cons->set_color(9); cons->set_color(9);
cons->printf("\nReceived unknown syscall: %02x\n", call); cons->printf("\nReceived unknown syscall: %02x\n", call);
const unsigned num_calls =
static_cast<unsigned>(syscall::MAX);
cons->printf(" Known syscalls:\n"); cons->printf(" Known syscalls:\n");
cons->printf(" invalid %016lx\n", syscall_invalid); cons->printf(" invalid %016lx\n", syscall_invalid);
for (unsigned i = 0; i < num_calls; ++i) { for (unsigned i = 0; i < num_syscalls; ++i) {
const char *name = syscall_names[i]; const char *name = syscall_names[i];
uintptr_t handler = syscall_registry[i]; uintptr_t handler = syscall_registry[i];
if (name) if (name)
@@ -41,33 +38,14 @@ syscall_invalid(uint64_t call)
} }
void void
syscall_enable() syscall_initialize()
{ {
// IA32_STAR - high 32 bits contain k+u CS
// Kernel CS: GDT[1] ring 0 bits[47:32]
// User CS: GDT[3] ring 3 bits[63:48]
uint64_t star =
(((1ull << 3) | 0) << 32) |
(((3ull << 3) | 3) << 48);
wrmsr(msr::ia32_star, star);
// IA32_LSTAR - RIP for syscall
wrmsr(msr::ia32_lstar,
reinterpret_cast<uintptr_t>(&syscall_handler_prelude));
// IA32_FMASK - FLAGS mask inside syscall
wrmsr(msr::ia32_fmask, 0x200);
static constexpr unsigned num_calls =
static_cast<unsigned>(syscall::MAX);
kutil::memset(&syscall_registry, 0, sizeof(syscall_registry)); kutil::memset(&syscall_registry, 0, sizeof(syscall_registry));
kutil::memset(&syscall_names, 0, sizeof(syscall_names)); kutil::memset(&syscall_names, 0, sizeof(syscall_names));
#define SYSCALL(id, name, result, ...) \ #define SYSCALL(id, name, result, ...) \
syscall_registry[id] = reinterpret_cast<uintptr_t>(syscalls::name); \ syscall_registry[id] = reinterpret_cast<uintptr_t>(syscalls::name); \
syscall_names[id] = #name; \ syscall_names[id] = #name; \
static_assert( id <= num_calls, "Syscall " #name " has id > syscall::MAX" ); \
log::debug(logs::syscall, "Enabling syscall 0x%02x as " #name , id); log::debug(logs::syscall, "Enabling syscall 0x%02x as " #name , id);
#include "j6/tables/syscalls.inc" #include "j6/tables/syscalls.inc"
#undef SYSCALL #undef SYSCALL

View File

@@ -10,13 +10,10 @@ enum class syscall : uint64_t
#define SYSCALL(id, name, ...) name = id, #define SYSCALL(id, name, ...) name = id,
#include "j6/tables/syscalls.inc" #include "j6/tables/syscalls.inc"
#undef SYSCALL #undef SYSCALL
// Maximum syscall id. If you change this, also change
// MAX_SYSCALLS in syscall.s
MAX = 0x40
}; };
void syscall_enable(); void syscall_initialize();
extern "C" void syscall_enable();
namespace syscalls namespace syscalls
{ {

View File

@@ -1,17 +1,32 @@
%include "tasking.inc" %include "tasking.inc"
; Make sure to keep MAX_SYSCALLS in sync with ; SYSCALL/SYSRET control MSRs
; syscall::MAX in syscall.h MSR_STAR equ 0xc0000081
MAX_SYSCALLS equ 0x40 MSR_LSTAR equ 0xc0000082
MSR_FMASK equ 0xc0000084
; IA32_STAR - high 32 bits contain k+u CS
; Kernel CS: GDT[1] ring 0 bits[47:32]
; User CS: GDT[3] ring 3 bits[63:48]
STAR_HIGH equ \
(((1 << 3) | 0)) | \
(((3 << 3) | 3) << 16)
; IA32_FMASK - Mask off interrupts in syscalls
FMASK_VAL equ 0x200
extern __counter_syscall_enter extern __counter_syscall_enter
extern __counter_syscall_sysret extern __counter_syscall_sysret
extern syscall_registry extern syscall_registry
extern syscall_invalid extern syscall_invalid
global syscall_handler_prelude
global syscall_handler_prelude:function (syscall_handler_prelude.end - syscall_handler_prelude)
syscall_handler_prelude: syscall_handler_prelude:
push rbp ; Never executed, fake function prelude
mov rbp, rsp ; to calm down gdb
.real:
swapgs swapgs
mov [gs:CPU_DATA.rsp3], rsp mov [gs:CPU_DATA.rsp3], rsp
mov rsp, [gs:CPU_DATA.rsp0] mov rsp, [gs:CPU_DATA.rsp0]
@@ -36,14 +51,7 @@ syscall_handler_prelude:
inc qword [rel __counter_syscall_enter] inc qword [rel __counter_syscall_enter]
cmp rax, MAX_SYSCALLS and rax, 0xff ; Only 256 possible syscall values
jle .ok_syscall
.bad_syscall:
mov rdi, rax
call syscall_invalid
.ok_syscall:
lea r11, [rel syscall_registry] lea r11, [rel syscall_registry]
mov r11, [r11 + rax * 8] mov r11, [r11 + rax * 8]
cmp r11, 0 cmp r11, 0
@@ -52,8 +60,14 @@ syscall_handler_prelude:
call r11 call r11
inc qword [rel __counter_syscall_sysret] inc qword [rel __counter_syscall_sysret]
jmp kernel_to_user_trampoline
global kernel_to_user_trampoline .bad_syscall:
mov rdi, rax
call syscall_invalid
.end:
global kernel_to_user_trampoline:function (kernel_to_user_trampoline.end - kernel_to_user_trampoline)
kernel_to_user_trampoline: kernel_to_user_trampoline:
pop r15 pop r15
pop r14 pop r14
@@ -70,3 +84,28 @@ kernel_to_user_trampoline:
swapgs swapgs
o64 sysret o64 sysret
.end:
; syscall_enable - program the SYSCALL/SYSRET control MSRs on the calling CPU.
;
; Declared to C++ as `extern "C" void syscall_enable()`: takes no arguments
; and returns nothing. MSRs are per-CPU state, so this must run on every CPU
; rather than once at boot.
;
; WRMSR writes EDX:EAX to the MSR selected by ECX, so each write below sets
; up all three registers explicitly. Clobbers rax, rcx and rdx.
global syscall_enable:function (syscall_enable.end - syscall_enable)
syscall_enable:
    push rbp
    mov rbp, rsp

    ; IA32_STAR: the kernel/user code segment selectors live in the high
    ; dword; the low dword is written as zero.
    mov rcx, MSR_STAR
    mov rax, 0
    mov rdx, STAR_HIGH
    wrmsr

    ; IA32_LSTAR: 64-bit RIP loaded by SYSCALL. Point it at .real so the fake
    ; frame-setup prelude at the top of syscall_handler_prelude is skipped.
    mov rcx, MSR_LSTAR
    mov rax, syscall_handler_prelude.real
    mov rdx, rax
    shr rdx, 32                 ; EDX = upper half of the handler address
    wrmsr

    ; IA32_FMASK: RFLAGS bits to clear on SYSCALL entry. EDX still holds the
    ; upper half of the handler address from the LSTAR write above; it must be
    ; zeroed, since the upper dword of this MSR is reserved and writing stale
    ; non-zero bits there can raise #GP.
    mov rcx, MSR_FMASK
    mov rax, FMASK_VAL
    xor edx, edx
    wrmsr

    pop rbp
    ret
.end: