[kernel] Update syscall MSRs for all CPUs
Since SYSCALL/SYSRET rely on MSRs to control their function, split the old syscall_enable() into syscall_initialize(), which fills in the syscall tables once, and syscall_enable(), which programs the MSRs and is called on all CPUs. This affects not just syscalls but also the kernel_to_user_trampoline. Additionally, do away with the maximum-syscall constants (syscall::MAX in syscall.h and MAX_SYSCALLS in syscall.s), and just make a single page of syscall pointers and name pointers. The maximum was fragile because it needed to be kept in sync in multiple places.
This commit is contained in:
@@ -11,6 +11,7 @@
|
|||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "msr.h"
|
#include "msr.h"
|
||||||
#include "objects/vm_area.h"
|
#include "objects/vm_area.h"
|
||||||
|
#include "syscall.h"
|
||||||
#include "tss.h"
|
#include "tss.h"
|
||||||
|
|
||||||
cpu_data g_bsp_cpu_data;
|
cpu_data g_bsp_cpu_data;
|
||||||
@@ -98,6 +99,8 @@ init_cpu(bool bsp)
|
|||||||
|
|
||||||
tss->ist_stack(ist) = stack_top;
|
tss->ist_stack(ist) = stack_top;
|
||||||
}
|
}
|
||||||
|
// Set up the syscall MSRs
|
||||||
|
syscall_enable();
|
||||||
|
|
||||||
// Set up the page attributes table
|
// Set up the page attributes table
|
||||||
uint64_t pat = rdmsr(msr::ia32_pat);
|
uint64_t pat = rdmsr(msr::ia32_pat);
|
||||||
|
|||||||
@@ -149,6 +149,7 @@ kernel_main(args::header *header)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
syscall_initialize();
|
||||||
|
|
||||||
device_manager &devices = device_manager::get();
|
device_manager &devices = device_manager::get();
|
||||||
devices.parse_acpi(header->acpi_table);
|
devices.parse_acpi(header->acpi_table);
|
||||||
@@ -184,7 +185,6 @@ kernel_main(args::header *header)
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
syscall_enable();
|
|
||||||
scheduler *sched = new scheduler(devices.get_lapic());
|
scheduler *sched = new scheduler(devices.get_lapic());
|
||||||
|
|
||||||
// Skip program 0, which is the kernel itself
|
// Skip program 0, which is the kernel itself
|
||||||
|
|||||||
@@ -10,11 +10,11 @@
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void syscall_invalid(uint64_t call);
|
void syscall_invalid(uint64_t call);
|
||||||
void syscall_handler_prelude();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uintptr_t syscall_registry[static_cast<unsigned>(syscall::MAX)];
|
uintptr_t syscall_registry[256] __attribute__((section(".syscall_registry")));
|
||||||
const char * syscall_names[static_cast<unsigned>(syscall::MAX)];
|
const char * syscall_names[256] __attribute__((section(".syscall_registry")));
|
||||||
|
static constexpr size_t num_syscalls = sizeof(syscall_registry) / sizeof(syscall_registry[0]);
|
||||||
|
|
||||||
void
|
void
|
||||||
syscall_invalid(uint64_t call)
|
syscall_invalid(uint64_t call)
|
||||||
@@ -23,13 +23,10 @@ syscall_invalid(uint64_t call)
|
|||||||
cons->set_color(9);
|
cons->set_color(9);
|
||||||
cons->printf("\nReceived unknown syscall: %02x\n", call);
|
cons->printf("\nReceived unknown syscall: %02x\n", call);
|
||||||
|
|
||||||
const unsigned num_calls =
|
|
||||||
static_cast<unsigned>(syscall::MAX);
|
|
||||||
|
|
||||||
cons->printf(" Known syscalls:\n");
|
cons->printf(" Known syscalls:\n");
|
||||||
cons->printf(" invalid %016lx\n", syscall_invalid);
|
cons->printf(" invalid %016lx\n", syscall_invalid);
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_calls; ++i) {
|
for (unsigned i = 0; i < num_syscalls; ++i) {
|
||||||
const char *name = syscall_names[i];
|
const char *name = syscall_names[i];
|
||||||
uintptr_t handler = syscall_registry[i];
|
uintptr_t handler = syscall_registry[i];
|
||||||
if (name)
|
if (name)
|
||||||
@@ -41,33 +38,14 @@ syscall_invalid(uint64_t call)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
syscall_enable()
|
syscall_initialize()
|
||||||
{
|
{
|
||||||
// IA32_STAR - high 32 bits contain k+u CS
|
|
||||||
// Kernel CS: GDT[1] ring 0 bits[47:32]
|
|
||||||
// User CS: GDT[3] ring 3 bits[63:48]
|
|
||||||
uint64_t star =
|
|
||||||
(((1ull << 3) | 0) << 32) |
|
|
||||||
(((3ull << 3) | 3) << 48);
|
|
||||||
wrmsr(msr::ia32_star, star);
|
|
||||||
|
|
||||||
// IA32_LSTAR - RIP for syscall
|
|
||||||
wrmsr(msr::ia32_lstar,
|
|
||||||
reinterpret_cast<uintptr_t>(&syscall_handler_prelude));
|
|
||||||
|
|
||||||
// IA32_FMASK - FLAGS mask inside syscall
|
|
||||||
wrmsr(msr::ia32_fmask, 0x200);
|
|
||||||
|
|
||||||
static constexpr unsigned num_calls =
|
|
||||||
static_cast<unsigned>(syscall::MAX);
|
|
||||||
|
|
||||||
kutil::memset(&syscall_registry, 0, sizeof(syscall_registry));
|
kutil::memset(&syscall_registry, 0, sizeof(syscall_registry));
|
||||||
kutil::memset(&syscall_names, 0, sizeof(syscall_names));
|
kutil::memset(&syscall_names, 0, sizeof(syscall_names));
|
||||||
|
|
||||||
#define SYSCALL(id, name, result, ...) \
|
#define SYSCALL(id, name, result, ...) \
|
||||||
syscall_registry[id] = reinterpret_cast<uintptr_t>(syscalls::name); \
|
syscall_registry[id] = reinterpret_cast<uintptr_t>(syscalls::name); \
|
||||||
syscall_names[id] = #name; \
|
syscall_names[id] = #name; \
|
||||||
static_assert( id <= num_calls, "Syscall " #name " has id > syscall::MAX" ); \
|
|
||||||
log::debug(logs::syscall, "Enabling syscall 0x%02x as " #name , id);
|
log::debug(logs::syscall, "Enabling syscall 0x%02x as " #name , id);
|
||||||
#include "j6/tables/syscalls.inc"
|
#include "j6/tables/syscalls.inc"
|
||||||
#undef SYSCALL
|
#undef SYSCALL
|
||||||
|
|||||||
@@ -10,13 +10,10 @@ enum class syscall : uint64_t
|
|||||||
#define SYSCALL(id, name, ...) name = id,
|
#define SYSCALL(id, name, ...) name = id,
|
||||||
#include "j6/tables/syscalls.inc"
|
#include "j6/tables/syscalls.inc"
|
||||||
#undef SYSCALL
|
#undef SYSCALL
|
||||||
|
|
||||||
// Maximum syscall id. If you change this, also change
|
|
||||||
// MAX_SYSCALLS in syscall.s
|
|
||||||
MAX = 0x40
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void syscall_enable();
|
void syscall_initialize();
|
||||||
|
extern "C" void syscall_enable();
|
||||||
|
|
||||||
namespace syscalls
|
namespace syscalls
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,17 +1,32 @@
|
|||||||
%include "tasking.inc"
|
%include "tasking.inc"
|
||||||
|
|
||||||
; Make sure to keep MAX_SYSCALLS in sync with
|
; SYSCALL/SYSRET control MSRs
|
||||||
; syscall::MAX in syscall.h
|
MSR_STAR equ 0xc0000081
|
||||||
MAX_SYSCALLS equ 0x40
|
MSR_LSTAR equ 0xc0000082
|
||||||
|
MSR_FMASK equ 0xc0000084
|
||||||
|
|
||||||
|
; IA32_STAR - high 32 bits contain k+u CS
|
||||||
|
; Kernel CS: GDT[1] ring 0 bits[47:32]
|
||||||
|
; User CS: GDT[3] ring 3 bits[63:48]
|
||||||
|
STAR_HIGH equ \
|
||||||
|
(((1 << 3) | 0)) | \
|
||||||
|
(((3 << 3) | 3) << 16)
|
||||||
|
|
||||||
|
; IA32_FMASK - Mask off interrupts in syscalls
|
||||||
|
FMASK_VAL equ 0x200
|
||||||
|
|
||||||
extern __counter_syscall_enter
|
extern __counter_syscall_enter
|
||||||
extern __counter_syscall_sysret
|
extern __counter_syscall_sysret
|
||||||
|
|
||||||
extern syscall_registry
|
extern syscall_registry
|
||||||
extern syscall_invalid
|
extern syscall_invalid
|
||||||
|
|
||||||
global syscall_handler_prelude
|
|
||||||
|
global syscall_handler_prelude:function (syscall_handler_prelude.end - syscall_handler_prelude)
|
||||||
syscall_handler_prelude:
|
syscall_handler_prelude:
|
||||||
|
push rbp ; Never executed, fake function prelude
|
||||||
|
mov rbp, rsp ; to calm down gdb
|
||||||
|
|
||||||
|
.real:
|
||||||
swapgs
|
swapgs
|
||||||
mov [gs:CPU_DATA.rsp3], rsp
|
mov [gs:CPU_DATA.rsp3], rsp
|
||||||
mov rsp, [gs:CPU_DATA.rsp0]
|
mov rsp, [gs:CPU_DATA.rsp0]
|
||||||
@@ -36,14 +51,7 @@ syscall_handler_prelude:
|
|||||||
|
|
||||||
inc qword [rel __counter_syscall_enter]
|
inc qword [rel __counter_syscall_enter]
|
||||||
|
|
||||||
cmp rax, MAX_SYSCALLS
|
and rax, 0xff ; Only 256 possible syscall values
|
||||||
jle .ok_syscall
|
|
||||||
|
|
||||||
.bad_syscall:
|
|
||||||
mov rdi, rax
|
|
||||||
call syscall_invalid
|
|
||||||
|
|
||||||
.ok_syscall:
|
|
||||||
lea r11, [rel syscall_registry]
|
lea r11, [rel syscall_registry]
|
||||||
mov r11, [r11 + rax * 8]
|
mov r11, [r11 + rax * 8]
|
||||||
cmp r11, 0
|
cmp r11, 0
|
||||||
@@ -52,8 +60,14 @@ syscall_handler_prelude:
|
|||||||
call r11
|
call r11
|
||||||
|
|
||||||
inc qword [rel __counter_syscall_sysret]
|
inc qword [rel __counter_syscall_sysret]
|
||||||
|
jmp kernel_to_user_trampoline
|
||||||
|
|
||||||
global kernel_to_user_trampoline
|
.bad_syscall:
|
||||||
|
mov rdi, rax
|
||||||
|
call syscall_invalid
|
||||||
|
.end:
|
||||||
|
|
||||||
|
global kernel_to_user_trampoline:function (kernel_to_user_trampoline.end - kernel_to_user_trampoline)
|
||||||
kernel_to_user_trampoline:
|
kernel_to_user_trampoline:
|
||||||
pop r15
|
pop r15
|
||||||
pop r14
|
pop r14
|
||||||
@@ -70,3 +84,28 @@ kernel_to_user_trampoline:
|
|||||||
|
|
||||||
swapgs
|
swapgs
|
||||||
o64 sysret
|
o64 sysret
|
||||||
|
.end:
|
||||||
|
|
||||||
|
; syscall_enable - program the SYSCALL/SYSRET control MSRs on the calling CPU.
; Exposed to C++ as `extern "C" void syscall_enable()`: takes no arguments and
; returns nothing. Clobbers rax, rcx and rdx.
global syscall_enable:function (syscall_enable.end - syscall_enable)
syscall_enable:
    push rbp
    mov rbp, rsp

    ; IA32_STAR: wrmsr writes EDX:EAX, so the low half is zero and the
    ; high half carries the kernel and user code segment selectors.
    mov rcx, MSR_STAR
    mov rax, 0
    mov rdx, STAR_HIGH
    wrmsr

    ; IA32_LSTAR: 64-bit syscall entry point, split into EAX (low half) and
    ; EDX (high half). The target is the .real label, which skips the fake
    ; frame setup at the top of syscall_handler_prelude.
    mov rcx, MSR_LSTAR
    mov rax, syscall_handler_prelude.real
    mov rdx, rax
    shr rdx, 32
    wrmsr

    ; IA32_FMASK: EDX still holds the high half of the LSTAR address at this
    ; point, so clear it before writing - only the low 32 bits of the mask
    ; MSR are defined, and the upper bits must not be set.
    mov rcx, MSR_FMASK
    mov rax, FMASK_VAL
    xor edx, edx
    wrmsr

    pop rbp
    ret
.end:
|
||||||
|
|||||||
Reference in New Issue
Block a user