Improve syscalls for new task switching

There are a lot of under the hood changes here:
- Move syscalls to be a dispatch table, defined by syscalls.inc
- Don't need a full process state (push_all) in syscalls now
- In push_all, define REGS instead of using offsets
- Save two additional stack pointers in the TCB, alongside the current saved stack pointer:
  - rsp0 is the base of the kernel stack for interrupts
  - rsp3 is the saved user stack from cpu_data
- Update syscall numbers in nulldrv
- Some asm-debugging enhancements to the gdb script
- fork() still not working
This commit is contained in:
Justin C. Miller
2019-04-02 00:25:36 -07:00
parent ca2362f858
commit 11a53e792f
17 changed files with 284 additions and 215 deletions

View File

@@ -59,7 +59,7 @@ print_stacktrace(int skip)
int fi = -skip;
__asm__ __volatile__ ( "mov %%rbp, %0" : "=r" (fp) );
while (fp) {
while (fp && fp->return_addr) {
if (fi++ >= 0)
cons->printf(" frame %2d: %lx\n", fi-1, fp->return_addr);
fp = fp->prev;

View File

@@ -20,7 +20,6 @@ extern "C" {
void isr_handler(cpu_state*);
void irq_handler(cpu_state*);
void syscall_handler(cpu_state*);
#define ISR(i, name) extern void name ();
#define EISR(i, name) extern void name ();
@@ -219,9 +218,11 @@ isr_handler(cpu_state *regs)
_halt();
break;
/*
case isr::isrSyscall:
syscall_dispatch(regs);
break;
*/
case isr::isrSpurious:
// No EOI for the spurious interrupt
@@ -284,9 +285,3 @@ irq_handler(cpu_state *regs)
*reinterpret_cast<uint32_t *>(0xffffff80fee000b0) = 0;
}
void
syscall_handler(cpu_state *regs)
{
syscall_dispatch(regs);
}

View File

@@ -1,30 +1,22 @@
%include "push_all.inc"
extern load_process
extern load_process_image
global ramdisk_process_loader
ramdisk_process_loader:
; create_process already pushed a cpu_state onto the stack for us, this
; acts both as the cpu_state parameter to load_process, and the saved
; state for the following iretq
; acts both as the cpu_state parameter to load_process_image, and the
; saved state for the following iretq
pop rdi ; the address of the program image
pop rsi ; the size of the program image
pop rdx ; the address of this process' process structure
pop rcx ; the cpu_state
call load_process
call load_process_image
push rax ; load_process_image returns the process entrypoint
swapgs
xor rax, rax
mov ax, ss
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
pop_all
add rsp, 16 ; because the ISRs add err/num
iretq

View File

@@ -6,3 +6,4 @@ LOG(memory, debug);
LOG(fs, info);
LOG(task, debug);
LOG(boot, debug);
LOG(syscall,debug);

View File

@@ -35,11 +35,7 @@ process::fork(cpu_state *regs)
kernel_stack, child->kernel_stack, child->rsp);
child->setup_kernel_stack();
task_fork(child); // Both parent and child will return from this
if (bsp_cpu_data.tcb->pid == child->pid) {
return 0;
}
task_fork(child);
return child->pid;
}

View File

@@ -43,6 +43,7 @@ struct process
// be sure to change the assembly definitions in 'tasking.inc'
uintptr_t rsp;
uintptr_t rsp0;
uintptr_t rsp3;
page_table *pml4;
// End of assembly fields

View File

@@ -1,51 +1,73 @@
; Layout of the saved register frame as it sits on the stack, lowest address
; first. push_all reserves and fills the general-purpose register slots
; (.r15 through .rax); the slots from .int upwards are already on the stack
; when push_all runs.
struc REGS
.r15 resq 1 ; 0x00
.r14 resq 1 ; 0x08
.r13 resq 1 ; 0x10
.r12 resq 1 ; 0x18
.r11 resq 1 ; 0x20
.r10 resq 1 ; 0x28
.r9 resq 1 ; 0x30
.r8 resq 1 ; 0x38
.rdi resq 1 ; 0x40
.rsi resq 1 ; 0x48
.rbp resq 1 ; 0x50
.rbx resq 1 ; 0x58
.rdx resq 1 ; 0x60
.rcx resq 1 ; 0x68
.rax resq 1 ; 0x70
.int resq 1 ; 0x78
.err resq 1 ; 0x80
.rip resq 1 ; 0x88
.cs3 resq 1 ; 0x90
.rflags resq 1 ; 0x98
.rsp3 resq 1 ; 0xa0
.ss3 resq 1 ; 0xa8
endstruc

; Both sizes are derived from the struc so they cannot drift out of sync with
; the field list above.
regs_total_size equ REGS_size ; 0xb0: the whole frame
regs_extra_size equ REGS.int ; 0x78: the part below .int that push_all/pop_all manage
%macro push_all 0
sub rsp, 0x78
sub rsp, regs_extra_size
; ss3 rsp + 0xa8
; rsp3 rsp + 0xa0
; flags3 rsp + 0x98
; cs3 rsp + 0x90
; rip3 rsp + 0x88
; error rsp + 0x80
; vector rsp + 0x78
mov [rsp + REGS.rax], rax
mov [rsp + REGS.rcx], rcx
mov [rsp + REGS.rdx], rdx
mov [rsp + REGS.rbx], rbx
mov [rsp + REGS.rbp], rbp
mov [rsp + REGS.rsi], rsi
mov [rsp + REGS.rdi], rdi
mov [rsp + 0x70], rax
mov [rsp + 0x68], rcx
mov [rsp + 0x60], rdx
mov [rsp + 0x58], rbx
mov [rsp + 0x50], rbp
mov [rsp + 0x48], rsi
mov [rsp + 0x40], rdi
mov [rsp + 0x38], r8
mov [rsp + 0x30], r9
mov [rsp + 0x28], r10
mov [rsp + 0x20], r11
mov [rsp + 0x18], r12
mov [rsp + 0x10], r13
mov [rsp + 0x08], r14
mov [rsp + 0x00], r15
mov [rsp + REGS.r8 ], r8
mov [rsp + REGS.r9 ], r9
mov [rsp + REGS.r10], r10
mov [rsp + REGS.r11], r11
mov [rsp + REGS.r12], r12
mov [rsp + REGS.r13], r13
mov [rsp + REGS.r14], r14
mov [rsp + REGS.r15], r15
%endmacro
%macro pop_all 0
mov rax, [rsp + 0x70]
mov rcx, [rsp + 0x68]
mov rdx, [rsp + 0x60]
mov rbx, [rsp + 0x58]
mov rbp, [rsp + 0x50]
mov rsi, [rsp + 0x48]
mov rdi, [rsp + 0x40]
mov rax, [rsp + REGS.rax]
mov rcx, [rsp + REGS.rcx]
mov rdx, [rsp + REGS.rdx]
mov rbx, [rsp + REGS.rbx]
mov rbp, [rsp + REGS.rbp]
mov rsi, [rsp + REGS.rsi]
mov rdi, [rsp + REGS.rdi]
mov r8, [rsp + 0x38]
mov r9, [rsp + 0x30]
mov r10, [rsp + 0x28]
mov r11, [rsp + 0x20]
mov r12, [rsp + 0x18]
mov r13, [rsp + 0x10]
mov r14, [rsp + 0x08]
mov r15, [rsp + 0x00]
mov r8, [rsp + REGS.r8 ]
mov r9, [rsp + REGS.r9 ]
mov r10, [rsp + REGS.r10]
mov r11, [rsp + REGS.r11]
mov r12, [rsp + REGS.r12]
mov r13, [rsp + REGS.r13]
mov r14, [rsp + REGS.r14]
mov r15, [rsp + REGS.r15]
add rsp, 0x78
add rsp, regs_extra_size
%endmacro
%macro check_swap_gs 0

View File

@@ -23,7 +23,7 @@ const uint64_t rflags_int = 0x202;
extern "C" {
void ramdisk_process_loader();
void load_process(const void *image_start, size_t bytes, process *proc, cpu_state *state);
uintptr_t load_process_image(const void *image_start, size_t bytes, process *proc);
};
scheduler::scheduler(lapic *apic) :
@@ -54,8 +54,8 @@ scheduler::scheduler(lapic *apic) :
bsp_cpu_data.tcb = idle;
}
void
load_process(const void *image_start, size_t bytes, process *proc, cpu_state *state)
uintptr_t
load_process_image(const void *image_start, size_t bytes, process *proc)
{
// We're now in the process space for this process, allocate memory for the
// process code and load it
@@ -65,7 +65,7 @@ load_process(const void *image_start, size_t bytes, process *proc, cpu_state *st
// TODO: Handle bad images gracefully
elf::elf image(image_start, bytes);
kassert(image.valid(), "Invalid ELF passed to load_process");
kassert(image.valid(), "Invalid ELF passed to load_process_image");
const unsigned program_count = image.program_count();
for (unsigned i = 0; i < program_count; ++i) {
@@ -106,10 +106,11 @@ load_process(const void *image_start, size_t bytes, process *proc, cpu_state *st
kutil::memcpy(dest, src, header->size);
}
state->rip = image.entrypoint();
proc->flags &= ~process_flags::loading;
log::debug(logs::task, " Loaded! New process rip: %016lx", state->rip);
uintptr_t entrypoint = image.entrypoint();
log::debug(logs::task, " Loaded! New process rip: %016lx", entrypoint);
return entrypoint;
}
process_node *
@@ -124,20 +125,20 @@ scheduler::create_process(pid_t pid)
}
static uintptr_t
add_fake_stack_return(uintptr_t rsp, uintptr_t rbp, uintptr_t rip)
add_fake_task_return(uintptr_t rsp, uintptr_t rbp, uintptr_t rip)
{
// Initialize a new empty stack with a fake return segment
// for returning out of task_switch
rsp -= sizeof(uintptr_t) * 7;
uintptr_t *stack = reinterpret_cast<uintptr_t*>(rsp);
stack[0] = rbp; // rbp
stack[1] = 0xbbbbbbbb; // rbx
stack[2] = 0x12121212; // r12
stack[3] = 0x13131313; // r13
stack[4] = 0x14141414; // r14
stack[5] = 0x15151515; // r15
stack[6] = rip; // return rip
stack[5] = rbp; // rbp
stack[4] = 0xbbbbbbbb; // rbx
stack[3] = 0x12121212; // r12
stack[2] = 0x13131313; // r13
stack[1] = 0x14141414; // r14
stack[0] = 0x15151515; // r15
return rsp;
}
@@ -157,28 +158,25 @@ scheduler::load_process(const char *name, const void *data, size_t size)
// Create an initial kernel stack space
void *sp0 = proc->setup_kernel_stack();
cpu_state *state = reinterpret_cast<cpu_state *>(sp0) - 1;
// Highest state in the stack is the process' kernel stack for the loader
// to iret to:
state->ss = ss;
state->cs = cs;
state->rflags = rflags_int;
state->rip = 0; // to be filled by the loader
state->user_rsp = initial_stack;
uintptr_t *stack = reinterpret_cast<uintptr_t *>(sp0) - 7;
// Pass args to ramdisk_process_loader on the stack
uintptr_t *stack = reinterpret_cast<uintptr_t *>(state) - 4;
stack[0] = reinterpret_cast<uintptr_t>(data);
stack[1] = reinterpret_cast<uintptr_t>(size);
stack[2] = reinterpret_cast<uintptr_t>(proc);
stack[3] = reinterpret_cast<uintptr_t>(state);
proc->rsp = add_fake_stack_return(
proc->rsp = add_fake_task_return(
reinterpret_cast<uintptr_t>(stack),
proc->rsp0,
reinterpret_cast<uintptr_t>(ramdisk_process_loader));
// Arguments for iret - rip will be pushed on before these
stack[3] = cs;
stack[4] = rflags_int;
stack[5] = initial_stack;
stack[6] = ss;
proc->rsp3 = initial_stack;
proc->quanta = process_quanta;
proc->flags =
process_flags::running |
@@ -188,7 +186,8 @@ scheduler::load_process(const char *name, const void *data, size_t size)
m_runlists[default_priority].push_back(proc);
log::debug(logs::task, "Creating process %s: pid %d pri %d", name, proc->pid, proc->priority);
log::debug(logs::task, " RSP0 %016lx", state);
log::debug(logs::task, " RSP %016lx", proc->rsp);
log::debug(logs::task, " RSP0 %016lx", proc->rsp0);
log::debug(logs::task, " PML4 %016lx", proc->pml4);
}
@@ -202,7 +201,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
// Create an initial kernel stack space
proc->setup_kernel_stack();
proc->rsp = add_fake_stack_return(
proc->rsp = add_fake_task_return(
proc->rsp0, proc->rsp0,
reinterpret_cast<uintptr_t>(task));
@@ -302,12 +301,11 @@ scheduler::schedule()
m_current = m_runlists[pri].pop_front();
if (lastpid != m_current->pid) {
task_switch(m_current);
bool loading = m_current->flags && process_flags::loading;
log::debug(logs::task, "Scheduler switching to process %d, priority %d%s.",
log::debug(logs::task, "Scheduler switched to process %d, priority %d%s.",
m_current->pid, m_current->priority, loading ? " (loading)" : "");
task_switch(m_current);
}
}

View File

@@ -1,6 +1,7 @@
#include "console.h"
#include "cpu.h"
#include "debug.h"
#include "log.h"
#include "msr.h"
#include "process.h"
#include "scheduler.h"
@@ -11,29 +12,90 @@ extern "C" {
void syscall_handler_prelude();
}
namespace syscalls {
void
syscall_enable()
noop()
{
// IA32_EFER - set bit 0, syscall enable
uint64_t efer = rdmsr(msr::ia32_efer);
wrmsr(msr::ia32_efer, efer|1);
// IA32_STAR - high 32 bits contain k+u CS
// Kernel CS: GDT[1] ring 0 bits[47:32]
// User CS: GDT[3] ring 3 bits[63:48]
uint64_t star =
(((1ull << 3) | 0) << 32) |
(((3ull << 3) | 3) << 48);
wrmsr(msr::ia32_star, star);
// IA32_LSTAR - RIP for syscall
wrmsr(msr::ia32_lstar,
reinterpret_cast<uintptr_t>(&syscall_handler_prelude));
// IA32_FMASK - FLAGS mask inside syscall
wrmsr(msr::ia32_fmask, 0x200);
auto &s = scheduler::get();
auto *p = s.current();
log::debug(logs::syscall, "Process %d called noop syscall.", p->pid);
}
/// Syscall handler: terminate the calling process.
/// \arg status  Exit code passed on to process::exit
void
exit(int64_t status)
{
	auto &s = scheduler::get();
	auto *p = s.current();

	// status is 64 bits wide, so it needs the 'l' length modifier; a plain
	// %d would only consume/print it as a 32-bit int.
	log::debug(logs::syscall, "Process %d exiting with code %ld", p->pid, status);

	p->exit(status);

	// Ask the scheduler to pick the next task to run.
	s.schedule();
}
/// Syscall handler: report the pid of the process the scheduler considers
/// current.
/// \returns  The calling process' pid
pid_t
getpid()
{
	return scheduler::get().current()->pid;
}
// Placeholder handler for the fork syscall: does no work and always returns 0.
pid_t fork() { return 0; }
/// Syscall handler: write a string from the calling process to the syscall
/// log at info level, tagged with the caller's pid.
/// \arg message  NUL-terminated string to log
// NOTE(review): the pointer is handed to the logger as-is; no validation of
// it is visible in this handler - confirm whether callers are trusted.
void
message(const char *message)
{
	auto *caller = scheduler::get().current();
	log::info(logs::syscall, "Message[%d]: %s", caller->pid, message);
}
/// Syscall handler: put the calling process into a wait on signals (using an
/// all-ones mask) and then invoke the scheduler.
void
pause()
{
	auto &sched = scheduler::get();
	sched.current()->wait_on_signal(-1ull);
	sched.schedule();
}
/// Syscall handler: make the calling process wait on a time value, then
/// invoke the scheduler.
/// \arg til  Time value passed on to process::wait_on_time
void
sleep(uint64_t til)
{
	auto &s = scheduler::get();
	auto *p = s.current();

	// til is an unsigned 64-bit value, so it is printed with %lu; a plain %d
	// would treat it as a signed 32-bit int.
	log::debug(logs::syscall, "Process %d sleeping until %lu", p->pid, til);

	p->wait_on_time(til);
	s.schedule();
}
// Placeholder handlers for the send and receive syscalls: both are empty.
void send() {}
void receive() {}
} // namespace syscalls
// Descriptive metadata kept for each syscall, filled in from the
// SYSCALL(name, nargs) entries of syscalls.inc.
struct syscall_handler_info
{
// Argument count given as the second SYSCALL() parameter
unsigned nargs;
// Stringified syscall name (the first SYSCALL() parameter)
const char *name;
};
// Handler addresses indexed by syscall number; the assembly syscall entry
// code looks the handler up in this table and calls it.
uintptr_t syscall_registry[static_cast<unsigned>(syscall::COUNT)];
// Per-syscall metadata (argument count and name), indexed the same way.
syscall_handler_info syscall_info_registry[static_cast<unsigned>(syscall::COUNT)];
/// Report a syscall number that has no registered handler, then halt.
/// \arg call  The syscall number that was requested
void
syscall_invalid(uint64_t call)
{
	console *cons = console::get();

	cons->set_color(9);
	// call is an unsigned 64-bit value, so it is printed with %lu; %d would
	// treat it as a signed 32-bit int and misreport large or "negative"
	// call numbers.
	cons->printf("\nReceived unknown syscall: %lu\n", call);
	cons->set_color();

	_halt();
}
/*
void
syscall_dispatch(cpu_state *regs)
{
@@ -56,44 +118,6 @@ syscall_dispatch(cpu_state *regs)
cons->printf(" Syscall sysret: %8d\n", __counter_syscall_sysret);
break;
case syscall::message:
cons->set_color(11);
cons->printf("\nProcess %d: Received MESSAGE syscall\n", p->pid);
cons->set_color();
break;
case syscall::pause:
{
cons->set_color(11);
auto &s = scheduler::get();
auto *p = s.current();
p->wait_on_signal(-1ull);
cons->printf("\nProcess %d: Received PAUSE syscall\n", p->pid);
cons->set_color();
s.schedule();
}
break;
case syscall::sleep:
{
cons->set_color(11);
cons->printf("\nProcess %d: Received SLEEP syscall\n", p->pid);
cons->printf("Sleeping until %lu\n", regs->rdi);
cons->set_color();
p->wait_on_time(regs->rdi);
s.schedule();
}
break;
case syscall::getpid:
cons->set_color(11);
cons->printf("\nProcess %d: Received GETPID syscall\n", p->pid);
cons->set_color();
regs->rax = p->pid;
break;
case syscall::send:
{
pid_t target = regs->rdi;
@@ -134,14 +158,6 @@ syscall_dispatch(cpu_state *regs)
}
break;
case syscall::exit:
cons->set_color(11);
cons->printf("\nProcess %d: Received EXIT syscall\n", p->pid);
cons->set_color();
p->exit(regs->rdi);
s.schedule();
break;
default:
cons->set_color(9);
cons->printf("\nReceived unknown syscall: %02x\n", call);
@@ -150,4 +166,36 @@ syscall_dispatch(cpu_state *regs)
break;
}
}
*/
// Program the MSRs that control the SYSCALL/SYSRET instructions and fill the
// syscall dispatch tables from syscalls.inc.
void
syscall_enable()
{
// IA32_EFER - set bit 0, syscall enable
uint64_t efer = rdmsr(msr::ia32_efer);
wrmsr(msr::ia32_efer, efer|1);
// IA32_STAR - high 32 bits contain k+u CS
// Kernel CS: GDT[1] ring 0 bits[47:32]
// User CS: GDT[3] ring 3 bits[63:48]
// NOTE(review): on SYSRET the CPU derives the user selectors by adding
// fixed offsets to the bits[63:48] value rather than loading it directly -
// confirm the GDT ordering matches.
uint64_t star =
(((1ull << 3) | 0) << 32) |
(((3ull << 3) | 3) << 48);
wrmsr(msr::ia32_star, star);
// IA32_LSTAR - RIP for syscall
wrmsr(msr::ia32_lstar,
reinterpret_cast<uintptr_t>(&syscall_handler_prelude));
// IA32_FMASK - FLAGS mask inside syscall
// 0x200 is the interrupt-enable flag (RFLAGS bit 9), so the syscall entry
// code starts with interrupts masked.
wrmsr(msr::ia32_fmask, 0x200);
// For every SYSCALL(name, nargs) entry in syscalls.inc, store the address of
// syscalls::name in syscall_registry and its { nargs, "name" } metadata in
// syscall_info_registry, both indexed by the syscall enum value.
#define SYSCALL(name, nargs) \
syscall_registry[static_cast<unsigned>(syscall::name)] = \
reinterpret_cast<uintptr_t>(syscalls::name); \
syscall_info_registry[static_cast<unsigned>(syscall::name)] = { \
nargs, #name };
#include "syscalls.inc"
#undef SYSCALL
}

View File

@@ -6,20 +6,12 @@ struct cpu_state;
enum class syscall : uint64_t
{
noop = 0x0000,
debug = 0x0001,
message = 0x0002,
pause = 0x0003,
sleep = 0x0004,
getpid = 0x0005,
send = 0x0006,
receive = 0x0007,
fork = 0x0008,
exit = 0x0009,
#define SYSCALL(name, nargs) name ,
#include "syscalls.inc"
#undef SYSCALL
last_syscall
COUNT
};
void syscall_enable();
void syscall_dispatch(cpu_state *);
extern "C" void syscall_invalid(uint64_t call);

View File

@@ -1,53 +1,52 @@
%include "push_all.inc"
%include "tasking.inc"
; Count the syscalls at assembly time by reusing the syscalls.inc list: each
; SYSCALL(name, nargs) entry expands to `resb 1`, so the struc grows by one
; byte per entry and SYSCALLS.count equals the number of entries.
%define SYSCALL(name, nargs) resb 1
struc SYSCALLS
%include "syscalls.inc"
.count:
endstruc
extern __counter_syscall_enter
extern __counter_syscall_sysret
extern syscall_handler
extern isr_handler_return
extern syscall_registry
extern syscall_invalid
global syscall_handler_prelude
syscall_handler_prelude:
swapgs
mov [gs:0x08], rsp
mov rsp, [gs:0x00]
mov [gs:CPU_DATA.rsp3], rsp
mov rsp, [gs:CPU_DATA.rsp0]
push 0x23 ; ss
push 0x00 ; rsp - to be filled
push r11 ; rflags
push 0x2b ; cs
push rcx ; user rip
push 0 ; bogus error
push 0 ; bogus vector
push_all
push rcx
push rbp
mov rbp, rsp
push r11
inc qword [rel __counter_syscall_enter]
mov rax, [gs:0x08]
mov [rsp + 0xa0], rax
mov rax, [rsp + 0x70]
; Bounds-check the requested syscall number before it is used as an index
; into syscall_registry. The comparison must be unsigned: with a signed jl a
; call number that has its top bit set would be treated as negative, pass
; the check, and index outside the table.
cmp rax, SYSCALLS.count
jb .ok_syscall
mov rdi, rsp
call syscall_handler
mov rdi, rax
call syscall_invalid
mov rax, [rsp + 0x90]
and rax, 0x3
cmp rax, 0x3
jne isr_handler_return
.ok_syscall:
lea r11, [rel syscall_registry]
mov r11, [r11 + rax * 8]
call r11
inc qword [rel __counter_syscall_sysret]
mov rax, [rsp + 0xa0]
mov [gs:0x08], rax
pop r11
pop rbp
pop rcx
pop_all
add rsp, 16 ; ignore bogus interrupt / error
pop rcx ; user rip
add rsp, 8 ; ignore cs
pop r11 ; flags
add rsp, 16 ; rsp, ss
mov [gs:0x00], rsp
mov rsp, [gs:0x08]
mov [gs:CPU_DATA.rsp0], rsp
mov rsp, [gs:CPU_DATA.rsp3]
swapgs
o64 sysret

12
src/kernel/syscalls.inc Normal file
View File

@@ -0,0 +1,12 @@
SYSCALL(noop, 0)
SYSCALL(exit, 1)
SYSCALL(getpid, 0)
SYSCALL(fork, 0)
SYSCALL(message, 1)
SYSCALL(pause, 0)
SYSCALL(sleep, 1)
SYSCALL(send, 2)
SYSCALL(receive, 2)

View File

@@ -18,6 +18,10 @@ task_switch:
mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB
mov [rax + TCB.rsp], rsp
; Copy off saved user rsp
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp
mov [rax + TCB.rsp3], rcx
; Install next task's TCB
mov [gs:CPU_DATA.tcb], rdi ; rdi: next TCB (function param)
mov rsp, [rdi + TCB.rsp] ; next task's stack pointer
@@ -28,6 +32,10 @@ task_switch:
mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack
mov [gs:CPU_DATA.rsp0], rcx
; Update saved user rsp
; rdx is the scratch register here: rcx must keep holding rsp0 until it has
; been written to the TSS below, and rdx is reloaded with the TSS address
; immediately afterwards, so nothing live is overwritten.
mov rdx, [rdi + TCB.rsp3] ; rdx: new task's saved user rsp
mov [gs:CPU_DATA.rsp3], rdx
lea rdx, [rel g_tss] ; rdx: address of TSS
mov [rdx + TSS.rsp0], rcx ; rcx: still the next task's rsp0
@@ -71,6 +79,7 @@ task_fork:
mov rdi, [r14 + TCB.rsp0] ; rdi: child task rsp0
sub rdi, rax ; rdi: child task rsp
mov rsi, rsp ; rsi: current rsp
mov [r14 + TCB.rsp], rdi
rep movsq

View File

@@ -1,6 +1,7 @@
; Field offsets of a task control block as accessed from assembly.
; NOTE(review): presumably this must stay in step with the matching fields
; of the C++ process structure - confirm when adding or reordering fields.
struc TCB
.rsp: resq 1 ; stack pointer saved when the task is switched out
.rsp0: resq 1 ; top of the task's kernel stack
.rsp3: resq 1 ; saved user-mode stack pointer
.pml4: resq 1 ; the task's PML4 page table
endstruc