From c605793a9dc9020ee28d5151c30e7e40c085dcb9 Mon Sep 17 00:00:00 2001 From: "Justin C. Miller" Date: Wed, 3 Apr 2019 10:08:26 -0700 Subject: [PATCH] Fix fork() for new task switching model --- src/drivers/nulldrv/main.cpp | 4 +-- src/drivers/nulldrv/main.s | 2 +- src/kernel/boot.s | 19 ++++++----- src/kernel/kernel_memory.h | 4 +++ src/kernel/page_manager.cpp | 64 +++++++++++++++++++++++++++++------- src/kernel/page_manager.h | 3 +- src/kernel/process.cpp | 50 ++++++++++++++++++++++++---- src/kernel/process.h | 8 +++-- src/kernel/scheduler.cpp | 35 +++++--------------- src/kernel/syscall.cpp | 1 - src/kernel/syscall.s | 14 ++++++++ src/kernel/syscalls/fork.cpp | 24 ++++++++++++++ src/kernel/task.s | 41 +++-------------------- 13 files changed, 172 insertions(+), 97 deletions(-) create mode 100644 src/kernel/syscalls/fork.cpp diff --git a/src/drivers/nulldrv/main.cpp b/src/drivers/nulldrv/main.cpp index 6e9b523..1110f5d 100644 --- a/src/drivers/nulldrv/main.cpp +++ b/src/drivers/nulldrv/main.cpp @@ -16,11 +16,9 @@ int main(int argc, const char **argv) { int32_t pid = getpid(); + int32_t child = fork(); message("hello from nulldrv!"); - //int32_t child = fork(); - //debug(); for (int i = 1; i < 5; ++i) sleep(i*10); - debug(); return 0; } diff --git a/src/drivers/nulldrv/main.s b/src/drivers/nulldrv/main.s index 1835de2..1a59205 100644 --- a/src/drivers/nulldrv/main.s +++ b/src/drivers/nulldrv/main.s @@ -44,7 +44,7 @@ fork: push rbp mov rbp, rsp - mov rax, 0x00 + mov rax, 0x03 syscall ; pid left in rax pop rbp diff --git a/src/kernel/boot.s b/src/kernel/boot.s index fec42f1..7b9ae17 100644 --- a/src/kernel/boot.s +++ b/src/kernel/boot.s @@ -18,11 +18,11 @@ global _start:function (_start.end - _start) _start: cli - mov rsp, stack_end - push 0 ; signal end of stack with 0 return address - push 0 ; and a few extra entries in case of stack - push 0 ; problems - push 0 + mov rsp, idle_stack_end + mov qword [rsp + 0x00], 0 ; signal end of stack with 0 return address + mov 
qword [rsp + 0x08], 0 ; and a few extra entries in case of stack + mov qword [rsp + 0x10], 0 ; problems + mov qword [rsp + 0x18], 0 mov rbp, rsp extern kernel_main @@ -47,6 +47,9 @@ interrupts_disable: section .bss align 0x100 -stack_begin: - resb 0x4000 ; 16KiB stack space -stack_end: +idle_stack_begin: + resb 0x1000 ; 4KiB stack space + +global idle_stack_end +idle_stack_end: + resq 4 diff --git a/src/kernel/kernel_memory.h b/src/kernel/kernel_memory.h index fcd2d0e..21edf55 100644 --- a/src/kernel/kernel_memory.h +++ b/src/kernel/kernel_memory.h @@ -19,4 +19,8 @@ namespace memory { /// Initial process thread's stack size, in pages static const unsigned initial_stack_pages = 1; + /// Helper to determine if a physical address can be accessed + /// through the page_offset area. + inline bool page_mappable(uintptr_t a) { return (a & page_offset) == 0; } + } // namespace memory diff --git a/src/kernel/page_manager.cpp b/src/kernel/page_manager.cpp index 6701c81..bf9f7e4 100644 --- a/src/kernel/page_manager.cpp +++ b/src/kernel/page_manager.cpp @@ -2,12 +2,14 @@ #include "kutil/assert.h" #include "console.h" +#include "io.h" #include "log.h" #include "page_manager.h" using memory::frame_size; using memory::kernel_offset; using memory::page_offset; +using memory::page_mappable; extern kutil::frame_allocator g_frame_allocator; extern kutil::address_manager g_kernel_address_manager; @@ -68,28 +70,58 @@ page_manager::create_process_map() uintptr_t page_manager::copy_page(uintptr_t orig) { - uintptr_t virt = m_addrs.allocate(2 * frame_size); - uintptr_t copy = 0; + bool paged_orig = false; + bool paged_copy = false; + uintptr_t orig_virt; + + if (page_mappable(orig)) { + orig_virt = orig + page_offset; + } else { + orig_virt = m_addrs.allocate(frame_size); + page_in(get_pml4(), orig, orig_virt, 1); + paged_orig = true; + } + + uintptr_t copy = 0; + uintptr_t copy_virt; size_t n = m_frames.allocate(1, &copy); kassert(n, "copy_page could not allocate page"); - 
page_in(get_pml4(), orig, virt, 1); - page_in(get_pml4(), copy, virt + frame_size, 1); + if (page_mappable(copy)) { + copy_virt = copy + page_offset; + } else { + copy_virt = m_addrs.allocate(frame_size); + page_in(get_pml4(), copy, copy_virt, 1); + paged_copy = true; + } + + if (paged_orig || paged_copy) { + set_pml4(get_pml4()); + __sync_synchronize(); + io_wait(); + } kutil::memcpy( - reinterpret_cast(virt + frame_size), - reinterpret_cast(virt), + reinterpret_cast(copy_virt), + reinterpret_cast(orig_virt), frame_size); - page_out(get_pml4(), virt, 2); + if (paged_orig) { + page_out(get_pml4(), orig_virt, 1); + m_addrs.free(orig_virt); + } + + if (paged_copy) { + page_out(get_pml4(), copy_virt, 1); + m_addrs.free(copy_virt); + } - m_addrs.free(virt); return copy; } page_table * -page_manager::copy_table(page_table *from, page_table::level lvl) +page_manager::copy_table(page_table *from, page_table::level lvl, page_table_indices index) { page_table *to = get_table_page(); log::debug(logs::paging, "Page manager copying level %d table at %016lx to %016lx.", lvl, from, to); @@ -105,12 +137,17 @@ page_manager::copy_table(page_table *from, page_table::level lvl) 512; unsigned pages_copied = 0; + uintptr_t from_addr = 0; + uintptr_t to_addr = 0; + for (int i = 0; i < max; ++i) { if (!from->is_present(i)) { to->entries[i] = 0; continue; } + index[lvl] = i; + bool is_page = lvl == page_table::level::pt || from->is_large_page(lvl, i); @@ -119,17 +156,20 @@ page_manager::copy_table(page_table *from, page_table::level lvl) uint16_t flags = from->entries[i] & 0xfffull; uintptr_t orig = from->entries[i] & ~0xfffull; to->entries[i] = copy_page(orig) | flags; - pages_copied++; + if (!pages_copied++) + from_addr = index.addr(); + to_addr = index.addr(); } else { uint16_t flags = 0; page_table *next_from = from->get(i, &flags); - page_table *next_to = copy_table(next_from, page_table::deeper(lvl)); + page_table *next_to = copy_table(next_from, page_table::deeper(lvl), index); 
to->set(i, next_to, flags); } } if (pages_copied) - log::debug(logs::paging, " copied %3u pages", pages_copied); + log::debug(logs::paging, " copied %3u pages %016lx - %016lx", + pages_copied, from_addr, to_addr + frame_size); return to; } diff --git a/src/kernel/page_manager.h b/src/kernel/page_manager.h index d14d398..1c40d52 100644 --- a/src/kernel/page_manager.h +++ b/src/kernel/page_manager.h @@ -62,7 +62,8 @@ public: /// \arg lvl Level of the given tables (default is PML4) /// \returns The new page table page_table * copy_table(page_table *from, - page_table::level lvl = page_table::level::pml4); + page_table::level lvl = page_table::level::pml4, + page_table_indices index = {}); /// Allocate and map pages into virtual memory. /// \arg address The virtual address at which to map the pages diff --git a/src/kernel/process.cpp b/src/kernel/process.cpp index a6db1eb..dced666 100644 --- a/src/kernel/process.cpp +++ b/src/kernel/process.cpp @@ -4,6 +4,8 @@ #include "process.h" #include "scheduler.h" +extern "C" void task_fork_return_thunk(); + void process::exit(uint32_t code) @@ -13,7 +15,7 @@ process::exit(uint32_t code) } pid_t -process::fork(cpu_state *regs) +process::fork() { auto &sched = scheduler::get(); auto *child = sched.create_process(); @@ -29,13 +31,31 @@ process::fork(cpu_state *regs) child->pml4 = page_manager::get()->copy_table(pml4); kassert(child->pml4, "process::fork() got null pml4"); - log::debug(logs::task, "Copied process %d to %d, new PML4 %016lx.", - pid, child->pid, child->pml4); - log::debug(logs::task, " copied stack %016lx to %016lx, rsp %016lx.", - kernel_stack, child->kernel_stack, child->rsp); - + child->rsp3 = bsp_cpu_data.rsp3; child->setup_kernel_stack(); - task_fork(child); + + log::debug(logs::task, "Copied process %d to %d", + pid, child->pid); + + log::debug(logs::task, " PML4 %016lx", child->pml4); + log::debug(logs::task, " RSP3 %016lx", child->rsp3); + log::debug(logs::task, " RSP0 %016lx", child->rsp0); + + // Initialize 
a new empty stack with a fake saved state + // for returning out of syscall_handler_prelude + size_t ret_seg_size = sizeof(uintptr_t) * 8; + child->rsp -= ret_seg_size; + + void *this_ret_seg = + reinterpret_cast(rsp0 - ret_seg_size); + void *child_ret_seg = + reinterpret_cast(child->rsp); + kutil::memcpy(child_ret_seg, this_ret_seg, ret_seg_size); + + child->add_fake_task_return( + reinterpret_cast(task_fork_return_thunk)); + + log::debug(logs::task, " RSP %016lx", child->rsp); return child->pid; } @@ -63,10 +83,26 @@ process::setup_kernel_stack() kernel_stack_size = initial_stack_size; kernel_stack = reinterpret_cast(stack_bottom); rsp0 = reinterpret_cast(stack_top); + rsp = rsp0; return stack_top; } +void +process::add_fake_task_return(uintptr_t rip) +{ + rsp -= sizeof(uintptr_t) * 7; + uintptr_t *stack = reinterpret_cast(rsp); + + stack[6] = rip; // return rip + stack[5] = rsp0; // rbp + stack[4] = 0xbbbbbbbb; // rbx + stack[3] = 0x12121212; // r12 + stack[2] = 0x13131313; // r13 + stack[1] = 0x14141414; // r14 + stack[0] = 0x15151515; // r15 +} + bool process::wait_on_signal(uint64_t sigmask) { diff --git a/src/kernel/process.h b/src/kernel/process.h index a00847b..5e5fc8a 100644 --- a/src/kernel/process.h +++ b/src/kernel/process.h @@ -71,10 +71,9 @@ struct process void exit(unsigned code); /// Copy this process. - /// \arg regs The saved state from the fork syscall /// \returns Returns the child's pid to the parent, and /// 0 to the child. - pid_t fork(cpu_state *regs); + pid_t fork(); /// Unready this process until it gets a signal /// \arg sigmask A bitfield of signals to wake on @@ -135,6 +134,11 @@ private: /// process object, but also returns it. /// \returns The new rsp0 as a pointer void * setup_kernel_stack(); + + /// Initialize this process' kernel stack with a fake return segment for + /// returning out of task_switch. 
+ /// \arg rip The rip to return to + void add_fake_task_return(uintptr_t rip); }; using process_list = kutil::linked_list; diff --git a/src/kernel/scheduler.cpp b/src/kernel/scheduler.cpp index ef591f4..ce6eda0 100644 --- a/src/kernel/scheduler.cpp +++ b/src/kernel/scheduler.cpp @@ -26,20 +26,22 @@ extern "C" { uintptr_t load_process_image(const void *image_start, size_t bytes, process *proc); }; +extern uint64_t idle_stack_end; + scheduler::scheduler(lapic *apic) : m_apic(apic), m_next_pid(1) { auto *idle = m_process_allocator.pop(); - idle->setup_kernel_stack(); - uint8_t last_pri = num_priorities - 1; // The kernel idle task, also the thread we're in now idle->pid = 0; idle->ppid = 0; idle->priority = last_pri; - idle->rsp = 0; // This will get set when we switch away + idle->rsp = 0; // This will get set when we switch away + idle->rsp3 = 0; // Never used for the idle task + idle->rsp0 = reinterpret_cast(&idle_stack_end); idle->pml4 = page_manager::get_pml4(); idle->quanta = process_quanta; idle->flags = @@ -124,24 +126,6 @@ scheduler::create_process(pid_t pid) return proc; } -static uintptr_t -add_fake_task_return(uintptr_t rsp, uintptr_t rbp, uintptr_t rip) -{ - // Initialize a new empty stack with a fake return segment - // for returning out of task_switch - rsp -= sizeof(uintptr_t) * 7; - uintptr_t *stack = reinterpret_cast(rsp); - - stack[6] = rip; // return rip - stack[5] = rbp; // rbp - stack[4] = 0xbbbbbbbb; // rbx - stack[3] = 0x12121212; // r12 - stack[2] = 0x13131313; // r13 - stack[1] = 0x14141414; // r14 - stack[0] = 0x15151515; // r15 - return rsp; -} - void scheduler::load_process(const char *name, const void *data, size_t size) { @@ -165,9 +149,8 @@ scheduler::load_process(const char *name, const void *data, size_t size) stack[1] = reinterpret_cast(size); stack[2] = reinterpret_cast(proc); - proc->rsp = add_fake_task_return( - reinterpret_cast(stack), - proc->rsp0, + proc->rsp = reinterpret_cast(stack); + proc->add_fake_task_return( 
reinterpret_cast(ramdisk_process_loader)); // Arguments for iret - rip will be pushed on before these @@ -201,8 +184,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)()) // Create an initial kernel stack space proc->setup_kernel_stack(); - proc->rsp = add_fake_task_return( - proc->rsp0, proc->rsp0, + proc->add_fake_task_return( reinterpret_cast(task)); proc->pml4 = page_manager::get()->get_kernel_pml4(); @@ -216,6 +198,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)()) log::debug(logs::task, "Creating kernel task: pid %d pri %d", proc->pid, proc->priority); log::debug(logs::task, " RSP0 %016lx", proc->rsp0); log::debug(logs::task, " RSP %016lx", proc->rsp); + log::debug(logs::task, " PML4 %016lx", proc->pml4); } void diff --git a/src/kernel/syscall.cpp b/src/kernel/syscall.cpp index 15ddc25..2a00e46 100644 --- a/src/kernel/syscall.cpp +++ b/src/kernel/syscall.cpp @@ -14,7 +14,6 @@ extern "C" { namespace syscalls { -pid_t fork() { return 0; } void send() {} void receive() {} diff --git a/src/kernel/syscall.s b/src/kernel/syscall.s index 81871d0..64a989d 100644 --- a/src/kernel/syscall.s +++ b/src/kernel/syscall.s @@ -12,6 +12,7 @@ extern syscall_registry extern syscall_invalid global syscall_handler_prelude +global syscall_handler_prelude.return syscall_handler_prelude: swapgs mov [gs:CPU_DATA.rsp3], rsp @@ -20,7 +21,13 @@ syscall_handler_prelude: push rcx push rbp mov rbp, rsp + + push rbx push r11 + push r12 + push r13 + push r14 + push r15 inc qword [rel __counter_syscall_enter] @@ -37,7 +44,14 @@ syscall_handler_prelude: inc qword [rel __counter_syscall_sysret] +.return: + pop r15 + pop r14 + pop r13 + pop r12 pop r11 + pop rbx + pop rbp pop rcx diff --git a/src/kernel/syscalls/fork.cpp b/src/kernel/syscalls/fork.cpp new file mode 100644 index 0000000..7aae1eb --- /dev/null +++ b/src/kernel/syscalls/fork.cpp @@ -0,0 +1,24 @@ + +#include "log.h" +#include "scheduler.h" + +namespace syscalls { + +pid_t +fork() +{ + auto &s = scheduler::get(); + 
auto *p = s.current(); + pid_t ppid = p->pid; + + log::debug(logs::syscall, "Process %d calling fork()", ppid); + + pid_t pid = p->fork(); + + p = s.current(); + log::debug(logs::syscall, "Process %d's fork: returning %d from process %d", ppid, pid, p->pid); + + return pid; +} + +} // namespace syscalls diff --git a/src/kernel/task.s b/src/kernel/task.s index 29ee309..84794df 100644 --- a/src/kernel/task.s +++ b/src/kernel/task.s @@ -1,7 +1,6 @@ %include "tasking.inc" extern g_tss - global task_switch task_switch: push rbp @@ -55,40 +54,10 @@ task_switch: pop rbp ret -global task_fork -task_fork: - push rbp - mov rbp, rsp - ; Save the rest of the callee-saved regs - push rbx - push r12 - push r13 - push r14 - push r15 - - mov r14, rdi ; r14: child task TCB (function argument) - - mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB - mov rax, [rax + TCB.rsp0] ; rax: current task rsp0 - sub rax, rsp ; rax: size of kernel stack in bytes - - mov rcx, rax - shr rcx, 3 ; rcx: size of kernel stack in qwords - - mov rdi, [r14 + TCB.rsp0] ; rdi: child task rsp0 - sub rdi, rax ; rdi: child task rsp - mov rsi, rsp ; rsi: current rsp - mov [r14 + TCB.rsp], rdi - - rep movsq - - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - - pop rbp - ret +extern syscall_handler_prelude.return +global task_fork_return_thunk +task_fork_return_thunk: + mov rax, 0 + jmp syscall_handler_prelude.return