Fix fork() for new task switching model

This commit is contained in:
Justin C. Miller
2019-04-03 10:08:26 -07:00
parent 8375870af6
commit c605793a9d
13 changed files with 172 additions and 97 deletions

View File

@@ -16,11 +16,9 @@ int
main(int argc, const char **argv)
{
int32_t pid = getpid();
int32_t child = fork();
message("hello from nulldrv!");
//int32_t child = fork();
//debug();
for (int i = 1; i < 5; ++i)
sleep(i*10);
debug();
return 0;
}

View File

@@ -44,7 +44,7 @@ fork:
push rbp
mov rbp, rsp
mov rax, 0x00
mov rax, 0x03
syscall ; pid left in rax
pop rbp

View File

@@ -18,11 +18,11 @@ global _start:function (_start.end - _start)
_start:
cli
mov rsp, stack_end
push 0 ; signal end of stack with 0 return address
push 0 ; and a few extra entries in case of stack
push 0 ; problems
push 0
mov rsp, idle_stack_end
mov qword [rsp + 0x00], 0 ; signal end of stack with 0 return address
mov qword [rsp + 0x08], 0 ; and a few extra entries in case of stack
mov qword [rsp + 0x10], 0 ; problems
mov qword [rsp + 0x18], 0
mov rbp, rsp
extern kernel_main
@@ -47,6 +47,9 @@ interrupts_disable:
section .bss
align 0x100
stack_begin:
resb 0x4000 ; 16KiB stack space
stack_end:
idle_stack_begin:
resb 0x1000 ; 4KiB stack space
global idle_stack_end
idle_stack_end:
resq 4

View File

@@ -19,4 +19,8 @@ namespace memory {
/// Initial process thread's stack size, in pages
static const unsigned initial_stack_pages = 1;
/// Check whether a physical address can be reached through the
/// page_offset area, which is the case when the address has none of
/// the bits of page_offset set.
/// \arg a  The physical address to test
/// \returns True if `a` shares no set bits with page_offset
inline bool page_mappable(uintptr_t a)
{
    return !(a & page_offset);
}
} // namespace memory

View File

@@ -2,12 +2,14 @@
#include "kutil/assert.h"
#include "console.h"
#include "io.h"
#include "log.h"
#include "page_manager.h"
using memory::frame_size;
using memory::kernel_offset;
using memory::page_offset;
using memory::page_mappable;
extern kutil::frame_allocator g_frame_allocator;
extern kutil::address_manager g_kernel_address_manager;
@@ -68,28 +70,58 @@ page_manager::create_process_map()
uintptr_t
page_manager::copy_page(uintptr_t orig)
{
uintptr_t virt = m_addrs.allocate(2 * frame_size);
uintptr_t copy = 0;
bool paged_orig = false;
bool paged_copy = false;
uintptr_t orig_virt;
if (page_mappable(orig)) {
orig_virt = orig + page_offset;
} else {
orig_virt = m_addrs.allocate(frame_size);
page_in(get_pml4(), orig, orig_virt, 1);
paged_orig = true;
}
uintptr_t copy = 0;
uintptr_t copy_virt;
size_t n = m_frames.allocate(1, &copy);
kassert(n, "copy_page could not allocate page");
page_in(get_pml4(), orig, virt, 1);
page_in(get_pml4(), copy, virt + frame_size, 1);
if (page_mappable(copy)) {
copy_virt = copy + page_offset;
} else {
copy_virt = m_addrs.allocate(frame_size);
page_in(get_pml4(), copy, copy_virt, 1);
paged_copy = true;
}
if (paged_orig || paged_copy) {
set_pml4(get_pml4());
__sync_synchronize();
io_wait();
}
kutil::memcpy(
reinterpret_cast<void *>(virt + frame_size),
reinterpret_cast<void *>(virt),
reinterpret_cast<void *>(copy_virt),
reinterpret_cast<void *>(orig_virt),
frame_size);
page_out(get_pml4(), virt, 2);
if (paged_orig) {
page_out(get_pml4(), orig_virt, 1);
m_addrs.free(orig_virt);
}
if (paged_copy) {
page_out(get_pml4(), copy_virt, 1);
m_addrs.free(copy_virt);
}
m_addrs.free(virt);
return copy;
}
page_table *
page_manager::copy_table(page_table *from, page_table::level lvl)
page_manager::copy_table(page_table *from, page_table::level lvl, page_table_indices index)
{
page_table *to = get_table_page();
log::debug(logs::paging, "Page manager copying level %d table at %016lx to %016lx.", lvl, from, to);
@@ -105,12 +137,17 @@ page_manager::copy_table(page_table *from, page_table::level lvl)
512;
unsigned pages_copied = 0;
uintptr_t from_addr = 0;
uintptr_t to_addr = 0;
for (int i = 0; i < max; ++i) {
if (!from->is_present(i)) {
to->entries[i] = 0;
continue;
}
index[lvl] = i;
bool is_page =
lvl == page_table::level::pt ||
from->is_large_page(lvl, i);
@@ -119,17 +156,20 @@ page_manager::copy_table(page_table *from, page_table::level lvl)
uint16_t flags = from->entries[i] & 0xfffull;
uintptr_t orig = from->entries[i] & ~0xfffull;
to->entries[i] = copy_page(orig) | flags;
pages_copied++;
if (!pages_copied++)
from_addr = index.addr();
to_addr = index.addr();
} else {
uint16_t flags = 0;
page_table *next_from = from->get(i, &flags);
page_table *next_to = copy_table(next_from, page_table::deeper(lvl));
page_table *next_to = copy_table(next_from, page_table::deeper(lvl), index);
to->set(i, next_to, flags);
}
}
if (pages_copied)
log::debug(logs::paging, " copied %3u pages", pages_copied);
log::debug(logs::paging, " copied %3u pages %016lx - %016lx",
pages_copied, from_addr, to_addr + frame_size);
return to;
}

View File

@@ -62,7 +62,8 @@ public:
/// \arg lvl Level of the given tables (default is PML4)
/// \returns The new page table
page_table * copy_table(page_table *from,
page_table::level lvl = page_table::level::pml4);
page_table::level lvl = page_table::level::pml4,
page_table_indices index = {});
/// Allocate and map pages into virtual memory.
/// \arg address The virtual address at which to map the pages

View File

@@ -4,6 +4,8 @@
#include "process.h"
#include "scheduler.h"
extern "C" void task_fork_return_thunk();
void
process::exit(uint32_t code)
@@ -13,7 +15,7 @@ process::exit(uint32_t code)
}
pid_t
process::fork(cpu_state *regs)
process::fork()
{
auto &sched = scheduler::get();
auto *child = sched.create_process();
@@ -29,13 +31,31 @@ process::fork(cpu_state *regs)
child->pml4 = page_manager::get()->copy_table(pml4);
kassert(child->pml4, "process::fork() got null pml4");
log::debug(logs::task, "Copied process %d to %d, new PML4 %016lx.",
pid, child->pid, child->pml4);
log::debug(logs::task, " copied stack %016lx to %016lx, rsp %016lx.",
kernel_stack, child->kernel_stack, child->rsp);
child->rsp3 = bsp_cpu_data.rsp3;
child->setup_kernel_stack();
task_fork(child);
log::debug(logs::task, "Copied process %d to %d",
pid, child->pid);
log::debug(logs::task, " PML4 %016lx", child->pml4);
log::debug(logs::task, " RSP3 %016lx", child->rsp3);
log::debug(logs::task, " RSP0 %016lx", child->rsp0);
// Initialize a new empty stack with a fake saved state
// for returning out of syscall_handler_prelude
size_t ret_seg_size = sizeof(uintptr_t) * 8;
child->rsp -= ret_seg_size;
void *this_ret_seg =
reinterpret_cast<void*>(rsp0 - ret_seg_size);
void *child_ret_seg =
reinterpret_cast<void*>(child->rsp);
kutil::memcpy(child_ret_seg, this_ret_seg, ret_seg_size);
child->add_fake_task_return(
reinterpret_cast<uintptr_t>(task_fork_return_thunk));
log::debug(logs::task, " RSP %016lx", child->rsp);
return child->pid;
}
@@ -63,10 +83,26 @@ process::setup_kernel_stack()
kernel_stack_size = initial_stack_size;
kernel_stack = reinterpret_cast<uintptr_t>(stack_bottom);
rsp0 = reinterpret_cast<uintptr_t>(stack_top);
rsp = rsp0;
return stack_top;
}
/// Build a fake saved-register frame on this process' kernel stack.
/// The saved rsp is lowered by seven pointer-sized slots, which are then
/// filled with placeholder register values, rsp0 as the saved rbp, and
/// the given return address in the topmost slot.
/// \arg rip  The address stored in the frame's return-rip slot
void
process::add_fake_task_return(uintptr_t rip)
{
    // Make room for seven pointer-sized entries below the current rsp.
    rsp -= 7 * sizeof(uintptr_t);
    uintptr_t *frame = reinterpret_cast<uintptr_t*>(rsp);

    // Filled from the lowest address upwards; the register slots carry
    // recognizable placeholder patterns.
    frame[0] = 0x15151515; // r15
    frame[1] = 0x14141414; // r14
    frame[2] = 0x13131313; // r13
    frame[3] = 0x12121212; // r12
    frame[4] = 0xbbbbbbbb; // rbx
    frame[5] = rsp0;       // rbp
    frame[6] = rip;        // return rip
}
bool
process::wait_on_signal(uint64_t sigmask)
{

View File

@@ -71,10 +71,9 @@ struct process
void exit(unsigned code);
/// Copy this process.
/// \arg regs The saved state from the fork syscall
/// \returns Returns the child's pid to the parent, and
/// 0 to the child.
pid_t fork(cpu_state *regs);
pid_t fork();
/// Unready this process until it gets a signal
/// \arg sigmask A bitfield of signals to wake on
@@ -135,6 +134,11 @@ private:
/// process object, but also returns it.
/// \returns The new rsp0 as a pointer
void * setup_kernel_stack();
/// Initialize this process' kernel stack with a fake return segment for
/// returning out of task_switch.
/// \arg rip The rip to return to
void add_fake_task_return(uintptr_t rip);
};
using process_list = kutil::linked_list<process>;

View File

@@ -26,20 +26,22 @@ extern "C" {
uintptr_t load_process_image(const void *image_start, size_t bytes, process *proc);
};
extern uint64_t idle_stack_end;
scheduler::scheduler(lapic *apic) :
m_apic(apic),
m_next_pid(1)
{
auto *idle = m_process_allocator.pop();
idle->setup_kernel_stack();
uint8_t last_pri = num_priorities - 1;
// The kernel idle task, also the thread we're in now
idle->pid = 0;
idle->ppid = 0;
idle->priority = last_pri;
idle->rsp = 0; // This will get set when we switch away
idle->rsp = 0; // This will get set when we switch away
idle->rsp3 = 0; // Never used for the idle task
idle->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
idle->pml4 = page_manager::get_pml4();
idle->quanta = process_quanta;
idle->flags =
@@ -124,24 +126,6 @@ scheduler::create_process(pid_t pid)
return proc;
}
static uintptr_t
add_fake_task_return(uintptr_t rsp, uintptr_t rbp, uintptr_t rip)
{
// Initialize a new empty stack with a fake return segment
// for returning out of task_switch
rsp -= sizeof(uintptr_t) * 7;
uintptr_t *stack = reinterpret_cast<uintptr_t*>(rsp);
stack[6] = rip; // return rip
stack[5] = rbp; // rbp
stack[4] = 0xbbbbbbbb; // rbx
stack[3] = 0x12121212; // r12
stack[2] = 0x13131313; // r13
stack[1] = 0x14141414; // r14
stack[0] = 0x15151515; // r15
return rsp;
}
void
scheduler::load_process(const char *name, const void *data, size_t size)
{
@@ -165,9 +149,8 @@ scheduler::load_process(const char *name, const void *data, size_t size)
stack[1] = reinterpret_cast<uintptr_t>(size);
stack[2] = reinterpret_cast<uintptr_t>(proc);
proc->rsp = add_fake_task_return(
reinterpret_cast<uintptr_t>(stack),
proc->rsp0,
proc->rsp = reinterpret_cast<uintptr_t>(stack);
proc->add_fake_task_return(
reinterpret_cast<uintptr_t>(ramdisk_process_loader));
// Arguments for iret - rip will be pushed on before these
@@ -201,8 +184,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
// Create an initial kernel stack space
proc->setup_kernel_stack();
proc->rsp = add_fake_task_return(
proc->rsp0, proc->rsp0,
proc->add_fake_task_return(
reinterpret_cast<uintptr_t>(task));
proc->pml4 = page_manager::get()->get_kernel_pml4();
@@ -216,6 +198,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
log::debug(logs::task, "Creating kernel task: pid %d pri %d", proc->pid, proc->priority);
log::debug(logs::task, " RSP0 %016lx", proc->rsp0);
log::debug(logs::task, " RSP %016lx", proc->rsp);
log::debug(logs::task, " PML4 %016lx", proc->pml4);
}
void

View File

@@ -14,7 +14,6 @@ extern "C" {
namespace syscalls {
pid_t fork() { return 0; }
void send() {}
void receive() {}

View File

@@ -12,6 +12,7 @@ extern syscall_registry
extern syscall_invalid
global syscall_handler_prelude
global syscall_handler_prelude.return
syscall_handler_prelude:
swapgs
mov [gs:CPU_DATA.rsp3], rsp
@@ -20,7 +21,13 @@ syscall_handler_prelude:
push rcx
push rbp
mov rbp, rsp
push rbx
push r11
push r12
push r13
push r14
push r15
inc qword [rel __counter_syscall_enter]
@@ -37,7 +44,14 @@ syscall_handler_prelude:
inc qword [rel __counter_syscall_sysret]
.return:
pop r15
pop r14
pop r13
pop r12
pop r11
pop rbx
pop rbp
pop rcx

View File

@@ -0,0 +1,24 @@
#include "log.h"
#include "scheduler.h"
namespace syscalls {

/// Handler for the fork syscall. Forks the scheduler's current process
/// via process::fork() and logs both the request and the outcome.
/// \returns The pid value produced by process::fork()
pid_t
fork()
{
    auto &sched = scheduler::get();
    auto *caller = sched.current();
    const pid_t parent_pid = caller->pid;

    log::debug(logs::syscall, "Process %d calling fork()", parent_pid);

    const pid_t result = caller->fork();

    // Ask the scheduler again for the current process instead of reusing
    // the pointer fetched before the fork.
    auto *running = sched.current();
    log::debug(logs::syscall, "Process %d's fork: returning %d from process %d", parent_pid, result, running->pid);

    return result;
}

} // namespace syscalls

View File

@@ -1,7 +1,6 @@
%include "tasking.inc"
extern g_tss
global task_switch
task_switch:
push rbp
@@ -55,40 +54,10 @@ task_switch:
pop rbp
ret
global task_fork
task_fork:
push rbp
mov rbp, rsp
; Save the rest of the callee-saved regs
push rbx
push r12
push r13
push r14
push r15
mov r14, rdi ; r14: child task TCB (function argument)
mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB
mov rax, [rax + TCB.rsp0] ; rax: current task rsp0
sub rax, rsp ; rax: size of kernel stack in bytes
mov rcx, rax
shr rcx, 3 ; rcx: size of kernel stack in qwords
mov rdi, [r14 + TCB.rsp0] ; rdi: child task rsp0
sub rdi, rax ; rdi: child task rsp
mov rsi, rsp ; rsi: current rsp
mov [r14 + TCB.rsp], rdi
rep movsq
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
ret
; task_fork_return_thunk
; process::fork() passes this address to add_fake_task_return() as the
; return rip for the child's fake task return frame. It loads the child's
; fork() result (0) into rax and then leaves through the syscall handler's
; normal return path.
extern syscall_handler_prelude.return
global task_fork_return_thunk
task_fork_return_thunk:
; rax carries the syscall result; the child sees 0 from fork()
mov rax, 0
; Continue at the prelude's .return label, which pops the saved registers
jmp syscall_handler_prelude.return