mirror of
https://github.com/justinian/jsix.git
synced 2025-12-10 08:24:32 -08:00
Fix fork() for new task switching model
This commit is contained in:
@@ -16,11 +16,9 @@ int
|
||||
main(int argc, const char **argv)
|
||||
{
|
||||
int32_t pid = getpid();
|
||||
int32_t child = fork();
|
||||
message("hello from nulldrv!");
|
||||
//int32_t child = fork();
|
||||
//debug();
|
||||
for (int i = 1; i < 5; ++i)
|
||||
sleep(i*10);
|
||||
debug();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ fork:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
mov rax, 0x00
|
||||
mov rax, 0x03
|
||||
syscall ; pid left in rax
|
||||
|
||||
pop rbp
|
||||
|
||||
@@ -18,11 +18,11 @@ global _start:function (_start.end - _start)
|
||||
_start:
|
||||
cli
|
||||
|
||||
mov rsp, stack_end
|
||||
push 0 ; signal end of stack with 0 return address
|
||||
push 0 ; and a few extra entries in case of stack
|
||||
push 0 ; problems
|
||||
push 0
|
||||
mov rsp, idle_stack_end
|
||||
mov qword [rsp + 0x00], 0 ; signal end of stack with 0 return address
|
||||
mov qword [rsp + 0x08], 0 ; and a few extra entries in case of stack
|
||||
mov qword [rsp + 0x10], 0 ; problems
|
||||
mov qword [rsp + 0x18], 0
|
||||
|
||||
mov rbp, rsp
|
||||
extern kernel_main
|
||||
@@ -47,6 +47,9 @@ interrupts_disable:
|
||||
|
||||
section .bss
|
||||
align 0x100
|
||||
stack_begin:
|
||||
resb 0x4000 ; 16KiB stack space
|
||||
stack_end:
|
||||
idle_stack_begin:
|
||||
resb 0x1000 ; 4KiB stack space
|
||||
|
||||
global idle_stack_end
|
||||
idle_stack_end:
|
||||
resq 4
|
||||
|
||||
@@ -19,4 +19,8 @@ namespace memory {
|
||||
/// Initial process thread's stack size, in pages
|
||||
static const unsigned initial_stack_pages = 1;
|
||||
|
||||
/// Helper to determine if a physical address can be accessed
|
||||
/// through the page_offset area.
|
||||
inline bool page_mappable(uintptr_t a) { return (a & page_offset) == 0; }
|
||||
|
||||
} // namespace memory
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
|
||||
#include "kutil/assert.h"
|
||||
#include "console.h"
|
||||
#include "io.h"
|
||||
#include "log.h"
|
||||
#include "page_manager.h"
|
||||
|
||||
using memory::frame_size;
|
||||
using memory::kernel_offset;
|
||||
using memory::page_offset;
|
||||
using memory::page_mappable;
|
||||
|
||||
extern kutil::frame_allocator g_frame_allocator;
|
||||
extern kutil::address_manager g_kernel_address_manager;
|
||||
@@ -68,28 +70,58 @@ page_manager::create_process_map()
|
||||
uintptr_t
|
||||
page_manager::copy_page(uintptr_t orig)
|
||||
{
|
||||
uintptr_t virt = m_addrs.allocate(2 * frame_size);
|
||||
uintptr_t copy = 0;
|
||||
bool paged_orig = false;
|
||||
bool paged_copy = false;
|
||||
|
||||
uintptr_t orig_virt;
|
||||
|
||||
if (page_mappable(orig)) {
|
||||
orig_virt = orig + page_offset;
|
||||
} else {
|
||||
orig_virt = m_addrs.allocate(frame_size);
|
||||
page_in(get_pml4(), orig, orig_virt, 1);
|
||||
paged_orig = true;
|
||||
}
|
||||
|
||||
uintptr_t copy = 0;
|
||||
uintptr_t copy_virt;
|
||||
size_t n = m_frames.allocate(1, &copy);
|
||||
kassert(n, "copy_page could not allocate page");
|
||||
|
||||
page_in(get_pml4(), orig, virt, 1);
|
||||
page_in(get_pml4(), copy, virt + frame_size, 1);
|
||||
if (page_mappable(copy)) {
|
||||
copy_virt = copy + page_offset;
|
||||
} else {
|
||||
copy_virt = m_addrs.allocate(frame_size);
|
||||
page_in(get_pml4(), copy, copy_virt, 1);
|
||||
paged_copy = true;
|
||||
}
|
||||
|
||||
if (paged_orig || paged_copy) {
|
||||
set_pml4(get_pml4());
|
||||
__sync_synchronize();
|
||||
io_wait();
|
||||
}
|
||||
|
||||
kutil::memcpy(
|
||||
reinterpret_cast<void *>(virt + frame_size),
|
||||
reinterpret_cast<void *>(virt),
|
||||
reinterpret_cast<void *>(copy_virt),
|
||||
reinterpret_cast<void *>(orig_virt),
|
||||
frame_size);
|
||||
|
||||
page_out(get_pml4(), virt, 2);
|
||||
if (paged_orig) {
|
||||
page_out(get_pml4(), orig_virt, 1);
|
||||
m_addrs.free(orig_virt);
|
||||
}
|
||||
|
||||
if (paged_copy) {
|
||||
page_out(get_pml4(), copy_virt, 1);
|
||||
m_addrs.free(copy_virt);
|
||||
}
|
||||
|
||||
m_addrs.free(virt);
|
||||
return copy;
|
||||
}
|
||||
|
||||
page_table *
|
||||
page_manager::copy_table(page_table *from, page_table::level lvl)
|
||||
page_manager::copy_table(page_table *from, page_table::level lvl, page_table_indices index)
|
||||
{
|
||||
page_table *to = get_table_page();
|
||||
log::debug(logs::paging, "Page manager copying level %d table at %016lx to %016lx.", lvl, from, to);
|
||||
@@ -105,12 +137,17 @@ page_manager::copy_table(page_table *from, page_table::level lvl)
|
||||
512;
|
||||
|
||||
unsigned pages_copied = 0;
|
||||
uintptr_t from_addr = 0;
|
||||
uintptr_t to_addr = 0;
|
||||
|
||||
for (int i = 0; i < max; ++i) {
|
||||
if (!from->is_present(i)) {
|
||||
to->entries[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
index[lvl] = i;
|
||||
|
||||
bool is_page =
|
||||
lvl == page_table::level::pt ||
|
||||
from->is_large_page(lvl, i);
|
||||
@@ -119,17 +156,20 @@ page_manager::copy_table(page_table *from, page_table::level lvl)
|
||||
uint16_t flags = from->entries[i] & 0xfffull;
|
||||
uintptr_t orig = from->entries[i] & ~0xfffull;
|
||||
to->entries[i] = copy_page(orig) | flags;
|
||||
pages_copied++;
|
||||
if (!pages_copied++)
|
||||
from_addr = index.addr();
|
||||
to_addr = index.addr();
|
||||
} else {
|
||||
uint16_t flags = 0;
|
||||
page_table *next_from = from->get(i, &flags);
|
||||
page_table *next_to = copy_table(next_from, page_table::deeper(lvl));
|
||||
page_table *next_to = copy_table(next_from, page_table::deeper(lvl), index);
|
||||
to->set(i, next_to, flags);
|
||||
}
|
||||
}
|
||||
|
||||
if (pages_copied)
|
||||
log::debug(logs::paging, " copied %3u pages", pages_copied);
|
||||
log::debug(logs::paging, " copied %3u pages %016lx - %016lx",
|
||||
pages_copied, from_addr, to_addr + frame_size);
|
||||
|
||||
return to;
|
||||
}
|
||||
|
||||
@@ -62,7 +62,8 @@ public:
|
||||
/// \arg lvl Level of the given tables (default is PML4)
|
||||
/// \returns The new page table
|
||||
page_table * copy_table(page_table *from,
|
||||
page_table::level lvl = page_table::level::pml4);
|
||||
page_table::level lvl = page_table::level::pml4,
|
||||
page_table_indices index = {});
|
||||
|
||||
/// Allocate and map pages into virtual memory.
|
||||
/// \arg address The virtual address at which to map the pages
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#include "process.h"
|
||||
#include "scheduler.h"
|
||||
|
||||
extern "C" void task_fork_return_thunk();
|
||||
|
||||
|
||||
void
|
||||
process::exit(uint32_t code)
|
||||
@@ -13,7 +15,7 @@ process::exit(uint32_t code)
|
||||
}
|
||||
|
||||
pid_t
|
||||
process::fork(cpu_state *regs)
|
||||
process::fork()
|
||||
{
|
||||
auto &sched = scheduler::get();
|
||||
auto *child = sched.create_process();
|
||||
@@ -29,13 +31,31 @@ process::fork(cpu_state *regs)
|
||||
child->pml4 = page_manager::get()->copy_table(pml4);
|
||||
kassert(child->pml4, "process::fork() got null pml4");
|
||||
|
||||
log::debug(logs::task, "Copied process %d to %d, new PML4 %016lx.",
|
||||
pid, child->pid, child->pml4);
|
||||
log::debug(logs::task, " copied stack %016lx to %016lx, rsp %016lx.",
|
||||
kernel_stack, child->kernel_stack, child->rsp);
|
||||
|
||||
child->rsp3 = bsp_cpu_data.rsp3;
|
||||
child->setup_kernel_stack();
|
||||
task_fork(child);
|
||||
|
||||
log::debug(logs::task, "Copied process %d to %d",
|
||||
pid, child->pid);
|
||||
|
||||
log::debug(logs::task, " PML4 %016lx", child->pml4);
|
||||
log::debug(logs::task, " RSP3 %016lx", child->rsp3);
|
||||
log::debug(logs::task, " RSP0 %016lx", child->rsp0);
|
||||
|
||||
// Initialize a new empty stack with a fake saved state
|
||||
// for returning out of syscall_handler_prelude
|
||||
size_t ret_seg_size = sizeof(uintptr_t) * 8;
|
||||
child->rsp -= ret_seg_size;
|
||||
|
||||
void *this_ret_seg =
|
||||
reinterpret_cast<void*>(rsp0 - ret_seg_size);
|
||||
void *child_ret_seg =
|
||||
reinterpret_cast<void*>(child->rsp);
|
||||
kutil::memcpy(child_ret_seg, this_ret_seg, ret_seg_size);
|
||||
|
||||
child->add_fake_task_return(
|
||||
reinterpret_cast<uintptr_t>(task_fork_return_thunk));
|
||||
|
||||
log::debug(logs::task, " RSP %016lx", child->rsp);
|
||||
|
||||
return child->pid;
|
||||
}
|
||||
@@ -63,10 +83,26 @@ process::setup_kernel_stack()
|
||||
kernel_stack_size = initial_stack_size;
|
||||
kernel_stack = reinterpret_cast<uintptr_t>(stack_bottom);
|
||||
rsp0 = reinterpret_cast<uintptr_t>(stack_top);
|
||||
rsp = rsp0;
|
||||
|
||||
return stack_top;
|
||||
}
|
||||
|
||||
void
|
||||
process::add_fake_task_return(uintptr_t rip)
|
||||
{
|
||||
rsp -= sizeof(uintptr_t) * 7;
|
||||
uintptr_t *stack = reinterpret_cast<uintptr_t*>(rsp);
|
||||
|
||||
stack[6] = rip; // return rip
|
||||
stack[5] = rsp0; // rbp
|
||||
stack[4] = 0xbbbbbbbb; // rbx
|
||||
stack[3] = 0x12121212; // r12
|
||||
stack[2] = 0x13131313; // r13
|
||||
stack[1] = 0x14141414; // r14
|
||||
stack[0] = 0x15151515; // r15
|
||||
}
|
||||
|
||||
bool
|
||||
process::wait_on_signal(uint64_t sigmask)
|
||||
{
|
||||
|
||||
@@ -71,10 +71,9 @@ struct process
|
||||
void exit(unsigned code);
|
||||
|
||||
/// Copy this process.
|
||||
/// \arg regs The saved state from the fork syscall
|
||||
/// \returns Returns the child's pid to the parent, and
|
||||
/// 0 to the child.
|
||||
pid_t fork(cpu_state *regs);
|
||||
pid_t fork();
|
||||
|
||||
/// Unready this process until it gets a signal
|
||||
/// \arg sigmask A bitfield of signals to wake on
|
||||
@@ -135,6 +134,11 @@ private:
|
||||
/// process object, but also returns it.
|
||||
/// \returns The new rsp0 as a pointer
|
||||
void * setup_kernel_stack();
|
||||
|
||||
/// Initialize this process' kernel stack with a fake return segment for
|
||||
/// returning out of task_switch.
|
||||
/// \arg rip The rip to return to
|
||||
void add_fake_task_return(uintptr_t rip);
|
||||
};
|
||||
|
||||
using process_list = kutil::linked_list<process>;
|
||||
|
||||
@@ -26,20 +26,22 @@ extern "C" {
|
||||
uintptr_t load_process_image(const void *image_start, size_t bytes, process *proc);
|
||||
};
|
||||
|
||||
extern uint64_t idle_stack_end;
|
||||
|
||||
scheduler::scheduler(lapic *apic) :
|
||||
m_apic(apic),
|
||||
m_next_pid(1)
|
||||
{
|
||||
auto *idle = m_process_allocator.pop();
|
||||
idle->setup_kernel_stack();
|
||||
|
||||
uint8_t last_pri = num_priorities - 1;
|
||||
|
||||
// The kernel idle task, also the thread we're in now
|
||||
idle->pid = 0;
|
||||
idle->ppid = 0;
|
||||
idle->priority = last_pri;
|
||||
idle->rsp = 0; // This will get set when we switch away
|
||||
idle->rsp = 0; // This will get set when we switch away
|
||||
idle->rsp3 = 0; // Never used for the idle task
|
||||
idle->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
|
||||
idle->pml4 = page_manager::get_pml4();
|
||||
idle->quanta = process_quanta;
|
||||
idle->flags =
|
||||
@@ -124,24 +126,6 @@ scheduler::create_process(pid_t pid)
|
||||
return proc;
|
||||
}
|
||||
|
||||
static uintptr_t
|
||||
add_fake_task_return(uintptr_t rsp, uintptr_t rbp, uintptr_t rip)
|
||||
{
|
||||
// Initialize a new empty stack with a fake return segment
|
||||
// for returning out of task_switch
|
||||
rsp -= sizeof(uintptr_t) * 7;
|
||||
uintptr_t *stack = reinterpret_cast<uintptr_t*>(rsp);
|
||||
|
||||
stack[6] = rip; // return rip
|
||||
stack[5] = rbp; // rbp
|
||||
stack[4] = 0xbbbbbbbb; // rbx
|
||||
stack[3] = 0x12121212; // r12
|
||||
stack[2] = 0x13131313; // r13
|
||||
stack[1] = 0x14141414; // r14
|
||||
stack[0] = 0x15151515; // r15
|
||||
return rsp;
|
||||
}
|
||||
|
||||
void
|
||||
scheduler::load_process(const char *name, const void *data, size_t size)
|
||||
{
|
||||
@@ -165,9 +149,8 @@ scheduler::load_process(const char *name, const void *data, size_t size)
|
||||
stack[1] = reinterpret_cast<uintptr_t>(size);
|
||||
stack[2] = reinterpret_cast<uintptr_t>(proc);
|
||||
|
||||
proc->rsp = add_fake_task_return(
|
||||
reinterpret_cast<uintptr_t>(stack),
|
||||
proc->rsp0,
|
||||
proc->rsp = reinterpret_cast<uintptr_t>(stack);
|
||||
proc->add_fake_task_return(
|
||||
reinterpret_cast<uintptr_t>(ramdisk_process_loader));
|
||||
|
||||
// Arguments for iret - rip will be pushed on before these
|
||||
@@ -201,8 +184,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
|
||||
|
||||
// Create an initial kernel stack space
|
||||
proc->setup_kernel_stack();
|
||||
proc->rsp = add_fake_task_return(
|
||||
proc->rsp0, proc->rsp0,
|
||||
proc->add_fake_task_return(
|
||||
reinterpret_cast<uintptr_t>(task));
|
||||
|
||||
proc->pml4 = page_manager::get()->get_kernel_pml4();
|
||||
@@ -216,6 +198,7 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
|
||||
log::debug(logs::task, "Creating kernel task: pid %d pri %d", proc->pid, proc->priority);
|
||||
log::debug(logs::task, " RSP0 %016lx", proc->rsp0);
|
||||
log::debug(logs::task, " RSP %016lx", proc->rsp);
|
||||
log::debug(logs::task, " PML4 %016lx", proc->pml4);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -14,7 +14,6 @@ extern "C" {
|
||||
|
||||
namespace syscalls {
|
||||
|
||||
pid_t fork() { return 0; }
|
||||
|
||||
void send() {}
|
||||
void receive() {}
|
||||
|
||||
@@ -12,6 +12,7 @@ extern syscall_registry
|
||||
extern syscall_invalid
|
||||
|
||||
global syscall_handler_prelude
|
||||
global syscall_handler_prelude.return
|
||||
syscall_handler_prelude:
|
||||
swapgs
|
||||
mov [gs:CPU_DATA.rsp3], rsp
|
||||
@@ -20,7 +21,13 @@ syscall_handler_prelude:
|
||||
push rcx
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
push rbx
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
inc qword [rel __counter_syscall_enter]
|
||||
|
||||
@@ -37,7 +44,14 @@ syscall_handler_prelude:
|
||||
|
||||
inc qword [rel __counter_syscall_sysret]
|
||||
|
||||
.return:
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop rbx
|
||||
|
||||
pop rbp
|
||||
pop rcx
|
||||
|
||||
|
||||
24
src/kernel/syscalls/fork.cpp
Normal file
24
src/kernel/syscalls/fork.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
#include "log.h"
|
||||
#include "scheduler.h"
|
||||
|
||||
namespace syscalls {
|
||||
|
||||
pid_t
|
||||
fork()
|
||||
{
|
||||
auto &s = scheduler::get();
|
||||
auto *p = s.current();
|
||||
pid_t ppid = p->pid;
|
||||
|
||||
log::debug(logs::syscall, "Process %d calling fork()", ppid);
|
||||
|
||||
pid_t pid = p->fork();
|
||||
|
||||
p = s.current();
|
||||
log::debug(logs::syscall, "Process %d's fork: returning %d from process %d", ppid, pid, p->pid);
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
} // namespace syscalls
|
||||
@@ -1,7 +1,6 @@
|
||||
%include "tasking.inc"
|
||||
|
||||
extern g_tss
|
||||
|
||||
global task_switch
|
||||
task_switch:
|
||||
push rbp
|
||||
@@ -55,40 +54,10 @@ task_switch:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
global task_fork
|
||||
task_fork:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
; Save the rest of the callee-saved regs
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
mov r14, rdi ; r14: child task TCB (function argument)
|
||||
|
||||
mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB
|
||||
mov rax, [rax + TCB.rsp0] ; rax: current task rsp0
|
||||
sub rax, rsp ; rax: size of kernel stack in bytes
|
||||
|
||||
mov rcx, rax
|
||||
shr rcx, 3 ; rcx: size of kernel stack in qwords
|
||||
|
||||
mov rdi, [r14 + TCB.rsp0] ; rdi: child task rsp0
|
||||
sub rdi, rax ; rdi: child task rsp
|
||||
mov rsi, rsp ; rsi: current rsp
|
||||
mov [r14 + TCB.rsp], rdi
|
||||
|
||||
rep movsq
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbx
|
||||
|
||||
pop rbp
|
||||
ret
|
||||
extern syscall_handler_prelude.return
|
||||
global task_fork_return_thunk
|
||||
task_fork_return_thunk:
|
||||
mov rax, 0
|
||||
jmp syscall_handler_prelude.return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user