[kernel] Fix AP idle stack overflow

This bug has been making me tear my hair out for weeks. When creating
the idle thread for each CPU, we were previously sharing stack areas
with other CPUs' idle threads in an effort to save memory. However, this
caused stack corruption that was very hard to track down. The kernel
stacks are in a vm_area_guarded to better detect this exact kind of
issue, but splitting stacks like this skirts that protection. It's not
worth saving a few KiB per CPU.
This commit is contained in:
Justin C. Miller
2022-03-13 16:58:57 -07:00
parent df8eb43074
commit d08e5dabe4

View File

@@ -60,14 +60,7 @@ start(cpu_data &bsp, void *kpml4)
reinterpret_cast<void*>(&ap_startup), reinterpret_cast<void*>(&ap_startup),
ap_startup_code_size); ap_startup_code_size);
// AP idle stacks need less room than normal stacks, so pack multiple
// into a normal stack area
static constexpr size_t idle_stack_bytes = 2048; // 2KiB is generous
static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size;
static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes;
size_t free_stack_count = 0; size_t free_stack_count = 0;
uintptr_t stack_area_start = 0;
lapic &apic = *bsp.apic; lapic &apic = *bsp.apic;
lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert; lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert;
@@ -78,13 +71,10 @@ start(cpu_data &bsp, void *kpml4)
cpu_data *cpu = cpu_create(id, ++index); cpu_data *cpu = cpu_create(id, ++index);
// Set up the CPU's idle task stack static constexpr size_t stack_bytes = kernel_stack_pages * frame_size;
if (free_stack_count == 0) { const uintptr_t stack_start = g_kernel_stacks.get_section();
stack_area_start = g_kernel_stacks.get_section(); uintptr_t stack_end = stack_start + stack_bytes;
free_stack_count = idle_stacks_per;
}
uintptr_t stack_end = stack_area_start + free_stack_count-- * idle_stack_bytes;
stack_end -= 2 * sizeof(void*); // Null frame stack_end -= 2 * sizeof(void*); // Null frame
*reinterpret_cast<uint64_t*>(stack_end) = 0; // pre-fault the page *reinterpret_cast<uint64_t*>(stack_end) = 0; // pre-fault the page
cpu->rsp0 = stack_end; cpu->rsp0 = stack_end;