From 2d4a65c654c7074fa7b17b9bd71366149ea388f8 Mon Sep 17 00:00:00 2001 From: "Justin C. Miller" Date: Wed, 10 Feb 2021 01:19:41 -0800 Subject: [PATCH] [kernel] Pre-allocate cpu_data and pass to APs In order to avoid cyclic dependencies in the case of page faults while bringing up an AP, pre-allocate the cpu_data structure and related CPU control structures, and pass them to the AP startup code. This also changes the following: - cpu_early_init() was split out of cpu_early_init() to allow early usage of current_cpu() on the BSP before we're ready for the rest of cpu_init(). (These functions were also renamed to follow the preferred area_action naming style.) - isr_handler now zeroes out the IST entry for its vector instead of trying to increment the IST stack pointer - the IST stacks are allocated outside of cpu_init, to also help reduce stack pressue and chance of page faults before APs are ready - share stack areas between AP idle threads so we only waste 1K per additional AP for the unused idle stack --- src/kernel/ap_startup.s | 9 +++-- src/kernel/cpu.cpp | 67 +++++++------------------------ src/kernel/cpu.h | 17 ++++---- src/kernel/idt.cpp | 12 +++--- src/kernel/idt.h | 16 ++++---- src/kernel/interrupts.cpp | 26 ++++++++---- src/kernel/main.cpp | 83 +++++++++++++++++++++++++++++---------- src/kernel/tss.cpp | 29 ++++++++++++++ src/kernel/tss.h | 4 ++ 9 files changed, 159 insertions(+), 104 deletions(-) diff --git a/src/kernel/ap_startup.s b/src/kernel/ap_startup.s index 327651f..c438867 100644 --- a/src/kernel/ap_startup.s +++ b/src/kernel/ap_startup.s @@ -38,7 +38,7 @@ ap_startup: align 8 .pml4: dq 0 - .stack: dq 0 + .cpu: dq 0 .ret: dq 0 align 16 @@ -100,7 +100,8 @@ align 8 mov gs, ax mov ss, ax - mov rax, [BASE + (.stack - ap_startup)] + mov rdi, [BASE + (.cpu - ap_startup)] + mov rax, [rdi + CPU_DATA.rsp0] mov rsp, rax mov rax, [BASE + (.ret - ap_startup)] @@ -121,8 +122,8 @@ init_ap_trampoline: ; rdi is the kernel pml4 mov [BASE + (ap_startup.pml4 - ap_startup)], rdi - ; rsi is the stack for this AP - mov [BASE + (ap_startup.stack - ap_startup)], rsi + ; rsi is the cpu data for this AP + mov [BASE + (ap_startup.cpu - ap_startup)], rsi ; rdx is the address to jump to mov [BASE + (ap_startup.ret - ap_startup)], rdx diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index 7d1a1aa..c398586 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -40,65 +40,26 @@ cpu_validate() } void -init_cpu(bool bsp) +cpu_early_init(cpu_data *cpu) { - extern TSS &g_bsp_tss; - extern GDT &g_bsp_gdt; - extern vm_area_guarded &g_kernel_stacks; - - uint8_t id = 0; - - TSS *tss = nullptr; - GDT *gdt = nullptr; - cpu_data *cpu = nullptr; - - if (bsp) { - gdt = &g_bsp_gdt; - tss = &g_bsp_tss; - cpu = &g_bsp_cpu_data; - } else { - g_idt.install(); - - tss = new TSS; - gdt = new GDT {tss}; - cpu = new cpu_data; - - gdt->install(); - - lapic &apic = device_manager::get().get_lapic(); - id = apic.get_id(); - } - - kutil::memset(cpu, 0, sizeof(cpu_data)); - - cpu->self = cpu; - cpu->id = id; - cpu->gdt = gdt; - cpu->tss = tss; + IDT::get().install(); + cpu->gdt->install(); // Install the GS base pointint to the cpu_data wrmsr(msr::ia32_gs_base, reinterpret_cast(cpu)); +} - using memory::frame_size; - using memory::kernel_stack_pages; - constexpr size_t stack_size = kernel_stack_pages * frame_size; - - uint8_t ist_entries = g_idt.used_ist_entries(); - - // Set up the IST stacks - for (unsigned ist = 1; ist < 8; ++ist) { - if (!(ist_entries & (1 << ist))) - continue; - - // Two zero entries at the top for the null frame - uintptr_t stack_bottom = g_kernel_stacks.get_section(); - uintptr_t stack_top = stack_bottom + stack_size - 2 * sizeof(uintptr_t); - - // Pre-realize these stacks, they're no good if they page fault - *reinterpret_cast(stack_top) = 0; - - tss->ist_stack(ist) = stack_top; +void +cpu_init(cpu_data *cpu, bool bsp) +{ + if (!bsp) { + // The BSP already called cpu_early_init + cpu_early_init(cpu); } + + lapic &apic = device_manager::get().get_lapic(); + cpu->id = apic.get_id(); + // Set up the syscall MSRs syscall_enable(); diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index 029938d..b1f2f05 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -38,9 +38,16 @@ struct cpu_data extern "C" cpu_data * _current_gsbase(); -/// Initialize a CPU and set up its cpu_data structure -/// \arg bsp True if the current CPU is the BSP -void init_cpu(bool bsp); +/// Set up the running CPU. This sets GDT, IDT, and necessary MSRs as well as creating +/// the cpu_data structure for this processor. +/// \arg cpu The cpu_data structure for this CPU +/// \arg bsp True if this CPU is the BSP +void cpu_init(cpu_data *cpu, bool bsp); + +/// Do early (before cpu_init) initialization work. Only needs to be called manually for +/// the BSP, otherwise cpu_init will call it. +/// \arg cpu The cpu_data structure for this CPU +void cpu_early_init(cpu_data *cpu); /// Get the cpu_data struct for the current executing CPU inline cpu_data & current_cpu() { return *_current_gsbase(); } @@ -49,7 +56,3 @@ inline cpu_data & current_cpu() { return *_current_gsbase(); } /// validated the required features, but still iterate the options and log about them. void cpu_validate(); -/// Set up the running CPU. This sets GDT, IDT, and necessary MSRs as well as creating -/// the cpu_data structure for this processor. -/// \arg bsp True if this CPU is the BSP -void cpu_initialize(bool bsp); diff --git a/src/kernel/idt.cpp b/src/kernel/idt.cpp index 03d3e1c..bbf6b18 100644 --- a/src/kernel/idt.cpp +++ b/src/kernel/idt.cpp @@ -37,6 +37,12 @@ IDT::IDT() #undef ISR } +IDT & +IDT::get() +{ + return g_idt; +} + void IDT::install() const { @@ -85,12 +91,6 @@ IDT::set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags) m_entries[i].reserved = 0; } -void -IDT::set_ist(uint8_t i, uint8_t ist) -{ - m_entries[i].ist = ist; -} - void IDT::dump(unsigned index) const { diff --git a/src/kernel/idt.h b/src/kernel/idt.h index fc5ac24..fc6e3af 100644 --- a/src/kernel/idt.h +++ b/src/kernel/idt.h @@ -6,8 +6,6 @@ class IDT { public: - static constexpr unsigned count = 256; - IDT(); /// Install this IDT to the current CPU @@ -21,11 +19,15 @@ public: /// Get the IST entry used by an entry. /// \arg i Which IDT entry to look in /// \returns The IST index used by entry i, or 0 for none - inline uint8_t get_ist(unsigned i) const { - if (i >= count) return 0; + inline uint8_t get_ist(uint8_t i) const { return m_entries[i].ist; } + /// Set the IST entry used by an entry. + /// \arg i Which IDT entry to set + /// \arg ist The IST index for entry i, or 0 for none + void set_ist(uint8_t i, uint8_t ist) { m_entries[i].ist = ist; } + /// Get the IST entries that are used by this table, as a bitmap uint8_t used_ist_entries() const; @@ -33,9 +35,11 @@ public: /// \arg index Which entry to print, or -1 for all entries void dump(unsigned index = -1) const; + /// Get the global IDT + static IDT & get(); + private: void set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags); - void set_ist(uint8_t i, uint8_t ist); struct descriptor { @@ -57,5 +61,3 @@ private: descriptor m_entries[256]; ptr m_ptr; }; - -extern IDT &g_idt; diff --git a/src/kernel/interrupts.cpp b/src/kernel/interrupts.cpp index 00d405b..54a08d5 100644 --- a/src/kernel/interrupts.cpp +++ b/src/kernel/interrupts.cpp @@ -83,10 +83,11 @@ isr_handler(cpu_state *regs) console *cons = console::get(); uint8_t vector = regs->interrupt & 0xff; - TSS &tss = TSS::current(); - uint8_t ist = g_idt.get_ist(vector); - if (ist) - tss.ist_stack(ist) -= increment_offset; + // Clear out the IST for this vector so we just keep using + // this stack + uint8_t old_ist = IDT::get().get_ist(vector); + if (old_ist) + IDT::get().set_ist(vector, 0); switch (static_cast(vector)) { @@ -122,6 +123,16 @@ isr_handler(cpu_state *regs) } break; + case isr::isrDoubleFault: + cons->set_color(9); + cons->printf("\nDouble Fault:\n"); + + cons->set_color(); + print_regs(*regs); + print_stacktrace(2); + _halt(); + break; + case isr::isrGPFault: { cons->set_color(9); cons->puts("\nGeneral Protection Fault:\n"); @@ -141,7 +152,7 @@ isr_handler(cpu_state *regs) case 1: case 3: cons->printf(" IDT[%x]\n", index); - g_idt.dump(index); + IDT::get().dump(index); break; default: @@ -261,8 +272,9 @@ isr_handler(cpu_state *regs) _halt(); } - if (ist) - tss.ist_stack(ist) += increment_offset; + // Return the IST for this vector to what it was + if (old_ist) + IDT::get().set_ist(vector, old_ist); *reinterpret_cast(apic_eoi_addr) = 0; } diff --git a/src/kernel/main.cpp b/src/kernel/main.cpp index f0c2f30..e209e1a 100644 --- a/src/kernel/main.cpp +++ b/src/kernel/main.cpp @@ -37,9 +37,9 @@ extern "C" { void kernel_main(kernel::args::header *header); void (*__ctors)(void); void (*__ctors_end)(void); - void long_ap_startup(); + void long_ap_startup(cpu_data *cpu); void ap_startup(); - void init_ap_trampoline(void*, uintptr_t, void (*)()); + void init_ap_trampoline(void*, cpu_data *, void (*)(cpu_data *)); } extern void __kernel_assert(const char *, unsigned, const char *); @@ -118,23 +118,29 @@ kernel_main(args::header *header) logger_clear_immediate(); } + extern IDT &g_idt; extern TSS &g_bsp_tss; extern GDT &g_bsp_gdt; - - TSS *tss = new (&g_bsp_tss) TSS; - GDT *gdt = new (&g_bsp_gdt) GDT {tss}; - gdt->install(); + extern cpu_data g_bsp_cpu_data; IDT *idt = new (&g_idt) IDT; - idt->install(); + + cpu_data *cpu = &g_bsp_cpu_data; + kutil::memset(cpu, 0, sizeof(cpu_data)); + + cpu->self = cpu; + cpu->tss = new (&g_bsp_tss) TSS; + cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; + cpu_early_init(cpu); disable_legacy_pic(); memory_initialize_pre_ctors(*header); - init_cpu(true); run_constructors(); memory_initialize_post_ctors(*header); + cpu->tss->create_ist_stacks(idt->used_ist_entries()); + for (size_t i = 0; i < header->num_modules; ++i) { args::module &mod = header->modules[i]; void *virt = memory::to_virtual(mod.location); @@ -154,11 +160,15 @@ kernel_main(args::header *header) device_manager &devices = device_manager::get(); devices.parse_acpi(header->acpi_table); + // cpu_init relies on the APIC being set up + cpu_init(cpu, true); + devices.init_drivers(); devices.get_lapic().calibrate_timer(); start_aps(header->pml4); + idt->add_ist_entries(); interrupts_enable(); /* @@ -216,8 +226,8 @@ start_aps(void *kpml4) // Since we're using address space outside kernel space, make sure // the kernel's vm_space is used - cpu_data &cpu = current_cpu(); - cpu.process = &g_kernel_process; + cpu_data &bsp = current_cpu(); + bsp.process = &g_kernel_process; // Copy the startup code somwhere the real mode trampoline can run uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses @@ -229,36 +239,69 @@ start_aps(void *kpml4) reinterpret_cast(&ap_startup), ap_startup_code_size); - static constexpr size_t stack_bytes = kernel_stack_pages * frame_size; + // AP idle stacks need less room than normal stacks, so pack multiple + // into a normal stack area + static constexpr size_t idle_stack_bytes = 1024; // 2KiB is generous + static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size; + static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes; + + uint8_t ist_entries = IDT::get().used_ist_entries(); + + size_t free_stack_count = 0; + uintptr_t stack_area_start = 0; for (uint8_t id : ids) { if (id == apic.get_id()) continue; - log::info(logs::boot, "Starting AP %d", id); - size_t current_count = ap_startup_count; - uintptr_t stack_start = g_kernel_stacks.get_section(); - uintptr_t stack_end = stack_start + stack_bytes - 2 * sizeof(void*); + // Set up the CPU data structures + TSS *tss = new TSS; + GDT *gdt = new GDT {tss}; + cpu_data *cpu = new cpu_data; + kutil::memset(cpu, 0, sizeof(cpu_data)); + cpu->self = cpu; + cpu->gdt = gdt; + cpu->tss = tss; + + tss->create_ist_stacks(ist_entries); + + // Set up the CPU's idle task stack + if (free_stack_count == 0) { + stack_area_start = g_kernel_stacks.get_section(); + free_stack_count = idle_stacks_per; + } + + uintptr_t stack_end = stack_area_start + free_stack_count-- * idle_stack_bytes; + stack_end -= 2 * sizeof(void*); // Null frame *reinterpret_cast(stack_end) = 0; // pre-fault the page + cpu->rsp0 = stack_end; - init_ap_trampoline(kpml4, stack_end, long_ap_startup); + // Set up the trampoline with this CPU's data + init_ap_trampoline(kpml4, cpu, long_ap_startup); + // Kick it off! + size_t current_count = ap_startup_count; + log::debug(logs::boot, "Starting AP %d: stack %llx", id, stack_end); apic.send_ipi(lapic::ipi_mode::init, 0, id); clk.spinwait(1000); apic.send_ipi(lapic::ipi_mode::startup, vector, id); for (unsigned i = 0; i < 20; ++i) { if (ap_startup_count > current_count) break; - clk.spinwait(10); + clk.spinwait(20); } + // If the CPU already incremented ap_startup_count, it's done if (ap_startup_count > current_count) continue; + // Send the second SIPI (intel recommends this) apic.send_ipi(lapic::ipi_mode::startup, vector, id); for (unsigned i = 0; i < 100; ++i) { if (ap_startup_count > current_count) break; - clk.spinwait(10); + clk.spinwait(100); } + + log::warn(logs::boot, "No response from AP %d within timeout", id); } log::info(logs::boot, "%d CPUs running", ap_startup_count); @@ -266,9 +309,9 @@ start_aps(void *kpml4) } void -long_ap_startup() +long_ap_startup(cpu_data *cpu) { - init_cpu(false); + cpu_init(cpu, false); ++ap_startup_count; while(1) asm("hlt"); diff --git a/src/kernel/tss.cpp b/src/kernel/tss.cpp index 9c7b057..780cac5 100644 --- a/src/kernel/tss.cpp +++ b/src/kernel/tss.cpp @@ -1,7 +1,11 @@ #include "kutil/assert.h" #include "kutil/memory.h" #include "kutil/no_construct.h" + #include "cpu.h" +#include "kernel_memory.h" +#include "log.h" +#include "objects/vm_area.h" #include "tss.h" // The BSP's TSS is initialized _before_ global constructors are called, @@ -37,3 +41,28 @@ TSS::ist_stack(unsigned ist) return m_ist[ist]; } +void +TSS::create_ist_stacks(uint8_t ist_entries) +{ + extern vm_area_guarded &g_kernel_stacks; + using memory::frame_size; + using memory::kernel_stack_pages; + constexpr size_t stack_bytes = kernel_stack_pages * frame_size; + + for (unsigned ist = 1; ist < 8; ++ist) { + if (!(ist_entries & (1 << ist))) continue; + + // Two zero entries at the top for the null frame + uintptr_t stack_bottom = g_kernel_stacks.get_section(); + uintptr_t stack_top = stack_bottom + stack_bytes - 2 * sizeof(uintptr_t); + + log::debug(logs::memory, "Created IST stack at %016lx size 0x%lx", + stack_bottom, stack_bytes); + + // Pre-realize these stacks, they're no good if they page fault + for (unsigned i = 0; i < kernel_stack_pages; ++i) + *reinterpret_cast(stack_bottom + i * frame_size) = 0; + + ist_stack(ist) = stack_top; + } +} diff --git a/src/kernel/tss.h b/src/kernel/tss.h index d0fe2b1..4bf8d1a 100644 --- a/src/kernel/tss.h +++ b/src/kernel/tss.h @@ -22,6 +22,10 @@ public: /// \returns A mutable reference to the stack pointer uintptr_t & ist_stack(unsigned ist); + /// Allocate new stacks for the given IST entries. + /// \arg ist_entries A bitmap of used IST entries + void create_ist_stacks(uint8_t ist_entries); + private: uint32_t m_reserved0;