From fd25d3babcb8d57a93e55e459ca4ba8d00689a7b Mon Sep 17 00:00:00 2001 From: "Justin C. Miller" Date: Thu, 27 Jan 2022 19:28:35 -0800 Subject: [PATCH] [kernel] Clean up main.cpp and others The kernel/main.cpp and kernel/memory_bootstrap.cpp files had become something of a junk drawer. This change cleans them up in the following ways: - Most CPU initialization has moved to cpu.cpp, allowing several functions to be made static and removed from cpu.h - Multi-core startup code has moved to the new smp.h and smp.cpp, and ap_startup.s has been renamed smp.s to match. - run_constructors() has moved to memory_bootstrap.cpp, and all the functionality of that file has been hidden behind a new public interface mem::initialize(). - load_init_server() has moved from memory_bootstrap.cpp to main.cpp --- src/kernel/assert.cpp | 8 + src/kernel/assert.h | 5 + src/kernel/cpu.cpp | 94 ++++++++++-- src/kernel/cpu.h | 22 +-- src/kernel/idt.cpp | 2 +- src/kernel/idt.h | 2 +- src/kernel/interrupts.cpp | 4 +- src/kernel/kernel.module | 3 +- src/kernel/main.cpp | 236 +++++------------------------ src/kernel/memory.h.cog | 8 + src/kernel/memory_bootstrap.cpp | 107 +++---------- src/kernel/panic.serial/main.cpp | 3 +- src/kernel/smp.cpp | 142 +++++++++++++++++ src/kernel/smp.h | 18 +++ src/kernel/{ap_startup.s => smp.s} | 0 15 files changed, 341 insertions(+), 313 deletions(-) create mode 100644 src/kernel/smp.cpp create mode 100644 src/kernel/smp.h rename src/kernel/{ap_startup.s => smp.s} (100%) diff --git a/src/kernel/assert.cpp b/src/kernel/assert.cpp index 8b49865..337373c 100644 --- a/src/kernel/assert.cpp +++ b/src/kernel/assert.cpp @@ -1,10 +1,18 @@ #include "assert.h" +#include "idt.h" namespace panic { uint32_t *apic_icr = reinterpret_cast(0xffffc000fee00300); void const *symbol_table = nullptr; +void +install(uintptr_t entrypoint, const void *symbol_data) +{ + IDT::set_nmi_handler(entrypoint); + symbol_table = symbol_data; +} + } // namespace panic extern "C" diff --git 
a/src/kernel/assert.h b/src/kernel/assert.h index 2b70bbd..9bf88cb 100644 --- a/src/kernel/assert.h +++ b/src/kernel/assert.h @@ -44,6 +44,11 @@ inline void panic( while (1) asm ("hlt"); } +/// Install a panic handler. +/// \arg entrypoint Virtual address of the panic handler's entrypoint +/// \arg symbol_data Pointer to the symbol table data +void install(uintptr_t entrypoint, const void *symbol_data); + } // namespace panic #ifndef NDEBUG diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index 513d241..db1da47 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -10,7 +11,6 @@ #include "logger.h" #include "msr.h" #include "objects/thread.h" -#include "objects/vm_area.h" #include "scheduler.h" #include "syscall.h" #include "tss.h" @@ -22,6 +22,9 @@ panic_data *g_panic_data_p = &g_panic_data; cpu_data g_bsp_cpu_data; + +// Validate the required CPU features are present. Really, the bootloader already +// validated the required features, but still iterate the options and log about them. void cpu_validate() { @@ -45,18 +48,12 @@ cpu_validate() #undef CPU_FEATURE_REQ } -void -global_cpu_init() -{ - memset(&g_panic_data, 0, sizeof(g_panic_data)); -} -void +// Do early (before cpu_init) initialization work. Only needs to be called manually for +// the BSP, otherwise cpu_init will call it. 
+static void cpu_early_init(cpu_data *cpu) { - if (cpu->index == 0) - global_cpu_init(); - cpu->idt->install(); cpu->gdt->install(); @@ -64,6 +61,70 @@ cpu_early_init(cpu_data *cpu) wrmsr(msr::ia32_gs_base, reinterpret_cast(cpu)); } +cpu_data * +bsp_early_init() +{ + cpu_validate(); + memset(&g_panic_data, 0, sizeof(g_panic_data)); + + extern IDT &g_bsp_idt; + extern TSS &g_bsp_tss; + extern GDT &g_bsp_gdt; + extern uintptr_t idle_stack_end; + + cpu_data *cpu = &g_bsp_cpu_data; + memset(cpu, 0, sizeof(cpu_data)); + + cpu->self = cpu; + cpu->idt = new (&g_bsp_idt) IDT; + cpu->tss = new (&g_bsp_tss) TSS; + cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; + cpu->rsp0 = idle_stack_end; + cpu_early_init(cpu); + + return cpu; +} + +void +bsp_late_init() +{ + // BSP didn't set up IST stacks yet + extern TSS &g_bsp_tss; + uint8_t ist_entries = IDT::used_ist_entries(); + g_bsp_tss.create_ist_stacks(ist_entries); + + uint64_t cr0, cr4; + asm ("mov %%cr0, %0" : "=r"(cr0)); + asm ("mov %%cr4, %0" : "=r"(cr4)); + uint64_t efer = rdmsr(msr::ia32_efer); + log::debug(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0, cr4, efer); + + syscall_initialize(); +} + +cpu_data * +cpu_create(uint16_t id, uint16_t index) +{ + // Set up the CPU data structures + IDT *idt = new IDT; + TSS *tss = new TSS; + GDT *gdt = new GDT {tss}; + cpu_data *cpu = new cpu_data; + memset(cpu, 0, sizeof(cpu_data)); + + cpu->self = cpu; + cpu->id = id; + cpu->index = index; + cpu->idt = idt; + cpu->tss = tss; + cpu->gdt = gdt; + + uint8_t ist_entries = IDT::used_ist_entries(); + tss->create_ist_stacks(ist_entries); + + return cpu; +} + void cpu_init(cpu_data *cpu, bool bsp) { @@ -93,4 +154,17 @@ cpu_init(cpu_data *cpu, bool bsp) wrmsr(msr::ia32_pat, pat); cpu->idt->add_ist_entries(); + + uintptr_t apic_base = + device_manager::get().get_lapic_base(); + + lapic *apic = new lapic(apic_base); + cpu->apic = apic; + apic->enable(); + + if (bsp) { + // BSP never got an id, set that up now + cpu->id = 
apic->get_id(); + apic->calibrate_timer(); + } } diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index a63785d..6819ccb 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -64,20 +64,24 @@ struct cpu_data extern "C" cpu_data * _current_gsbase(); +/// Do early initialization of the BSP CPU. +/// \returns A pointer to the BSP cpu_data structure +cpu_data * bsp_early_init(); + +/// Do late initialization of the BSP CPU. +void bsp_late_init(); + +/// Create a new cpu_data struct with all requisite sub-objects. +/// \arg id The ACPI specified id of the CPU +/// \arg index The kernel-specified initialization index of the CPU +/// \returns The new cpu_data structure +cpu_data * cpu_create(uint16_t id, uint16_t index); + /// Set up the running CPU. This sets GDT, IDT, and necessary MSRs as well as creating /// the cpu_data structure for this processor. /// \arg cpu The cpu_data structure for this CPU /// \arg bsp True if this CPU is the BSP void cpu_init(cpu_data *cpu, bool bsp); -/// Do early (before cpu_init) initialization work. Only needs to be called manually for -/// the BSP, otherwise cpu_init will call it. -/// \arg cpu The cpu_data structure for this CPU -void cpu_early_init(cpu_data *cpu); - /// Get the cpu_data struct for the current executing CPU inline cpu_data & current_cpu() { return *_current_gsbase(); } - -/// Validate the required CPU features are present. Really, the bootloader already -/// validated the required features, but still iterate the options and log about them. 
-void cpu_validate(); diff --git a/src/kernel/idt.cpp b/src/kernel/idt.cpp index 3bd0d68..92f1447 100644 --- a/src/kernel/idt.cpp +++ b/src/kernel/idt.cpp @@ -78,7 +78,7 @@ IDT::add_ist_entries() } uint8_t -IDT::used_ist_entries() const +IDT::used_ist_entries() { constexpr uint8_t entries = diff --git a/src/kernel/idt.h b/src/kernel/idt.h index a0ccce2..395ca4c 100644 --- a/src/kernel/idt.h +++ b/src/kernel/idt.h @@ -36,7 +36,7 @@ public: void set_ist(uint8_t i, uint8_t ist) { m_entries[i].ist = ist; } /// Get the IST entries that are used by this table, as a bitmap - uint8_t used_ist_entries() const; + static uint8_t used_ist_entries(); /// Dump debug information about the IDT to the console. /// \arg index Which entry to print, or -1 for all entries diff --git a/src/kernel/interrupts.cpp b/src/kernel/interrupts.cpp index 9c57d4b..61d3f90 100644 --- a/src/kernel/interrupts.cpp +++ b/src/kernel/interrupts.cpp @@ -100,14 +100,14 @@ isr_handler(cpu_state *regs) case isr::isrGPFault: if (regs->errorcode & 0xfff0) { - int index = (regs->errorcode & 0xffff) >> 4; + int index = (regs->errorcode & 0xffff) >> 3; int ti = (regs->errorcode & 0x07) >> 1; char const *table = (ti & 1) ? "IDT" : (!ti) ? "GDT" : "LDT"; - snprintf(message, sizeof(message), "General Protection Fault, error:%lx%s %s[%d]", + snprintf(message, sizeof(message), "General Protection Fault, error:0x%lx%s %s[%d]", regs->errorcode, regs->errorcode & 1 ? " external" : "", table, index); } else { snprintf(message, sizeof(message), "General Protection Fault, error:%lx%s", diff --git a/src/kernel/kernel.module b/src/kernel/kernel.module index 9f5fe44..6299385 100644 --- a/src/kernel/kernel.module +++ b/src/kernel/kernel.module @@ -10,7 +10,6 @@ kernel = module("kernel", includes = [ "." 
], sources = [ "apic.cpp", - "ap_startup.s", "assert.cpp", "boot.s", "clock.cpp", @@ -45,6 +44,8 @@ kernel = module("kernel", "pci.cpp", "printf/printf.c", "scheduler.cpp", + "smp.cpp", + "smp.s", "syscall.cpp.cog", "syscall.h.cog", "syscall.s", diff --git a/src/kernel/main.cpp b/src/kernel/main.cpp index 05b36b2..060064f 100644 --- a/src/kernel/main.cpp +++ b/src/kernel/main.cpp @@ -6,89 +6,39 @@ #include #include -#include "apic.h" #include "assert.h" -#include "block_device.h" -#include "clock.h" #include "cpu.h" #include "device_manager.h" -#include "gdt.h" -#include "idt.h" #include "interrupts.h" -#include "io.h" #include "logger.h" #include "memory.h" -#include "msr.h" -#include "objects/channel.h" -#include "objects/event.h" +#include "objects/process.h" +#include "objects/system.h" +#include "objects/thread.h" #include "objects/vm_area.h" #include "scheduler.h" -#include "syscall.h" +#include "smp.h" #include "sysconf.h" -#include "tss.h" -#include "vm_space.h" - - -#ifndef GIT_VERSION -#define GIT_VERSION -#endif extern "C" { void kernel_main(bootproto::args *args); - void (*__ctors)(void); - void (*__ctors_end)(void); - void long_ap_startup(cpu_data *cpu); - void ap_startup(); - void ap_idle(); - void init_ap_trampoline(void*, cpu_data *, void (*)()); } -volatile size_t ap_startup_count; -static bool scheduler_ready = false; - /// Bootstrap the memory managers. 
-void memory_initialize_pre_ctors(bootproto::args &kargs); -void memory_initialize_post_ctors(bootproto::args &kargs); void load_init_server(bootproto::program &program, uintptr_t modules_address); -unsigned start_aps(lapic &apic, const util::vector &ids, void *kpml4); - -void -run_constructors() -{ - void (**p)(void) = &__ctors; - while (p < &__ctors_end) { - void (*ctor)(void) = *p++; - if (ctor) ctor(); - } -} void kernel_main(bootproto::args *args) { if (args->panic) { - IDT::set_nmi_handler(args->panic->entrypoint); - panic::symbol_table = util::offset_pointer(args->symbol_table, mem::linear_offset); + const void *syms = util::offset_pointer(args->symbol_table, mem::linear_offset); + panic::install(args->panic->entrypoint, syms); } logger_init(); - cpu_validate(); - extern IDT &g_bsp_idt; - extern TSS &g_bsp_tss; - extern GDT &g_bsp_gdt; - extern cpu_data g_bsp_cpu_data; - extern uintptr_t idle_stack_end; - - cpu_data *cpu = &g_bsp_cpu_data; - memset(cpu, 0, sizeof(cpu_data)); - - cpu->self = cpu; - cpu->idt = new (&g_bsp_idt) IDT; - cpu->tss = new (&g_bsp_tss) TSS; - cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; - cpu->rsp0 = idle_stack_end; - cpu_early_init(cpu); + cpu_data *cpu = bsp_early_init(); kassert(args->magic == bootproto::args_magic, "Bad kernel args magic number"); @@ -99,48 +49,24 @@ kernel_main(bootproto::args *args) log::debug(logs::boot, "Runtime service is at: %016lx", args->runtime_services); log::debug(logs::boot, " Kernel PML4 is at: %016lx", args->pml4); - uint64_t cr0, cr4; - asm ("mov %%cr0, %0" : "=r"(cr0)); - asm ("mov %%cr4, %0" : "=r"(cr4)); - uint64_t efer = rdmsr(msr::ia32_efer); - log::debug(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0, cr4, efer); - disable_legacy_pic(); - memory_initialize_pre_ctors(*args); - run_constructors(); - memory_initialize_post_ctors(*args); + mem::initialize(*args); - cpu->tss->create_ist_stacks(cpu->idt->used_ist_entries()); - - syscall_initialize(); + bsp_late_init(); device_manager 
&devices = device_manager::get(); devices.parse_acpi(args->acpi_table); - // Need the local APIC to get the BSP's id - uintptr_t apic_base = devices.get_lapic_base(); - - lapic *apic = new lapic(apic_base); - apic->enable(); - - cpu->id = apic->get_id(); - cpu->apic = apic; - + devices.init_drivers(); cpu_init(cpu, true); - devices.init_drivers(); - apic->calibrate_timer(); - - const auto &apic_ids = devices.get_apic_ids(); - g_num_cpus = start_aps(*apic, apic_ids, args->pml4); + g_num_cpus = smp::start(*cpu, args->pml4); sysconf_create(); interrupts_enable(); - //g_com1.handle_interrupt(); scheduler *sched = new scheduler {g_num_cpus}; - scheduler_ready = true; // Load the init server load_init_server(*args->init, args->modules); @@ -148,128 +74,34 @@ kernel_main(bootproto::args *args) sched->start(); } -unsigned -start_aps(lapic &apic, const util::vector &ids, void *kpml4) +void +load_init_server(bootproto::program &program, uintptr_t modules_address) { - using mem::frame_size; - using mem::kernel_stack_pages; + using bootproto::section_flags; using obj::vm_flags; - extern size_t ap_startup_code_size; - extern obj::process &g_kernel_process; - extern obj::vm_area_guarded &g_kernel_stacks; + obj::process *p = new obj::process; + p->add_handle(&obj::system::get(), obj::system::init_caps); - clock &clk = clock::get(); + vm_space &space = p->space(); + for (const auto &sect : program.sections) { + vm_flags flags = + ((sect.type && section_flags::execute) ? vm_flags::exec : vm_flags::none) | + ((sect.type && section_flags::write) ? 
vm_flags::write : vm_flags::none); - ap_startup_count = 1; // BSP processor - log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1); - - // Since we're using address space outside kernel space, make sure - // the kernel's vm_space is used - cpu_data &bsp = current_cpu(); - bsp.process = &g_kernel_process; - - uint16_t index = bsp.index; - - // Copy the startup code somwhere the real mode trampoline can run - uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses - uint8_t vector = addr >> 12; - obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write); - vm_space::kernel_space().add(addr, vma); - memcpy( - reinterpret_cast(addr), - reinterpret_cast(&ap_startup), - ap_startup_code_size); - - // AP idle stacks need less room than normal stacks, so pack multiple - // into a normal stack area - static constexpr size_t idle_stack_bytes = 2048; // 2KiB is generous - static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size; - static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes; - - uint8_t ist_entries = IDT::current().used_ist_entries(); - - size_t free_stack_count = 0; - uintptr_t stack_area_start = 0; - - lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert; - apic.send_ipi_broadcast(mode, false, 0); - - for (uint8_t id : ids) { - if (id == bsp.id) continue; - - // Set up the CPU data structures - IDT *idt = new IDT; - TSS *tss = new TSS; - GDT *gdt = new GDT {tss}; - cpu_data *cpu = new cpu_data; - memset(cpu, 0, sizeof(cpu_data)); - - cpu->self = cpu; - cpu->id = id; - cpu->index = ++index; - cpu->idt = idt; - cpu->tss = tss; - cpu->gdt = gdt; - - tss->create_ist_stacks(ist_entries); - - // Set up the CPU's idle task stack - if (free_stack_count == 0) { - stack_area_start = g_kernel_stacks.get_section(); - free_stack_count = idle_stacks_per; - } - - uintptr_t stack_end = stack_area_start + free_stack_count-- * idle_stack_bytes; - stack_end -= 2 * 
sizeof(void*); // Null frame - *reinterpret_cast(stack_end) = 0; // pre-fault the page - cpu->rsp0 = stack_end; - - // Set up the trampoline with this CPU's data - init_ap_trampoline(kpml4, cpu, ap_idle); - - // Kick it off! - size_t current_count = ap_startup_count; - log::debug(logs::boot, "Starting AP %d: stack %llx", cpu->index, stack_end); - - lapic::ipi startup = lapic::ipi::startup | lapic::ipi::assert; - - apic.send_ipi(startup, vector, id); - for (unsigned i = 0; i < 20; ++i) { - if (ap_startup_count > current_count) break; - clk.spinwait(20); - } - - // If the CPU already incremented ap_startup_count, it's done - if (ap_startup_count > current_count) - continue; - - // Send the second SIPI (intel recommends this) - apic.send_ipi(startup, vector, id); - for (unsigned i = 0; i < 100; ++i) { - if (ap_startup_count > current_count) break; - clk.spinwait(100); - } - - log::warn(logs::boot, "No response from AP %d within timeout", id); + obj::vm_area *vma = new obj::vm_area_fixed(sect.phys_addr, sect.size, flags); + space.add(sect.virt_addr, vma); } - log::info(logs::boot, "%d CPUs running", ap_startup_count); - vm_space::kernel_space().remove(vma); - return ap_startup_count; -} - -void -long_ap_startup(cpu_data *cpu) -{ - cpu_init(cpu, false); - ++ap_startup_count; - while (!scheduler_ready) asm ("pause"); - - uintptr_t apic_base = - device_manager::get().get_lapic_base(); - cpu->apic = new lapic(apic_base); - cpu->apic->enable(); - - scheduler::get().start(); + uint64_t iopl = (3ull << 12); + + obj::thread *main = p->create_thread(); + main->add_thunk_user(program.entrypoint, 0, iopl); + main->set_state(obj::thread::state::ready); + + // Hacky: No process exists to have created a stack for init; it needs to create + // its own stack. We take advantage of that to use rsp to pass it the init modules + // address. 
+ auto *tcb = main->tcb(); + tcb->rsp3 = modules_address; } diff --git a/src/kernel/memory.h.cog b/src/kernel/memory.h.cog index 4f4f52a..3462a2e 100644 --- a/src/kernel/memory.h.cog +++ b/src/kernel/memory.h.cog @@ -9,6 +9,10 @@ #include +namespace bootproto { + struct args; +} + void * operator new (size_t, void *p) noexcept; /// Allocate from the default allocator. @@ -81,4 +85,8 @@ constexpr uintptr_t page_align_down(uintptr_t a) { return a & ~(frame_size-1); } /// Get the given address, aligned to the next page constexpr uintptr_t page_align_up(uintptr_t a) { return page_align_down(a-1) + frame_size; } +/// Initialize memory. Create the kernel vm space and its memory regions, +/// the physical page allocator and heap allocator, and run global constructors. +void initialize(bootproto::args &args); + } // namespace mem diff --git a/src/kernel/memory_bootstrap.cpp b/src/kernel/memory_bootstrap.cpp index c448f35..1e44b38 100644 --- a/src/kernel/memory_bootstrap.cpp +++ b/src/kernel/memory_bootstrap.cpp @@ -7,26 +7,22 @@ #include "assert.h" #include "device_manager.h" #include "frame_allocator.h" -#include "gdt.h" #include "heap_allocator.h" -#include "io.h" #include "logger.h" #include "memory.h" -#include "msr.h" #include "objects/process.h" -#include "objects/thread.h" -#include "objects/system.h" #include "objects/vm_area.h" #include "vm_space.h" +extern "C" { + void (*__ctors)(void); + void (*__ctors_end)(void); +} + using bootproto::allocation_register; -using bootproto::section_flags; using obj::vm_flags; -extern "C" void initialize_main_thread(); -extern "C" uintptr_t initialize_main_user_stack(); - // These objects are initialized _before_ global constructors are called, // so we don't want them to have global constructors at all, lest they // overwrite the previous initialization. 
@@ -62,6 +58,8 @@ get_physical_page(T *p) { return mem::page_align_down(reinterpret_cast(p)); } +namespace { + void memory_initialize_pre_ctors(bootproto::args &kargs) { @@ -114,87 +112,26 @@ memory_initialize_post_ctors(bootproto::args &kargs) kargs.page_tables.count); } -static void -log_mtrrs() +void +run_constructors() { - uint64_t mtrrcap = rdmsr(msr::ia32_mtrrcap); - uint64_t mtrrdeftype = rdmsr(msr::ia32_mtrrdeftype); - unsigned vcap = mtrrcap & 0xff; - log::debug(logs::boot, "MTRRs: vcap=%d %s %s def=%02x %s %s", - vcap, - (mtrrcap & (1<< 8)) ? "fix" : "", - (mtrrcap & (1<<10)) ? "wc" : "", - mtrrdeftype & 0xff, - (mtrrdeftype & (1<<10)) ? "fe" : "", - (mtrrdeftype & (1<<11)) ? "enabled" : "" - ); - - for (unsigned i = 0; i < vcap; ++i) { - uint64_t base = rdmsr(find_mtrr(msr::ia32_mtrrphysbase, i)); - uint64_t mask = rdmsr(find_mtrr(msr::ia32_mtrrphysmask, i)); - log::debug(logs::boot, " vcap[%2d] base:%016llx mask:%016llx type:%02x %s", i, - (base & ~0xfffull), - (mask & ~0xfffull), - (base & 0xff), - (mask & (1<<11)) ? 
"valid" : ""); + void (**p)(void) = &__ctors; + while (p < &__ctors_end) { + void (*ctor)(void) = *p++; + if (ctor) ctor(); } - - msr mtrr_fixed[] = { - msr::ia32_mtrrfix64k_00000, - msr::ia32_mtrrfix16k_80000, - msr::ia32_mtrrfix16k_a0000, - msr::ia32_mtrrfix4k_c0000, - msr::ia32_mtrrfix4k_c8000, - msr::ia32_mtrrfix4k_d0000, - msr::ia32_mtrrfix4k_d8000, - msr::ia32_mtrrfix4k_e0000, - msr::ia32_mtrrfix4k_e8000, - msr::ia32_mtrrfix4k_f0000, - msr::ia32_mtrrfix4k_f8000, - }; - - for (int i = 0; i < 11; ++i) { - uint64_t v = rdmsr(mtrr_fixed[i]); - log::debug(logs::boot, " fixed[%2d] %02x %02x %02x %02x %02x %02x %02x %02x", i, - ((v << 0) & 0xff), ((v << 8) & 0xff), ((v << 16) & 0xff), ((v << 24) & 0xff), - ((v << 32) & 0xff), ((v << 40) & 0xff), ((v << 48) & 0xff), ((v << 56) & 0xff)); - } - - uint64_t pat = rdmsr(msr::ia32_pat); - static const char *pat_names[] = {"UC ","WC ","XX ","XX ","WT ","WP ","WB ","UC-"}; - log::debug(logs::boot, " PAT: 0:%s 1:%s 2:%s 3:%s 4:%s 5:%s 6:%s 7:%s", - pat_names[(pat >> (0*8)) & 7], pat_names[(pat >> (1*8)) & 7], - pat_names[(pat >> (2*8)) & 7], pat_names[(pat >> (3*8)) & 7], - pat_names[(pat >> (4*8)) & 7], pat_names[(pat >> (5*8)) & 7], - pat_names[(pat >> (6*8)) & 7], pat_names[(pat >> (7*8)) & 7]); } +} // namespace + +namespace mem { void -load_init_server(bootproto::program &program, uintptr_t modules_address) +initialize(bootproto::args &args) { - obj::process *p = new obj::process; - p->add_handle(&obj::system::get(), obj::system::init_caps); - - vm_space &space = p->space(); - for (const auto &sect : program.sections) { - vm_flags flags = - ((sect.type && section_flags::execute) ? vm_flags::exec : vm_flags::none) | - ((sect.type && section_flags::write) ? 
vm_flags::write : vm_flags::none); - - obj::vm_area *vma = new obj::vm_area_fixed(sect.phys_addr, sect.size, flags); - space.add(sect.virt_addr, vma); - } - - uint64_t iopl = (3ull << 12); - - obj::thread *main = p->create_thread(); - main->add_thunk_user(program.entrypoint, 0, iopl); - main->set_state(obj::thread::state::ready); - - // Hacky: No process exists to have created a stack for init; it needs to create - // its own stack. We take advantage of that to use rsp to pass it the init modules - // address. - auto *tcb = main->tcb(); - tcb->rsp3 = modules_address; + memory_initialize_pre_ctors(args); + run_constructors(); + memory_initialize_post_ctors(args); } + +} // namespace mem diff --git a/src/kernel/panic.serial/main.cpp b/src/kernel/panic.serial/main.cpp index ef5efc3..e865f22 100644 --- a/src/kernel/panic.serial/main.cpp +++ b/src/kernel/panic.serial/main.cpp @@ -58,8 +58,7 @@ void panic_handler(const cpu_state *regs) // If we're running on the CPU that panicked, tell the // others we have finished - if (panic) - main_cpu_done = true; + main_cpu_done = true; if (__atomic_sub_fetch(&remaining, 1, order) == 0) { // No remaining CPUs, if we're running on QEMU, diff --git a/src/kernel/smp.cpp b/src/kernel/smp.cpp new file mode 100644 index 0000000..4f6f1e5 --- /dev/null +++ b/src/kernel/smp.cpp @@ -0,0 +1,142 @@ +#include + +#include "apic.h" +#include "clock.h" +#include "device_manager.h" +#include "logger.h" +#include "memory.h" +#include "objects/vm_area.h" +#include "scheduler.h" +#include "smp.h" +#include "vm_space.h" + +extern "C" { + void long_ap_startup(cpu_data *cpu); + void ap_startup(); + void ap_idle(); + void init_ap_trampoline(void*, cpu_data *, void (*)()); +} + +extern size_t ap_startup_code_size; +extern obj::process &g_kernel_process; +extern obj::vm_area_guarded &g_kernel_stacks; + +namespace smp { + +volatile size_t ap_startup_count; +volatile bool scheduler_ready = false; + + +unsigned +start(cpu_data &bsp, void *kpml4) +{ + using 
mem::frame_size; + using mem::kernel_stack_pages; + using obj::vm_flags; + + + ap_startup_count = 1; // Count the BSP + + clock &clk = clock::get(); + + const auto &ids = device_manager::get().get_apic_ids(); + + log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1); + + // Since we're using address space outside kernel space, make sure + // the kernel's vm_space is used + bsp.process = &g_kernel_process; + + uint16_t index = bsp.index; + + // Copy the startup code somewhere the real mode trampoline can run + uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses + uint8_t vector = addr >> 12; + obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write); + vm_space::kernel_space().add(addr, vma); + memcpy( + reinterpret_cast(addr), + reinterpret_cast(&ap_startup), + ap_startup_code_size); + + // AP idle stacks need less room than normal stacks, so pack multiple + // into a normal stack area + static constexpr size_t idle_stack_bytes = 2048; // 2KiB is generous + static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size; + static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes; + + size_t free_stack_count = 0; + uintptr_t stack_area_start = 0; + + lapic &apic = *bsp.apic; + lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert; + apic.send_ipi_broadcast(mode, false, 0); + + for (uint8_t id : ids) { + if (id == bsp.id) continue; + + cpu_data *cpu = cpu_create(id, ++index); + + // Set up the CPU's idle task stack + if (free_stack_count == 0) { + stack_area_start = g_kernel_stacks.get_section(); + free_stack_count = idle_stacks_per; + } + + uintptr_t stack_end = stack_area_start + free_stack_count-- * idle_stack_bytes; + stack_end -= 2 * sizeof(void*); // Null frame + *reinterpret_cast(stack_end) = 0; // pre-fault the page + cpu->rsp0 = stack_end; + + // Set up the trampoline with this CPU's data + init_ap_trampoline(kpml4, cpu, ap_idle); + + // Kick it off! 
+ size_t current_count = ap_startup_count; + log::debug(logs::boot, "Starting AP %d: stack %llx", cpu->index, stack_end); + + lapic::ipi startup = lapic::ipi::startup | lapic::ipi::assert; + + apic.send_ipi(startup, vector, id); + for (unsigned i = 0; i < 20; ++i) { + if (ap_startup_count > current_count) break; + clk.spinwait(20); + } + + // If the CPU already incremented ap_startup_count, it's done + if (ap_startup_count > current_count) + continue; + + // Send the second SIPI (intel recommends this) + apic.send_ipi(startup, vector, id); + for (unsigned i = 0; i < 100; ++i) { + if (ap_startup_count > current_count) break; + clk.spinwait(100); + } + + log::warn(logs::boot, "No response from AP %d within timeout", id); + } + + log::info(logs::boot, "%d CPUs running", ap_startup_count); + vm_space::kernel_space().remove(vma); + return ap_startup_count; +} + +void +ready() +{ + scheduler_ready = true; +} + +} // namespace smp + +void +long_ap_startup(cpu_data *cpu) +{ + __atomic_add_fetch(&smp::ap_startup_count, 1, __ATOMIC_SEQ_CST); + + cpu_init(cpu, false); + + while (!smp::scheduler_ready) asm ("pause"); + scheduler::get().start(); +} diff --git a/src/kernel/smp.h b/src/kernel/smp.h new file mode 100644 index 0000000..6a45202 --- /dev/null +++ b/src/kernel/smp.h @@ -0,0 +1,18 @@ +#pragma once +/// \file smp.h +/// Multi-core / multi-processor startup code + +struct cpu_data; + +namespace smp { + +/// Start all APs and have them wait for the BSP to +/// call smp::ready(). +/// \arg bsp The cpu_data struct representing the BSP +/// \arg kpml4 The kernel PML4 +unsigned start(cpu_data &bsp, void *kpml4); + +/// Unblock all APs and let them start their schedulers. +void ready(); + +} // namespace smp diff --git a/src/kernel/ap_startup.s b/src/kernel/smp.s similarity index 100% rename from src/kernel/ap_startup.s rename to src/kernel/smp.s