From c88170f6e0d842a430771c34d2553c42e6127a4a Mon Sep 17 00:00:00 2001 From: "Justin C. Miller" Date: Sun, 7 Feb 2021 23:26:47 -0800 Subject: [PATCH] [kernel] Start all other processors in the system This very large commit is mainly focused on getting the APs started and to a state where they're waiting to have work scheduled. (Actually scheduling on them is for another commit.) To do this, a bunch of major changes were needed: - Moving a lot of the CPU initialization (including for the BSP) to init_cpu(). This includes setting up IST stacks, writing MSRs, and creating the cpu_data structure. For the APs, this also creates and installs the GDT and TSS, and installs the global IDT. - Creating the AP startup code, which tries to be as position independent as possible. It's copied from its location to 0x8000 for AP startup, and some of it is fixed at that address. The AP startup code jumps from real mode to long mode with paging in one swell foop. - Adding limited IPI capability to the lapic class. This will need to improve. - Renaming cpu/cpu.* to cpu/cpu_id.* because it was just annoying in GDB and really isn't anything but cpu_id anymore. - Moved all the GDT, TSS, and IDT code into their own files and made them classes instead of a mess of free functions. - Got rid of bsp_cpu_data everywhere. Now always call the new current_cpu() to get the current CPU's cpu_data. - Device manager keeps a list of APIC ids now. This should go somewhere else eventually, device_manager needs to be refactored away. - Moved some more things (notably the g_kernel_stacks vma) to the pre-constructor setup in memory_bootstrap. That whole file is in bad need of a refactor. 
--- modules.yaml | 7 +- src/boot/main.cpp | 2 +- src/kernel/ap_startup.s | 134 ++++++++ src/kernel/apic.cpp | 34 ++ src/kernel/apic.h | 21 ++ src/kernel/cpu.cpp | 79 ++++- src/kernel/cpu.h | 35 +- src/kernel/debug.cpp | 7 +- src/kernel/device_manager.cpp | 3 +- src/kernel/device_manager.h | 6 +- src/kernel/gdt.cpp | 311 +++++------------- src/kernel/gdt.h | 96 +++--- src/kernel/gdt.s | 35 -- src/kernel/gdtidt.s | 35 ++ src/kernel/idt.cpp | 137 ++++++++ src/kernel/idt.h | 61 ++++ src/kernel/interrupt_isrs.inc | 1 + src/kernel/interrupts.cpp | 43 +-- src/kernel/interrupts.h | 5 +- src/kernel/main.cpp | 127 ++++++- src/kernel/memory_bootstrap.cpp | 54 +-- src/kernel/objects/process.cpp | 4 +- src/kernel/objects/thread.cpp | 10 +- src/kernel/scheduler.cpp | 29 +- src/kernel/scheduler.h | 12 +- src/kernel/task.s | 10 +- src/kernel/tasking.inc | 10 +- src/kernel/tss.cpp | 39 +++ src/kernel/tss.h | 35 ++ src/libraries/cpu/{cpu.cpp => cpu_id.cpp} | 11 +- .../cpu/include/cpu/{cpu.h => cpu_id.h} | 5 +- 31 files changed, 952 insertions(+), 446 deletions(-) create mode 100644 src/kernel/ap_startup.s delete mode 100644 src/kernel/gdt.s create mode 100644 src/kernel/gdtidt.s create mode 100644 src/kernel/idt.cpp create mode 100644 src/kernel/idt.h create mode 100644 src/kernel/tss.cpp create mode 100644 src/kernel/tss.h rename src/libraries/cpu/{cpu.cpp => cpu_id.cpp} (93%) rename src/libraries/cpu/include/cpu/{cpu.h => cpu_id.h} (93%) diff --git a/modules.yaml b/modules.yaml index 608d729..e3d59ca 100644 --- a/modules.yaml +++ b/modules.yaml @@ -12,6 +12,7 @@ modules: - src/kernel source: - src/kernel/apic.cpp + - src/kernel/ap_startup.s - src/kernel/assert.cpp - src/kernel/boot.s - src/kernel/clock.cpp @@ -24,8 +25,9 @@ modules: - src/kernel/frame_allocator.cpp - src/kernel/fs/gpt.cpp - src/kernel/gdt.cpp - - src/kernel/gdt.s + - src/kernel/gdtidt.s - src/kernel/hpet.cpp + - src/kernel/idt.cpp - src/kernel/interrupts.cpp - src/kernel/interrupts.s - src/kernel/io.cpp @@ -56,6 
+58,7 @@ modules: - src/kernel/syscalls/thread.cpp - src/kernel/syscalls/vm_area.cpp - src/kernel/task.s + - src/kernel/tss.cpp - src/kernel/vm_space.cpp boot: @@ -118,7 +121,7 @@ modules: includes: - src/libraries/cpu/include source: - - src/libraries/cpu/cpu.cpp + - src/libraries/cpu/cpu_id.cpp j6: kind: lib diff --git a/src/boot/main.cpp b/src/boot/main.cpp index 500b692..5412d4e 100644 --- a/src/boot/main.cpp +++ b/src/boot/main.cpp @@ -8,7 +8,7 @@ #include #include "console.h" -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" #include "error.h" #include "fs.h" #include "hardware.h" diff --git a/src/kernel/ap_startup.s b/src/kernel/ap_startup.s new file mode 100644 index 0000000..327651f --- /dev/null +++ b/src/kernel/ap_startup.s @@ -0,0 +1,134 @@ +%include "tasking.inc" + +section .ap_startup + +BASE equ 0x8000 ; Where the kernel will map this at runtime + +CR0_PE equ (1 << 0) +CR0_MP equ (1 << 1) +CR0_ET equ (1 << 4) +CR0_NE equ (1 << 5) +CR0_WP equ (1 << 16) +CR0_PG equ (1 << 31) +CR0_VAL equ CR0_PE|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_PG + +CR4_DE equ (1 << 3) +CR4_PAE equ (1 << 5) +CR4_MCE equ (1 << 6) +CR4_PGE equ (1 << 7) +CR4_OSFXSR equ (1 << 9) +CR4_OSCMMEXCPT equ (1 << 10) +CR4_FSGSBASE equ (1 << 16) +CR4_PCIDE equ (1 << 17) +CR4_VAL equ CR4_DE|CR4_PAE|CR4_MCE|CR4_PGE|CR4_OSFXSR|CR4_OSCMMEXCPT|CR4_FSGSBASE|CR4_PCIDE + +EFER_MSR equ 0xC0000080 +EFER_SCE equ (1 << 0) +EFER_LME equ (1 << 8) +EFER_NXE equ (1 << 11) +EFER_VAL equ EFER_SCE|EFER_LME|EFER_NXE + +bits 16 +default rel +align 8 + +global ap_startup +ap_startup: + jmp .start_real + +align 8 + .pml4: dq 0 + .stack: dq 0 + .ret: dq 0 + +align 16 +.gdt: + dq 0x0 ; Null GDT entry + + dq 0x00209A0000000000 ; Code + dq 0x0000920000000000 ; Data + +align 4 +.gdtd: + dw ($ - .gdt) + dd BASE + (.gdt - ap_startup) + +align 4 +.idtd: + dw 0 ; zero-length IDT descriptor + dd 0 + +.start_real: + cli + cld + + xor ax, ax + mov ds, ax + + ; set the temporary null IDT + lidt [BASE + (.idtd - ap_startup)] + + ; Enter 
long mode + mov eax, CR4_VAL + mov cr4, eax + + mov eax, [BASE + (.pml4 - ap_startup)] + mov cr3, eax + + mov ecx, EFER_MSR + rdmsr + or eax, EFER_VAL + wrmsr + + mov eax, CR0_VAL + mov cr0, eax + + ; Set the temporary minimal GDT + lgdt [BASE + (.gdtd - ap_startup)] + + jmp (1 << 3):(BASE + (.start_long - ap_startup)) + +bits 64 +default abs +align 8 +.start_long: + ; set data segments + mov ax, (2 << 3) + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + mov rax, [BASE + (.stack - ap_startup)] + mov rsp, rax + + mov rax, [BASE + (.ret - ap_startup)] + jmp rax + + +global ap_startup_code_size +ap_startup_code_size: + dq ($ - ap_startup) + + +section .text +global init_ap_trampoline +init_ap_trampoline: + push rbp + mov rbp, rsp + + ; rdi is the kernel pml4 + mov [BASE + (ap_startup.pml4 - ap_startup)], rdi + + ; rsi is the stack for this AP + mov [BASE + (ap_startup.stack - ap_startup)], rsi + + ; rdx is the address to jump to + mov [BASE + (ap_startup.ret - ap_startup)], rdx + + ; rcx is the processor id + mov rdi, rdx + + pop rbp + ret diff --git a/src/kernel/apic.cpp b/src/kernel/apic.cpp index 7515d1c..186c820 100644 --- a/src/kernel/apic.cpp +++ b/src/kernel/apic.cpp @@ -6,11 +6,16 @@ #include "kernel_memory.h" #include "log.h" +static constexpr uint16_t lapic_id = 0x0020; static constexpr uint16_t lapic_spurious = 0x00f0; +static constexpr uint16_t lapic_icr_low = 0x0300; +static constexpr uint16_t lapic_icr_high = 0x0310; + static constexpr uint16_t lapic_lvt_timer = 0x0320; static constexpr uint16_t lapic_lvt_lint0 = 0x0350; static constexpr uint16_t lapic_lvt_lint1 = 0x0360; +static constexpr uint16_t lapic_lvt_error = 0x0370; static constexpr uint16_t lapic_timer_init = 0x0380; static constexpr uint16_t lapic_timer_cur = 0x0390; @@ -25,6 +30,7 @@ apic_read(uint32_t volatile *apic, uint16_t offset) static void apic_write(uint32_t volatile *apic, uint16_t offset, uint32_t value) { + log::debug(logs::apic, "LAPIC write: %x = %08lx", offset, 
value); *(apic + offset/sizeof(uint32_t)) = value; } @@ -52,10 +58,38 @@ lapic::lapic(uintptr_t base, isr spurious) : apic(base), m_divisor(0) { + apic_write(m_base, lapic_lvt_error, static_cast(isr::isrAPICError)); apic_write(m_base, lapic_spurious, static_cast(spurious)); log::info(logs::apic, "LAPIC created, base %lx", m_base); } +uint8_t +lapic::get_id() +{ + return static_cast(apic_read(m_base, lapic_id) >> 24); +} + +void +lapic::send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest) +{ + // Wait until the APIC is ready to send + ipi_wait(); + + apic_write(m_base, lapic_icr_high, static_cast(dest) << 24); + uint32_t command = + static_cast(vector) | + static_cast(mode) << 8; + + apic_write(m_base, lapic_icr_low, command); +} + +void +lapic::ipi_wait() +{ + while (apic_read(m_base, lapic_icr_low) & (1<<12)) + asm volatile ("pause" : : : "memory"); +} + void lapic::calibrate_timer() { diff --git a/src/kernel/apic.h b/src/kernel/apic.h index 54b86ee..9b434ac 100644 --- a/src/kernel/apic.h +++ b/src/kernel/apic.h @@ -29,6 +29,27 @@ public: /// \arg spurious Vector of the spurious interrupt handler lapic(uintptr_t base, isr spurious); + /// Get the local APIC's ID + uint8_t get_id(); + + enum class ipi_mode : uint8_t { + fixed = 0, + smi = 2, + nmi = 4, + init = 5, + startup = 6, + }; + + /// Send an inter-processor interrupt. + /// \arg mode The sending mode + /// \arg vector The interrupt vector + /// \arg dest The APIC ID of the destination + void send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest); + + /// Wait for an IPI to finish sending. This is done automatically + /// before sending another IPI with send_ipi(). + void ipi_wait(); + /// Enable interrupts for the LAPIC timer. 
/// \arg vector Interrupt vector the timer should use /// \arg repeat If false, this timer is one-off, otherwise repeating diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index 4b0720e..9d5aca7 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -1,11 +1,19 @@ #include #include "kutil/assert.h" #include "kutil/memory.h" +#include "apic.h" #include "cpu.h" -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" +#include "device_manager.h" +#include "gdt.h" +#include "idt.h" +#include "kernel_memory.h" #include "log.h" +#include "msr.h" +#include "objects/vm_area.h" +#include "tss.h" -cpu_data bsp_cpu_data; +cpu_data g_bsp_cpu_data; void cpu_validate() @@ -29,3 +37,70 @@ cpu_validate() #undef CPU_FEATURE_OPT #undef CPU_FEATURE_REQ } + +void +init_cpu(bool bsp) +{ + extern TSS &g_bsp_tss; + extern GDT &g_bsp_gdt; + extern vm_area_guarded &g_kernel_stacks; + + uint8_t id = 0; + + TSS *tss = nullptr; + GDT *gdt = nullptr; + cpu_data *cpu = nullptr; + + if (bsp) { + gdt = &g_bsp_gdt; + tss = &g_bsp_tss; + cpu = &g_bsp_cpu_data; + } else { + g_idt.install(); + + tss = new TSS; + gdt = new GDT {tss}; + cpu = new cpu_data; + + gdt->install(); + + lapic &apic = device_manager::get().get_lapic(); + id = apic.get_id(); + } + + kutil::memset(cpu, 0, sizeof(cpu_data)); + + cpu->self = cpu; + cpu->id = id; + cpu->gdt = gdt; + cpu->tss = tss; + + // Install the GS base pointing to the cpu_data + wrmsr(msr::ia32_gs_base, reinterpret_cast(cpu)); + + using memory::frame_size; + using memory::kernel_stack_pages; + constexpr size_t stack_size = kernel_stack_pages * frame_size; + + uint8_t ist_entries = g_idt.used_ist_entries(); + + // Set up the IST stacks + for (unsigned ist = 1; ist < 8; ++ist) { + if (!(ist_entries & (1 << ist))) + continue; + + // Two zero entries at the top for the null frame + uintptr_t stack_bottom = g_kernel_stacks.get_section(); + uintptr_t stack_top = stack_bottom + stack_size - 2 * sizeof(uintptr_t); + + // Pre-realize these stacks, they're no good if 
they page fault + *reinterpret_cast(stack_top) = 0; + + tss->ist_stack(ist) = stack_top; + } + + // Set up the page attributes table + uint64_t pat = rdmsr(msr::ia32_pat); + pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC + wrmsr(msr::ia32_pat, pat); +} diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index ed5722d..029938d 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -2,9 +2,13 @@ #include +#include "kutil/spinlock.h" + +class GDT; +class process; struct TCB; class thread; -class process; +class TSS; struct cpu_state { @@ -18,15 +22,34 @@ struct cpu_state /// version in 'tasking.inc' struct cpu_data { + cpu_data *self; + uint64_t id; uintptr_t rsp0; uintptr_t rsp3; TCB *tcb; - thread *t; - process *p; + thread *thread; + process *process; + TSS *tss; + GDT *gdt; + + // Values from here on don't need to be in the asm version + kutil::spinlock::node spinner; }; -extern cpu_data bsp_cpu_data; +extern "C" cpu_data * _current_gsbase(); -// We already validated the required options in the bootloader, -// but iterate the options and log about them. +/// Initialize a CPU and set up its cpu_data structure +/// \arg bsp True if the current CPU is the BSP +void init_cpu(bool bsp); + +/// Get the cpu_data struct for the current executing CPU +inline cpu_data & current_cpu() { return *_current_gsbase(); } + +/// Validate the required CPU features are present. Really, the bootloader already +/// validated the required features, but still iterate the options and log about them. void cpu_validate(); + +/// Set up the running CPU. This sets GDT, IDT, and necessary MSRs as well as creating +/// the cpu_data structure for this processor. 
+/// \arg bsp True if this CPU is the BSP +void cpu_initialize(bool bsp); diff --git a/src/kernel/debug.cpp b/src/kernel/debug.cpp index 1cd79e2..ec1aa5e 100644 --- a/src/kernel/debug.cpp +++ b/src/kernel/debug.cpp @@ -13,6 +13,7 @@ void print_regs(const cpu_state ®s) { console *cons = console::get(); + cpu_data &cpu = current_cpu(); uint64_t cr2 = 0; __asm__ __volatile__ ("mov %%cr2, %0" : "=r"(cr2)); @@ -20,8 +21,8 @@ print_regs(const cpu_state ®s) uintptr_t cr3 = 0; __asm__ __volatile__ ( "mov %%cr3, %0" : "=r" (cr3) ); - cons->printf(" process: %llx", bsp_cpu_data.p->koid()); - cons->printf(" thread: %llx\n", bsp_cpu_data.t->koid()); + cons->printf(" process: %llx", cpu.process->koid()); + cons->printf(" thread: %llx\n", cpu.thread->koid()); print_regL("rax", regs.rax); print_regM("rbx", regs.rbx); @@ -43,7 +44,7 @@ print_regs(const cpu_state ®s) cons->puts("\n\n"); print_regL("rbp", regs.rbp); print_regM("rsp", regs.user_rsp); - print_regR("sp0", bsp_cpu_data.rsp0); + print_regR("sp0", cpu.rsp0); print_regL("rip", regs.rip); print_regM("cr3", cr3); diff --git a/src/kernel/device_manager.cpp b/src/kernel/device_manager.cpp index 19e4939..2f8f7d3 100644 --- a/src/kernel/device_manager.cpp +++ b/src/kernel/device_manager.cpp @@ -204,7 +204,8 @@ device_manager::load_apic(const acpi_table_header *header) case 0: { // Local APIC uint8_t uid = kutil::read_from(p+2); uint8_t id = kutil::read_from(p+3); - log::debug(logs::device, " Local APIC uid %x id %x", id); + m_apic_ids.append(id); + log::debug(logs::device, " Local APIC uid %x id %x", uid, id); } break; diff --git a/src/kernel/device_manager.h b/src/kernel/device_manager.h index d18c670..539f1c2 100644 --- a/src/kernel/device_manager.h +++ b/src/kernel/device_manager.h @@ -26,7 +26,7 @@ public: /// Get the LAPIC /// \returns An object representing the local APIC - lapic * get_lapic() { return m_lapic; } + lapic & get_lapic() { return *m_lapic; } /// Get an IOAPIC /// \arg i Index of the requested IOAPIC @@ -94,6 
+94,9 @@ public: &m_hpets[i] : nullptr; } + /// Get the list of APIC ids for other CPUs + inline const kutil::vector & get_apic_ids() const { return m_apic_ids; } + private: /// Parse the ACPI XSDT and load relevant sub-tables. /// \arg xsdt Pointer to the XSDT from the firmware @@ -122,6 +125,7 @@ private: lapic *m_lapic; kutil::vector m_ioapics; kutil::vector m_hpets; + kutil::vector m_apic_ids; kutil::vector m_pci; kutil::vector m_devices; diff --git a/src/kernel/gdt.cpp b/src/kernel/gdt.cpp index d629eda..389f3e4 100644 --- a/src/kernel/gdt.cpp +++ b/src/kernel/gdt.cpp @@ -1,36 +1,80 @@ #include #include "kutil/assert.h" -#include "kutil/enum_bitfields.h" #include "kutil/memory.h" +#include "kutil/no_construct.h" #include "console.h" -#include "kernel_memory.h" +#include "cpu.h" +#include "gdt.h" #include "log.h" +#include "tss.h" + +extern "C" void gdt_write(const void *gdt_ptr, uint16_t cs, uint16_t ds, uint16_t tr); + +static constexpr uint8_t kern_cs_index = 1; +static constexpr uint8_t kern_ss_index = 2; +static constexpr uint8_t user_cs32_index = 3; +static constexpr uint8_t user_ss_index = 4; +static constexpr uint8_t user_cs64_index = 5; +static constexpr uint8_t tss_index = 6; // Note that this takes TWO GDT entries + +// The BSP's GDT is initialized _before_ global constructors are called, +// so we don't want it to have a global constructor, lest it overwrite +// the previous initialization. 
+static kutil::no_construct __g_bsp_gdt_storage; +GDT &g_bsp_gdt = __g_bsp_gdt_storage.value; -enum class gdt_type : uint8_t +GDT::GDT(TSS *tss) : + m_tss(tss) { - accessed = 0x01, - read_write = 0x02, - conforming = 0x04, - execute = 0x08, - system = 0x10, - ring1 = 0x20, - ring2 = 0x40, - ring3 = 0x60, - present = 0x80 -}; -IS_BITFIELD(gdt_type); + kutil::memset(this, 0, sizeof(GDT)); -struct gdt_descriptor + m_ptr.limit = sizeof(m_entries) - 1; + m_ptr.base = &m_entries[0]; + + // Kernel CS/SS - always 64bit + set(kern_cs_index, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute); + set(kern_ss_index, 0, 0xfffff, true, gdt_type::read_write); + + // User CS32/SS/CS64 - layout expected by SYSRET + set(user_cs32_index, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); + set(user_ss_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write); + set(user_cs64_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); + + set_tss(tss); +} + +GDT & +GDT::current() { - uint16_t limit_low; - uint16_t base_low; - uint8_t base_mid; - gdt_type type; - uint8_t size; - uint8_t base_high; -} __attribute__ ((packed)); + cpu_data &cpu = current_cpu(); + return *cpu.gdt; +} + +void +GDT::install() const +{ + gdt_write( + static_cast(&m_ptr), + kern_cs_index << 3, + kern_ss_index << 3, + tss_index << 3); +} + +void +GDT::set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type) +{ + m_entries[i].limit_low = limit & 0xffff; + m_entries[i].size = (limit >> 16) & 0xf; + m_entries[i].size |= (is64 ? 
0xa0 : 0xc0); + + m_entries[i].base_low = base & 0xffff; + m_entries[i].base_mid = (base >> 16) & 0xff; + m_entries[i].base_high = (base >> 24) & 0xff; + + m_entries[i].type = type | gdt_type::system | gdt_type::present; +} struct tss_descriptor { @@ -44,72 +88,16 @@ struct tss_descriptor uint32_t reserved; } __attribute__ ((packed)); -struct tss_entry -{ - uint32_t reserved0; - - uint64_t rsp[3]; // stack pointers for CPL 0-2 - uint64_t ist[8]; // ist[0] is reserved - - uint64_t reserved1; - uint16_t reserved2; - uint16_t iomap_offset; -} __attribute__ ((packed)); - -struct idt_descriptor -{ - uint16_t base_low; - uint16_t selector; - uint8_t ist; - uint8_t flags; - uint16_t base_mid; - uint32_t base_high; - uint32_t reserved; // must be zero -} __attribute__ ((packed)); - -struct table_ptr -{ - uint16_t limit; - uint64_t base; -} __attribute__ ((packed)); - - -gdt_descriptor g_gdt_table[10]; -idt_descriptor g_idt_table[256]; -table_ptr g_gdtr; -table_ptr g_idtr; -tss_entry g_tss; - - -extern "C" { - void idt_write(); - void idt_load(); - - void gdt_write(uint16_t cs, uint16_t ds, uint16_t tr); - void gdt_load(); -} - void -gdt_set_entry(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type) -{ - g_gdt_table[i].limit_low = limit & 0xffff; - g_gdt_table[i].size = (limit >> 16) & 0xf; - g_gdt_table[i].size |= (is64 ? 
0xa0 : 0xc0); - - g_gdt_table[i].base_low = base & 0xffff; - g_gdt_table[i].base_mid = (base >> 16) & 0xff; - g_gdt_table[i].base_high = (base >> 24) & 0xff; - - g_gdt_table[i].type = type | gdt_type::system | gdt_type::present; -} - -void -tss_set_entry(uint8_t i, uint64_t base, uint64_t limit) +GDT::set_tss(TSS *tss) { tss_descriptor tssd; + + size_t limit = sizeof(TSS); tssd.limit_low = limit & 0xffff; tssd.size = (limit >> 16) & 0xf; + uintptr_t base = reinterpret_cast(tss); tssd.base_00 = base & 0xffff; tssd.base_16 = (base >> 16) & 0xff; tssd.base_24 = (base >> 24) & 0xff; @@ -121,123 +109,26 @@ tss_set_entry(uint8_t i, uint64_t base, uint64_t limit) gdt_type::execute | gdt_type::ring3 | gdt_type::present; - kutil::memcpy(&g_gdt_table[i], &tssd, sizeof(tss_descriptor)); + + kutil::memcpy(&m_entries[tss_index], &tssd, sizeof(tss_descriptor)); } void -idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags) +GDT::dump(unsigned index) const { - g_idt_table[i].base_low = addr & 0xffff; - g_idt_table[i].base_mid = (addr >> 16) & 0xffff; - g_idt_table[i].base_high = (addr >> 32) & 0xffffffff; - g_idt_table[i].selector = selector; - g_idt_table[i].flags = flags; - g_idt_table[i].ist = 0; - g_idt_table[i].reserved = 0; -} - -void -tss_set_stack(unsigned ring, uintptr_t rsp) -{ - kassert(ring < 3, "Bad ring passed to tss_set_stack."); - g_tss.rsp[ring] = rsp; -} - -uintptr_t -tss_get_stack(unsigned ring) -{ - kassert(ring < 3, "Bad ring passed to tss_get_stack."); - return g_tss.rsp[ring]; -} - -void -idt_set_ist(unsigned i, unsigned ist) -{ - g_idt_table[i].ist = ist; -} - -void -tss_set_ist(unsigned ist, uintptr_t rsp) -{ - kassert(ist > 0 && ist < 7, "Bad ist passed to tss_set_ist."); - g_tss.ist[ist] = rsp; -} - -void -ist_increment(unsigned i) -{ - uint8_t ist = g_idt_table[i].ist; - if (ist) - g_tss.ist[ist] += memory::frame_size; -} - -void -ist_decrement(unsigned i) -{ - uint8_t ist = g_idt_table[i].ist; - if (ist) - g_tss.ist[ist] -= 
memory::frame_size; -} - -uintptr_t -tss_get_ist(unsigned ist) -{ - kassert(ist > 0 && ist < 7, "Bad ist passed to tss_get_ist."); - return g_tss.ist[ist]; -} - -void -gdt_init() -{ - kutil::memset(&g_gdt_table, 0, sizeof(g_gdt_table)); - kutil::memset(&g_idt_table, 0, sizeof(g_idt_table)); - - g_gdtr.limit = sizeof(g_gdt_table) - 1; - g_gdtr.base = reinterpret_cast(&g_gdt_table); - - // Kernel CS/SS - always 64bit - gdt_set_entry(1, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute); - gdt_set_entry(2, 0, 0xfffff, true, gdt_type::read_write); - - // User CS32/SS/CS64 - layout expected by SYSRET - gdt_set_entry(3, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); - gdt_set_entry(4, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write); - gdt_set_entry(5, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); - - kutil::memset(&g_tss, 0, sizeof(tss_entry)); - g_tss.iomap_offset = sizeof(tss_entry); - - uintptr_t tss_base = reinterpret_cast(&g_tss); - - // Note that this takes TWO GDT entries - tss_set_entry(6, tss_base, sizeof(tss_entry)); - - gdt_write(1 << 3, 2 << 3, 6 << 3); - - g_idtr.limit = sizeof(g_idt_table) - 1; - g_idtr.base = reinterpret_cast(&g_idt_table); - - idt_write(); -} - -void -gdt_dump(unsigned index) -{ - const table_ptr &table = g_gdtr; - console *cons = console::get(); unsigned start = 0; - unsigned count = (table.limit + 1) / sizeof(gdt_descriptor); + unsigned count = (m_ptr.limit + 1) / sizeof(descriptor); if (index != -1) { start = index; count = 1; } else { - cons->printf(" GDT: loc:%lx size:%d\n", table.base, table.limit+1); + cons->printf(" GDT: loc:%lx size:%d\n", m_ptr.base, m_ptr.limit+1); } - const gdt_descriptor *gdt = - reinterpret_cast(table.base); + const descriptor *gdt = + reinterpret_cast(m_ptr.base); for (int i = start; i < start+count; ++i) { uint32_t base = @@ -275,51 +166,3 @@ gdt_dump(unsigned index) (gdt[i].size & 0x60) == 0x40 ? 
"32" : "16"); } } - -void -idt_dump(unsigned index) -{ - const table_ptr &table = g_idtr; - - - unsigned start = 0; - unsigned count = (table.limit + 1) / sizeof(idt_descriptor); - if (index != -1) { - start = index; - count = 1; - log::info(logs::boot, "IDT FOR INDEX %02x", index); - } else { - log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", table.base, table.limit+1); - } - - const idt_descriptor *idt = - reinterpret_cast(table.base); - - for (int i = start; i < start+count; ++i) { - uint64_t base = - (static_cast(idt[i].base_high) << 32) | - (static_cast(idt[i].base_mid) << 16) | - idt[i].base_low; - - char const *type; - switch (idt[i].flags & 0xf) { - case 0x5: type = " 32tsk "; break; - case 0x6: type = " 16int "; break; - case 0x7: type = " 16trp "; break; - case 0xe: type = " 32int "; break; - case 0xf: type = " 32trp "; break; - default: type = " ????? "; break; - } - - if (idt[i].flags & 0x80) { - log::debug(logs::boot, - " Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d", i, base, - (idt[i].selector & 0x3), - ((idt[i].selector & 0x4) >> 2), - (idt[i].selector >> 3), - idt[i].ist, - type, - ((idt[i].flags >> 5) & 0x3)); - } - } -} diff --git a/src/kernel/gdt.h b/src/kernel/gdt.h index 9e8a959..984636b 100644 --- a/src/kernel/gdt.h +++ b/src/kernel/gdt.h @@ -1,58 +1,66 @@ #pragma once /// \file gdt.h -/// Definitions relating to system descriptor tables: GDT, IDT, TSS +/// Definitions relating to a CPU's GDT table #include -/// Set up the GDT and TSS, and switch segment registers to point -/// to them. 
-void gdt_init(); +#include "kutil/enum_bitfields.h" -/// Set an entry in the IDT -/// \arg i Index in the IDT (vector of the interrupt this handles) -/// \arg addr Address of the handler -/// \arg selector GDT selector to set when invoking this handler -/// \arg flags Descriptor flags to set -void idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags); +class TSS; -/// Set the stack pointer for a given ring in the TSS -/// \arg ring Ring to set for (0-2) -/// \arg rsp Stack pointer to set -void tss_set_stack(unsigned ring, uintptr_t rsp); +enum class gdt_type : uint8_t +{ + accessed = 0x01, + read_write = 0x02, + conforming = 0x04, + execute = 0x08, + system = 0x10, + ring1 = 0x20, + ring2 = 0x40, + ring3 = 0x60, + present = 0x80 +}; +IS_BITFIELD(gdt_type); -/// Get the stack pointer for a given ring in the TSS -/// \arg ring Ring to get (0-2) -/// \returns Stack pointers for that ring -uintptr_t tss_get_stack(unsigned ring); +class GDT +{ +public: + GDT(TSS *tss); -/// Set the given IDT entry to use the given IST entry -/// \arg i Which IDT entry to set -/// \arg ist Which IST entry to set (1-7) -void idt_set_ist(unsigned i, unsigned ist); + /// Get the currently running CPU's GDT + static GDT & current(); -/// Set the stack pointer for a given IST in the TSS -/// \arg ist Which IST entry to set (1-7) -/// \arg rsp Stack pointer to set -void tss_set_ist(unsigned ist, uintptr_t rsp); + /// Install this GDT to the current CPU + void install() const; -/// Increment the stack pointer for the given vector, -/// if it's using an IST entry -/// \arg i Which IDT entry to use -void ist_increment(unsigned i); + /// Get the address of the pointer + inline const void * pointer() const { return static_cast(&m_ptr); } -/// Decrement the stack pointer for the given vector, -/// if it's using an IST entry -/// \arg i Which IDT entry to use -void ist_decrement(unsigned i); + /// Dump debug information about the GDT to the console. 
+ /// \arg index Which entry to print, or -1 for all entries + void dump(unsigned index = -1) const; -/// Get the stack pointer for a given IST in the TSS -/// \arg ring Which IST entry to get (1-7) -/// \returns Stack pointers for that IST entry -uintptr_t tss_get_ist(unsigned ist); +private: + void set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type); + void set_tss(TSS *tss); -/// Dump information about the current GDT to the screen -/// \arg index Which entry to print, or -1 for all entries -void gdt_dump(unsigned index = -1); + struct descriptor + { + uint16_t limit_low; + uint16_t base_low; + uint8_t base_mid; + gdt_type type; + uint8_t size; + uint8_t base_high; + } __attribute__ ((packed, align(8))); -/// Dump information about the current IDT to the screen -/// \arg index Which entry to print, or -1 for all entries -void idt_dump(unsigned index = -1); + struct ptr + { + uint16_t limit; + descriptor *base; + } __attribute__ ((packed, align(4))); + + descriptor m_entries[8]; + TSS *m_tss; + + ptr m_ptr; +}; diff --git a/src/kernel/gdt.s b/src/kernel/gdt.s deleted file mode 100644 index c7d56ec..0000000 --- a/src/kernel/gdt.s +++ /dev/null @@ -1,35 +0,0 @@ -extern g_idtr -extern g_gdtr - -global idt_write -idt_write: - lidt [rel g_idtr] - ret - -global idt_load -idt_load: - sidt [rel g_idtr] - ret - -global gdt_write -gdt_write: - lgdt [rel g_gdtr] - mov ax, si ; second arg is data segment - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - mov ss, ax - push qword rdi ; first arg is code segment - lea rax, [rel .next] - push rax - o64 retf -.next: - ltr dx ; third arg is the TSS - ret - -global gdt_load -gdt_load: - sgdt [rel g_gdtr] - ret - diff --git a/src/kernel/gdtidt.s b/src/kernel/gdtidt.s new file mode 100644 index 0000000..33fd4db --- /dev/null +++ b/src/kernel/gdtidt.s @@ -0,0 +1,35 @@ + +global idt_write +idt_write: + lidt [rdi] ; first arg is the IDT pointer location + ret + +global idt_load +idt_load: + sidt [rdi] ; first arg 
is where to write the idtr value + ret + +global gdt_write +gdt_write: + lgdt [rdi] ; first arg is the GDT pointer location + + mov ax, dx ; third arg is data segment + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + push qword rsi ; second arg is code segment + lea rax, [rel .next] + push rax + o64 retf +.next: + ltr cx ; fourth arg is the TSS + ret + +global gdt_load +gdt_load: + sgdt [rdi] ; first arg is where to write the gdtr value + ret + diff --git a/src/kernel/idt.cpp b/src/kernel/idt.cpp new file mode 100644 index 0000000..03d3e1c --- /dev/null +++ b/src/kernel/idt.cpp @@ -0,0 +1,137 @@ +#include "kutil/memory.h" +#include "kutil/no_construct.h" +#include "idt.h" +#include "log.h" + +extern "C" { + void idt_write(const void *idt_ptr); + +#define ISR(i, s, name) extern void name (); +#define EISR(i, s, name) extern void name (); +#define IRQ(i, q, name) extern void name (); +#include "interrupt_isrs.inc" +#undef IRQ +#undef EISR +#undef ISR +} + +// The IDT is initialized _before_ global constructors are called, +// so we don't want it to have a global constructor, lest it overwrite +// the previous initialization. 
+static kutil::no_construct<IDT> __g_idt_storage;
+IDT &g_idt = __g_idt_storage.value;
+
+
+IDT::IDT()
+{
+	kutil::memset(this, 0, sizeof(IDT));
+	m_ptr.limit = sizeof(m_entries) - 1; // limit is the offset of the last valid byte
+	m_ptr.base = &m_entries[0];
+
+#define ISR(i, s, name) set(i, & name, 0x08, 0x8e);
+#define EISR(i, s, name) set(i, & name, 0x08, 0x8e);
+#define IRQ(i, q, name) set(i, & name, 0x08, 0x8e);
+#include "interrupt_isrs.inc"
+#undef IRQ
+#undef EISR
+#undef ISR
+}
+
+void
+IDT::install() const
+{
+	idt_write(static_cast<const void*>(&m_ptr)); // hand the limit/base pair to the asm helper
+}
+
+void
+IDT::add_ist_entries()
+{
+#define ISR(i, s, name) if (s) { set_ist(i, s); }
+#define EISR(i, s, name) if (s) { set_ist(i, s); }
+#define IRQ(i, q, name)
+#include "interrupt_isrs.inc"
+#undef IRQ
+#undef EISR
+#undef ISR
+}
+
+uint8_t
+IDT::used_ist_entries() const
+{
+	uint8_t entries = 0; // bit N is set when some entry in the ISR table uses IST N
+
+#define ISR(i, s, name) if (s) { entries |= (1 << s); }
+#define EISR(i, s, name) if (s) { entries |= (1 << s); }
+#define IRQ(i, q, name)
+#include "interrupt_isrs.inc"
+#undef IRQ
+#undef EISR
+#undef ISR
+
+	return entries;
+}
+
+void
+IDT::set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags)
+{
+	uintptr_t addr = reinterpret_cast<uintptr_t>(handler);
+
+	m_entries[i].base_low = addr & 0xffff; // handler address is split over three fields
+	m_entries[i].base_mid = (addr >> 16) & 0xffff;
+	m_entries[i].base_high = (addr >> 32) & 0xffffffff;
+	m_entries[i].selector = selector;
+	m_entries[i].flags = flags;
+	m_entries[i].ist = 0; // IST use is added later by add_ist_entries()
+	m_entries[i].reserved = 0;
+}
+
+void
+IDT::set_ist(uint8_t i, uint8_t ist)
+{
+	m_entries[i].ist = ist;
+}
+
+void
+IDT::dump(unsigned index) const
+{
+	unsigned start = 0;
+	unsigned count = (m_ptr.limit + 1) / sizeof(descriptor);
+	if (index != static_cast<unsigned>(-1)) { // anything but the "all entries" sentinel
+		start = index;
+		count = (index < IDT::count) ? 1 : 0; // never read past the table for a bad index
+		log::info(logs::boot, "IDT FOR INDEX %02x", index);
+	} else {
+		log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", m_ptr.base, m_ptr.limit+1);
+	}
+
+	const descriptor *idt =
+		reinterpret_cast<const descriptor *>(m_ptr.base);
+
+	for (unsigned i = start; i < start+count; ++i) { // unsigned to match start/count
+		uint64_t base =
+			(static_cast<uint64_t>(idt[i].base_high) << 32) |
+			(static_cast<uint64_t>(idt[i].base_mid) << 16) |
+			idt[i].base_low;
+
+		char const *type;
+		switch (idt[i].flags & 0xf) { // low nibble of flags is the gate type
+			case 0x5: type = " 32tsk "; break;
+			case 0x6: type = " 16int "; break;
+			case 0x7: type = " 16trp "; break;
+			case 0xe: type = " 32int "; break;
+			case 0xf: type = " 32trp "; break;
+			default: type = " ????? "; break;
+		}
+
+		if (idt[i].flags & 0x80) { // only log entries with the present bit set
+			log::debug(logs::boot,
+				" Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d", i, base,
+				(idt[i].selector & 0x3),
+				((idt[i].selector & 0x4) >> 2),
+				(idt[i].selector >> 3),
+				idt[i].ist,
+				type,
+				((idt[i].flags >> 5) & 0x3));
+		}
+	}
+}
diff --git a/src/kernel/idt.h b/src/kernel/idt.h
new file mode 100644
index 0000000..fc5ac24
--- /dev/null
+++ b/src/kernel/idt.h
@@ -0,0 +1,61 @@
+#pragma once
+/// \file idt.h
+/// Definitions relating to a CPU's IDT table
+#include <stdint.h>
+
+class IDT
+{
+public:
+	static constexpr unsigned count = 256;
+
+	IDT();
+
+	/// Install this IDT to the current CPU
+	void install() const;
+
+	/// Add the IST entries listed in the ISR table into the IDT.
+	/// This can't be done until after memory is set up so the
+	/// stacks can be created.
+	void add_ist_entries();
+
+	/// Get the IST entry used by an entry.
+	/// \arg i Which IDT entry to look in
+	/// \returns The IST index used by entry i, or 0 for none
+	inline uint8_t get_ist(unsigned i) const {
+		if (i >= count) return 0;
+		return m_entries[i].ist;
+	}
+
+	/// Get the IST entries that are used by this table, as a bitmap
+	uint8_t used_ist_entries() const;
+
+	/// Dump debug information about the IDT to the console.
+ /// \arg index Which entry to print, or -1 for all entries + void dump(unsigned index = -1) const; + +private: + void set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags); + void set_ist(uint8_t i, uint8_t ist); + + struct descriptor + { + uint16_t base_low; + uint16_t selector; + uint8_t ist; + uint8_t flags; + uint16_t base_mid; + uint32_t base_high; + uint32_t reserved; // must be zero + } __attribute__ ((packed, aligned(16))); + + struct ptr + { + uint16_t limit; + descriptor *base; + } __attribute__ ((packed, aligned(4))); + + descriptor m_entries[256]; + ptr m_ptr; +}; + +extern IDT &g_idt; diff --git a/src/kernel/interrupt_isrs.inc b/src/kernel/interrupt_isrs.inc index 2d55794..fe2f1b0 100644 --- a/src/kernel/interrupt_isrs.inc +++ b/src/kernel/interrupt_isrs.inc @@ -240,6 +240,7 @@ IRQ (0xdf, 0xbf, irqBF) ISR (0xe0, 0, isrTimer) ISR (0xe1, 0, isrLINT0) ISR (0xe2, 0, isrLINT1) +ISR (0xe3, 0, isrAPICError) ISR (0xe4, 0, isrAssert) ISR (0xef, 0, isrSpurious) diff --git a/src/kernel/interrupts.cpp b/src/kernel/interrupts.cpp index 97797fd..00d405b 100644 --- a/src/kernel/interrupts.cpp +++ b/src/kernel/interrupts.cpp @@ -8,6 +8,7 @@ #include "debug.h" #include "device_manager.h" #include "gdt.h" +#include "idt.h" #include "interrupts.h" #include "io.h" #include "kernel_memory.h" @@ -15,6 +16,7 @@ #include "objects/process.h" #include "scheduler.h" #include "syscall.h" +#include "tss.h" #include "vm_space.h" static const uint16_t PIC1 = 0x20; @@ -22,19 +24,14 @@ static const uint16_t PIC2 = 0xa0; constexpr uintptr_t apic_eoi_addr = 0xfee000b0 + ::memory::page_offset; +constexpr size_t increment_offset = 0x1000; + extern "C" { void _halt(); void isr_handler(cpu_state*); void irq_handler(cpu_state*); -#define ISR(i, s, name) extern void name (); -#define EISR(i, s, name) extern void name (); -#define IRQ(i, q, name) extern void name (); -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR } isr @@ -60,7 +57,7 @@ get_irq(unsigned 
vector) } } -static void +void disable_legacy_pic() { // Mask all interrupts @@ -80,28 +77,16 @@ disable_legacy_pic() outb(PIC2+1, 0x02); io_wait(); } -void -interrupts_init() -{ -#define ISR(i, s, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#define EISR(i, s, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#define IRQ(i, q, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR - - disable_legacy_pic(); - - log::info(logs::boot, "Interrupts enabled."); -} - void isr_handler(cpu_state *regs) { console *cons = console::get(); uint8_t vector = regs->interrupt & 0xff; - ist_decrement(vector); + + TSS &tss = TSS::current(); + uint8_t ist = g_idt.get_ist(vector); + if (ist) + tss.ist_stack(ist) -= increment_offset; switch (static_cast(vector)) { @@ -150,13 +135,13 @@ isr_handler(cpu_state *regs) switch ((regs->errorcode & 0x07) >> 1) { case 0: cons->printf(" GDT[%x]\n", index); - gdt_dump(index); + GDT::current().dump(index); break; case 1: case 3: cons->printf(" IDT[%x]\n", index); - idt_dump(index); + g_idt.dump(index); break; default: @@ -275,7 +260,9 @@ isr_handler(cpu_state *regs) print_stacktrace(2); _halt(); } - ist_increment(vector); + + if (ist) + tss.ist_stack(ist) += increment_offset; *reinterpret_cast(apic_eoi_addr) = 0; } diff --git a/src/kernel/interrupts.h b/src/kernel/interrupts.h index 793288b..2a8c35f 100644 --- a/src/kernel/interrupts.h +++ b/src/kernel/interrupts.h @@ -29,6 +29,5 @@ extern "C" { void interrupts_disable(); } -/// Fill the IDT with our ISRs, and disable the legacy -/// PIC interrupts. 
-void interrupts_init(); +/// Disable the legacy PIC +void disable_legacy_pic(); diff --git a/src/kernel/main.cpp b/src/kernel/main.cpp index 1bc4285..b8e0b32 100644 --- a/src/kernel/main.cpp +++ b/src/kernel/main.cpp @@ -6,22 +6,28 @@ #include "kutil/assert.h" #include "apic.h" #include "block_device.h" +#include "clock.h" #include "console.h" #include "cpu.h" #include "device_manager.h" #include "gdt.h" +#include "idt.h" #include "interrupts.h" #include "io.h" #include "kernel_args.h" #include "kernel_memory.h" #include "log.h" +#include "msr.h" #include "objects/channel.h" #include "objects/event.h" #include "objects/thread.h" +#include "objects/vm_area.h" #include "scheduler.h" #include "serial.h" #include "symbol_table.h" #include "syscall.h" +#include "tss.h" +#include "vm_space.h" #ifndef GIT_VERSION #define GIT_VERSION @@ -31,18 +37,24 @@ extern "C" { void kernel_main(kernel::args::header *header); void (*__ctors)(void); void (*__ctors_end)(void); + void long_ap_startup(); + void ap_startup(); + void init_ap_trampoline(void*, uintptr_t, void (*)()); } extern void __kernel_assert(const char *, unsigned, const char *); using namespace kernel; +volatile size_t ap_startup_count; + /// Bootstrap the memory managers. -void setup_pat(); void memory_initialize_pre_ctors(args::header &kargs); void memory_initialize_post_ctors(args::header &kargs); process * load_simple_process(args::program &program); +void start_aps(void *kpml4); + /// TODO: not this. this is awful. 
args::framebuffer *fb = nullptr; @@ -77,7 +89,18 @@ kernel_main(args::header *header) logger_init(); cpu_validate(); - setup_pat(); + + log::debug(logs::boot, " jsix header is at: %016lx", header); + log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map); + log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table); + log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services); + log::debug(logs::boot, " Kernel PML4 is at: %016lx", header->pml4); + + uint64_t cr0, cr4; + asm ("mov %%cr0, %0" : "=r"(cr0)); + asm ("mov %%cr4, %0" : "=r"(cr4)); + uint64_t efer = rdmsr(msr::ia32_efer); + log::debug(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0, cr4, efer); bool has_video = false; if (header->video.size > 0) { @@ -95,10 +118,20 @@ kernel_main(args::header *header) logger_clear_immediate(); } - gdt_init(); - interrupts_init(); + extern TSS &g_bsp_tss; + extern GDT &g_bsp_gdt; + + TSS *tss = new (&g_bsp_tss) TSS; + GDT *gdt = new (&g_bsp_gdt) GDT {tss}; + gdt->install(); + + IDT *idt = new (&g_idt) IDT; + idt->install(); + + disable_legacy_pic(); memory_initialize_pre_ctors(*header); + init_cpu(true); run_constructors(); memory_initialize_post_ctors(*header); @@ -116,16 +149,15 @@ kernel_main(args::header *header) } } - log::debug(logs::boot, " jsix header is at: %016lx", header); - log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map); - log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table); - log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services); device_manager &devices = device_manager::get(); devices.parse_acpi(header->acpi_table); devices.init_drivers(); - devices.get_lapic()->calibrate_timer(); + devices.get_lapic().calibrate_timer(); + + start_aps(header->pml4); + interrupts_enable(); /* @@ -164,3 +196,80 @@ kernel_main(args::header *header) sched->start(); } + +void +start_aps(void *kpml4) +{ + using memory::frame_size; + using 
memory::kernel_stack_pages;
+
+	extern size_t ap_startup_code_size;
+	extern process &g_kernel_process;
+	extern vm_area_guarded &g_kernel_stacks;
+
+	clock &clk = clock::get();
+	lapic &apic = device_manager::get().get_lapic();
+
+	ap_startup_count = 1; // BSP processor
+	auto &ids = device_manager::get().get_apic_ids();
+	log::info(logs::boot, "Starting %d other CPUs", static_cast<int>(ids.count() - 1)); // %d expects an int
+
+	// Since we're using address space outside kernel space, make sure
+	// the kernel's vm_space is used
+	cpu_data &cpu = current_cpu();
+	cpu.process = &g_kernel_process;
+
+	// Copy the startup code somewhere the real mode trampoline can run
+	uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses
+	uint8_t vector = addr >> 12; // startup IPI vector is the page number of the trampoline
+	vm_area *vma = new vm_area_fixed(addr, 0x1000, vm_flags::write);
+	vm_space::kernel_space().add(addr, vma);
+	kutil::memcpy(
+		reinterpret_cast<void*>(addr),
+		reinterpret_cast<void*>(&ap_startup),
+		ap_startup_code_size);
+
+	static constexpr size_t stack_bytes = kernel_stack_pages * frame_size;
+
+	for (uint8_t id : ids) {
+		if (id == apic.get_id()) continue; // skip the CPU we are running on
+		log::info(logs::boot, "Starting AP %d", id);
+
+		size_t current_count = ap_startup_count; // snapshot; the AP increments it once it is up
+		uintptr_t stack_start = g_kernel_stacks.get_section();
+		uintptr_t stack_end = stack_start + stack_bytes - 2 * sizeof(void*);
+		*reinterpret_cast<uint64_t*>(stack_end) = 0; // pre-fault the page
+
+		init_ap_trampoline(kpml4, stack_end, long_ap_startup);
+
+		apic.send_ipi(lapic::ipi_mode::init, 0, id);
+		clk.spinwait(1000);
+
+		apic.send_ipi(lapic::ipi_mode::startup, vector, id);
+		for (unsigned i = 0; i < 20; ++i) {
+			if (ap_startup_count > current_count) break;
+			clk.spinwait(10);
+		}
+
+		if (ap_startup_count > current_count)
+			continue;
+
+		apic.send_ipi(lapic::ipi_mode::startup, vector, id); // no response yet: send a second startup IPI
+		for (unsigned i = 0; i < 100; ++i) {
+			if (ap_startup_count > current_count) break;
+			clk.spinwait(10);
+		}
+	}
+
+	log::info(logs::boot, "%d CPUs running", static_cast<int>(ap_startup_count)); // %d expects an int
+	vm_space::kernel_space().remove(vma);
+} + +void +long_ap_startup() +{ + init_cpu(false); + ++ap_startup_count; + + while(1) asm("hlt"); +} diff --git a/src/kernel/memory_bootstrap.cpp b/src/kernel/memory_bootstrap.cpp index 5668dc8..0323c02 100644 --- a/src/kernel/memory_bootstrap.cpp +++ b/src/kernel/memory_bootstrap.cpp @@ -39,11 +39,8 @@ frame_allocator &g_frame_allocator = __g_frame_allocator_storage.value; static kutil::no_construct __g_kernel_heap_area_storage; vm_area_untracked &g_kernel_heap_area = __g_kernel_heap_area_storage.value; -vm_area_guarded g_kernel_stacks { - memory::stacks_start, - memory::kernel_stack_pages, - memory::kernel_max_stacks, - vm_flags::write}; +static kutil::no_construct __g_kernel_stacks_storage; +vm_area_guarded &g_kernel_stacks = __g_kernel_stacks_storage.value; vm_area_guarded g_kernel_buffers { memory::buffers_start, @@ -66,6 +63,11 @@ memory_initialize_pre_ctors(args::header &kargs) { using kernel::args::frame_block; + // Clean out any remaning bootloader page table entries + page_table *kpml4 = static_cast(kargs.pml4); + for (unsigned i = 0; i < memory::pml4e_kernel; ++i) + kpml4->entries[i] = 0; + new (&g_kernel_heap) kutil::heap_allocator {heap_start, kernel_max_heap}; frame_block *blocks = reinterpret_cast(memory::bitmap_start); @@ -97,7 +99,6 @@ memory_initialize_pre_ctors(args::header &kargs) } } - page_table *kpml4 = reinterpret_cast(kargs.pml4); process *kp = process::create_kernel_process(kpml4); vm_space &vm = kp->space(); @@ -105,42 +106,24 @@ memory_initialize_pre_ctors(args::header &kargs) vm_area_untracked(kernel_max_heap, vm_flags::write); vm.add(heap_start, heap); + + vm_area *stacks = new (&g_kernel_stacks) vm_area_guarded { + memory::stacks_start, + memory::kernel_stack_pages, + memory::kernel_max_stacks, + vm_flags::write}; + vm.add(memory::stacks_start, &g_kernel_stacks); } void memory_initialize_post_ctors(args::header &kargs) { vm_space &vm = vm_space::kernel_space(); - vm.add(memory::stacks_start, &g_kernel_stacks); 
vm.add(memory::buffers_start, &g_kernel_buffers); g_frame_allocator.free( reinterpret_cast(kargs.page_tables), kargs.table_count); - - using memory::frame_size; - using memory::kernel_stack_pages; - constexpr size_t stack_size = kernel_stack_pages * frame_size; - - for (int ist = 1; ist <= 3; ++ist) { - uintptr_t bottom = g_kernel_stacks.get_section(); - log::debug(logs::boot, "Installing IST%d stack at %llx", ist, bottom); - - // Pre-realize and xerothese stacks, they're no good - // if they page fault - kutil::memset(reinterpret_cast(bottom), 0, stack_size); - - // Skip two entries to be the null frame - tss_set_ist(ist, bottom + stack_size - 2 * sizeof(uintptr_t)); - } - -#define ISR(i, s, name) if (s) { idt_set_ist(i, s); } -#define EISR(i, s, name) if (s) { idt_set_ist(i, s); } -#define IRQ(i, q, name) -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR } static void @@ -198,15 +181,6 @@ log_mtrrs() pat_names[(pat >> (6*8)) & 7], pat_names[(pat >> (7*8)) & 7]); } -void -setup_pat() -{ - uint64_t pat = rdmsr(msr::ia32_pat); - pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC - wrmsr(msr::ia32_pat, pat); - log_mtrrs(); -} - process * load_simple_process(args::program &program) diff --git a/src/kernel/objects/process.cpp b/src/kernel/objects/process.cpp index d956363..64788f9 100644 --- a/src/kernel/objects/process.cpp +++ b/src/kernel/objects/process.cpp @@ -42,7 +42,7 @@ process::~process() s_processes.remove_swap(this); } -process & process::current() { return *bsp_cpu_data.p; } +process & process::current() { return *current_cpu().process; } process & process::kernel_process() { return g_kernel_process; } process * @@ -63,7 +63,7 @@ process::exit(int32_t code) thread->exit(code); } - if (this == bsp_cpu_data.p) + if (this == current_cpu().process) scheduler::get().schedule(); } diff --git a/src/kernel/objects/thread.cpp b/src/kernel/objects/thread.cpp index d1ab4c1..7dcfd2a 100644 --- a/src/kernel/objects/thread.cpp +++ 
b/src/kernel/objects/thread.cpp @@ -9,7 +9,7 @@ extern "C" void kernel_to_user_trampoline(); static constexpr j6_signal_t thread_default_signals = 0; -extern vm_area_guarded g_kernel_stacks; +extern vm_area_guarded &g_kernel_stacks; thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) : kobject(kobject::type::thread, thread_default_signals), @@ -43,13 +43,9 @@ thread::from_tcb(TCB *tcb) return reinterpret_cast(kutil::offset_pointer(tcb, offset)); } -thread & -thread::current() -{ - return *bsp_cpu_data.t; -} +thread & thread::current() { return *current_cpu().thread; } -inline void schedule_if_current(thread *t) { if (t == bsp_cpu_data.t) scheduler::get().schedule(); } +inline void schedule_if_current(thread *t) { if (t == current_cpu().thread) scheduler::get().schedule(); } void thread::wait_on_signals(kobject *obj, j6_signal_t signals) diff --git a/src/kernel/scheduler.cpp b/src/kernel/scheduler.cpp index fa015f2..9027905 100644 --- a/src/kernel/scheduler.cpp +++ b/src/kernel/scheduler.cpp @@ -33,7 +33,9 @@ const uint64_t rflags_int = 0x202; extern uint64_t idle_stack_end; -scheduler::scheduler(lapic *apic) : +extern "C" void task_switch(TCB *tcb); + +scheduler::scheduler(lapic &apic) : m_apic(apic), m_next_pid(1), m_clock(0), @@ -55,10 +57,11 @@ scheduler::scheduler(lapic *apic) : m_runlists[max_priority].push_back(tcb); m_current = tcb; - bsp_cpu_data.rsp0 = tcb->rsp0; - bsp_cpu_data.tcb = tcb; - bsp_cpu_data.p = kp; - bsp_cpu_data.t = idle; + cpu_data &cpu = current_cpu(); + cpu.rsp0 = tcb->rsp0; + cpu.tcb = tcb; + cpu.process = kp; + cpu.thread = idle; } template @@ -113,9 +116,8 @@ void scheduler::start() { log::info(logs::sched, "Starting scheduler."); - wrmsr(msr::ia32_gs_base, reinterpret_cast(&bsp_cpu_data)); - m_apic->enable_timer(isr::isrTimer, false); - m_apic->reset_timer(10); + m_apic.enable_timer(isr::isrTimer, false); + m_apic.reset_timer(10); } void @@ -205,7 +207,7 @@ void scheduler::schedule() { uint8_t priority = m_current->priority; - 
uint32_t remaining = m_apic->stop_timer(); + uint32_t remaining = m_apic.stop_timer(); m_current->time_left = remaining; thread *th = thread::from_tcb(m_current); const bool constant = th->has_state(thread::state::constant); @@ -214,7 +216,7 @@ scheduler::schedule() if (priority < max_priority && !constant) { // Process used its whole timeslice, demote it ++m_current->priority; - log::info(logs::sched, "Scheduler demoting thread %llx, priority %d", + log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d", th->koid(), m_current->priority); } m_current->time_left = quantum(m_current->priority); @@ -247,13 +249,14 @@ scheduler::schedule() auto *next = m_runlists[priority].pop_front(); next->last_ran = m_clock; - m_apic->reset_timer(next->time_left); + m_apic.reset_timer(next->time_left); if (next != m_current) { thread *next_thread = thread::from_tcb(next); - bsp_cpu_data.t = next_thread; - bsp_cpu_data.p = &next_thread->parent(); + cpu_data &cpu = current_cpu(); + cpu.thread = next_thread; + cpu.process = &next_thread->parent(); m_current = next; log::debug(logs::sched, "Scheduler switching threads %llx->%llx", diff --git a/src/kernel/scheduler.h b/src/kernel/scheduler.h index 7c119ab..d3d91c4 100644 --- a/src/kernel/scheduler.h +++ b/src/kernel/scheduler.h @@ -10,13 +10,10 @@ namespace args { struct program; }} +struct cpu_data; class lapic; class process; struct page_table; -struct cpu_state; - -extern "C" void isr_handler(cpu_state*); -extern "C" void task_switch(TCB *next); /// The task scheduler @@ -42,8 +39,8 @@ public: static const uint16_t process_quanta = 10; /// Constructor. - /// \arg apic Pointer to the local APIC object - scheduler(lapic *apic); + /// \arg apic The local APIC object for this CPU + scheduler(lapic &apic); /// Create a new process from a program image in memory. 
/// \arg program The descriptor of the pogram in memory @@ -82,7 +79,6 @@ public: static scheduler & get() { return *s_instance; } private: - friend uintptr_t syscall_dispatch(uintptr_t, cpu_state &); friend class process; static constexpr uint64_t promote_frequency = 10; @@ -96,7 +92,7 @@ private: void prune(uint64_t now); void check_promotions(uint64_t now); - lapic *m_apic; + lapic &m_apic; uint32_t m_next_pid; uint32_t m_tick_count; diff --git a/src/kernel/task.s b/src/kernel/task.s index be201d2..2f3bbc4 100644 --- a/src/kernel/task.s +++ b/src/kernel/task.s @@ -1,6 +1,5 @@ %include "tasking.inc" -extern g_tss global task_switch task_switch: push rbp @@ -18,7 +17,7 @@ task_switch: mov [rax + TCB.rsp], rsp ; Copy off saved user rsp - mov rcx, [gs:CPU_DATA.rsp3] ; rcx: curretn task's saved user rsp + mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp mov [rax + TCB.rsp3], rcx ; Install next task's TCB @@ -31,7 +30,7 @@ task_switch: mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack mov [gs:CPU_DATA.rsp0], rcx - lea rdx, [rel g_tss] ; rdx: address of TSS + mov rdx, [gs:CPU_DATA.tss] ; rdx: address of TSS mov [rdx + TSS.rsp0], rcx ; Update saved user rsp @@ -67,3 +66,8 @@ initialize_main_thread: ; the entrypoint should already be on the stack jmp kernel_to_user_trampoline + +global _current_gsbase +_current_gsbase: + mov rax, [gs:CPU_DATA.self] + ret diff --git a/src/kernel/tasking.inc b/src/kernel/tasking.inc index d3a711a..4c7ffdb 100644 --- a/src/kernel/tasking.inc +++ b/src/kernel/tasking.inc @@ -1,14 +1,20 @@ struc TCB .rsp: resq 1 -.rsp0: resq 1 -.rsp3: resq 1 +.rsp0: resq 1 +.rsp3: resq 1 .pml4: resq 1 endstruc struc CPU_DATA +.self: resq 1 +.id: resq 1 .rsp0: resq 1 .rsp3: resq 1 .tcb: resq 1 +.thread: resq 1 +.process: resq 1 +.tss: resq 1 +.gdt: resq 1 endstruc struc TSS diff --git a/src/kernel/tss.cpp b/src/kernel/tss.cpp new file mode 100644 index 0000000..9c7b057 --- /dev/null +++ b/src/kernel/tss.cpp @@ -0,0 +1,39 @@ 
+#include "kutil/assert.h"
+#include "kutil/memory.h"
+#include "kutil/no_construct.h"
+#include "cpu.h"
+#include "tss.h"
+
+// The BSP's TSS is initialized _before_ global constructors are called,
+// so we don't want it to have a global constructor, lest it overwrite
+// the previous initialization.
+static kutil::no_construct<TSS> __g_bsp_tss_storage;
+TSS &g_bsp_tss = __g_bsp_tss_storage.value;
+
+
+TSS::TSS()
+{
+	kutil::memset(this, 0, sizeof(TSS));
+	m_iomap_offset = sizeof(TSS); // I/O map offset points just past the TSS itself
+}
+
+TSS &
+TSS::current()
+{
+	return *current_cpu().tss;
+}
+
+uintptr_t &
+TSS::ring_stack(unsigned ring)
+{
+	kassert(ring < 3, "Bad ring passed to TSS::ring_stack."); // m_rsp only holds CPL 0-2
+	return m_rsp[ring];
+}
+
+uintptr_t &
+TSS::ist_stack(unsigned ist)
+{
+	kassert(ist > 0 && ist <= 7, "Bad ist passed to TSS::ist_stack."); // IST1-IST7 are valid; m_ist[0] is reserved
+	return m_ist[ist];
+}
+
diff --git a/src/kernel/tss.h b/src/kernel/tss.h
new file mode 100644
index 0000000..d0fe2b1
--- /dev/null
+++ b/src/kernel/tss.h
@@ -0,0 +1,35 @@
+#pragma once
+/// \file tss.h
+/// Definitions relating to the TSS
+#include <stdint.h>
+
+/// The 64bit TSS table
+class TSS
+{
+public:
+	TSS();
+
+	/// Get the currently running CPU's TSS.
+	static TSS & current();
+
+	/// Ring stack accessor. Returns a mutable reference.
+	/// \arg ring Which ring (0-2) to get the stack for
+	/// \returns A mutable reference to the stack pointer
+	uintptr_t & ring_stack(unsigned ring);
+
+	/// IST stack accessor. Returns a mutable reference.
+ /// \arg ist Which IST entry (1-7) to get the stack for + /// \returns A mutable reference to the stack pointer + uintptr_t & ist_stack(unsigned ist); + +private: + uint32_t m_reserved0; + + uintptr_t m_rsp[3]; // stack pointers for CPL 0-2 + uintptr_t m_ist[8]; // ist[0] is reserved + + uint64_t m_reserved1; + uint16_t m_reserved2; + uint16_t m_iomap_offset; +} __attribute__ ((packed)); + diff --git a/src/libraries/cpu/cpu.cpp b/src/libraries/cpu/cpu_id.cpp similarity index 93% rename from src/libraries/cpu/cpu.cpp rename to src/libraries/cpu/cpu_id.cpp index 98395e5..dfd4ae0 100644 --- a/src/libraries/cpu/cpu.cpp +++ b/src/libraries/cpu/cpu_id.cpp @@ -1,5 +1,5 @@ #include -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" namespace cpu { @@ -94,4 +94,13 @@ cpu_id::has_feature(feature feat) return (m_features & (1 << static_cast(feat))) != 0; } +uint8_t +cpu_id::local_apic_id() const +{ + uint32_t eax_unused; + uint32_t ebx; + __cpuid(1, 0, &eax_unused, &ebx); + return static_cast(ebx >> 24); +} + } diff --git a/src/libraries/cpu/include/cpu/cpu.h b/src/libraries/cpu/include/cpu/cpu_id.h similarity index 93% rename from src/libraries/cpu/include/cpu/cpu.h rename to src/libraries/cpu/include/cpu/cpu_id.h index ceace0f..fa7ccd5 100644 --- a/src/libraries/cpu/include/cpu/cpu.h +++ b/src/libraries/cpu/include/cpu/cpu_id.h @@ -1,5 +1,5 @@ #pragma once -/// \file cpu.h Definition of required cpu features for jsix +/// \file cpu_id.h Definition of required cpu features for jsix #include @@ -48,6 +48,9 @@ public: /// \returns A |regs| struct of the values retuned regs get(uint32_t leaf, uint32_t sub = 0) const; + /// Get the local APIC ID of the current CPU + uint8_t local_apic_id() const; + /// Get the name of the cpu vendor (eg, "GenuineIntel") inline const char * vendor_id() const { return m_vendor_id; }