diff --git a/modules.yaml b/modules.yaml index 608d729..e3d59ca 100644 --- a/modules.yaml +++ b/modules.yaml @@ -12,6 +12,7 @@ modules: - src/kernel source: - src/kernel/apic.cpp + - src/kernel/ap_startup.s - src/kernel/assert.cpp - src/kernel/boot.s - src/kernel/clock.cpp @@ -24,8 +25,9 @@ modules: - src/kernel/frame_allocator.cpp - src/kernel/fs/gpt.cpp - src/kernel/gdt.cpp - - src/kernel/gdt.s + - src/kernel/gdtidt.s - src/kernel/hpet.cpp + - src/kernel/idt.cpp - src/kernel/interrupts.cpp - src/kernel/interrupts.s - src/kernel/io.cpp @@ -56,6 +58,7 @@ modules: - src/kernel/syscalls/thread.cpp - src/kernel/syscalls/vm_area.cpp - src/kernel/task.s + - src/kernel/tss.cpp - src/kernel/vm_space.cpp boot: @@ -118,7 +121,7 @@ modules: includes: - src/libraries/cpu/include source: - - src/libraries/cpu/cpu.cpp + - src/libraries/cpu/cpu_id.cpp j6: kind: lib diff --git a/src/boot/main.cpp b/src/boot/main.cpp index 500b692..5412d4e 100644 --- a/src/boot/main.cpp +++ b/src/boot/main.cpp @@ -8,7 +8,7 @@ #include #include "console.h" -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" #include "error.h" #include "fs.h" #include "hardware.h" diff --git a/src/kernel/ap_startup.s b/src/kernel/ap_startup.s new file mode 100644 index 0000000..327651f --- /dev/null +++ b/src/kernel/ap_startup.s @@ -0,0 +1,134 @@ +%include "tasking.inc" + +section .ap_startup + +BASE equ 0x8000 ; Where the kernel will map this at runtime + +CR0_PE equ (1 << 0) +CR0_MP equ (1 << 1) +CR0_ET equ (1 << 4) +CR0_NE equ (1 << 5) +CR0_WP equ (1 << 16) +CR0_PG equ (1 << 31) +CR0_VAL equ CR0_PE|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_PG + +CR4_DE equ (1 << 3) +CR4_PAE equ (1 << 5) +CR4_MCE equ (1 << 6) +CR4_PGE equ (1 << 7) +CR4_OSFXSR equ (1 << 9) +CR4_OSCMMEXCPT equ (1 << 10) +CR4_FSGSBASE equ (1 << 16) +CR4_PCIDE equ (1 << 17) +CR4_VAL equ CR4_DE|CR4_PAE|CR4_MCE|CR4_PGE|CR4_OSFXSR|CR4_OSCMMEXCPT|CR4_FSGSBASE|CR4_PCIDE + +EFER_MSR equ 0xC0000080 +EFER_SCE equ (1 << 0) +EFER_LME equ (1 << 8) +EFER_NXE equ (1 << 11) 
+EFER_VAL equ EFER_SCE|EFER_LME|EFER_NXE + +bits 16 +default rel +align 8 + +global ap_startup +ap_startup: + jmp .start_real + +align 8 + .pml4: dq 0 + .stack: dq 0 + .ret: dq 0 + +align 16 +.gdt: + dq 0x0 ; Null GDT entry + + dq 0x00209A0000000000 ; Code + dq 0x0000920000000000 ; Data + +align 4 +.gdtd: + dw ($ - .gdt) + dd BASE + (.gdt - ap_startup) + +align 4 +.idtd: + dw 0 ; zero-length IDT descriptor + dd 0 + +.start_real: + cli + cld + + xor ax, ax + mov ds, ax + + ; set the temporary null IDT + lidt [BASE + (.idtd - ap_startup)] + + ; Enter long mode + mov eax, CR4_VAL + mov cr4, eax + + mov eax, [BASE + (.pml4 - ap_startup)] + mov cr3, eax + + mov ecx, EFER_MSR + rdmsr + or eax, EFER_VAL + wrmsr + + mov eax, CR0_VAL + mov cr0, eax + + ; Set the temporary minimal GDT + lgdt [BASE + (.gdtd - ap_startup)] + + jmp (1 << 3):(BASE + (.start_long - ap_startup)) + +bits 64 +default abs +align 8 +.start_long: + ; set data segments + mov ax, (2 << 3) + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + mov rax, [BASE + (.stack - ap_startup)] + mov rsp, rax + + mov rax, [BASE + (.ret - ap_startup)] + jmp rax + + +global ap_startup_code_size +ap_startup_code_size: + dq ($ - ap_startup) + + +section .text +global init_ap_trampoline +init_ap_trampoline: + push rbp + mov rbp, rsp + + ; rdi is the kernel pml4 + mov [BASE + (ap_startup.pml4 - ap_startup)], rdi + + ; rsi is the stack for this AP + mov [BASE + (ap_startup.stack - ap_startup)], rsi + + ; rdx is the address to jump to + mov [BASE + (ap_startup.ret - ap_startup)], rdx + + ; rcx is the processor id + mov rdi, rdx + + pop rbp + ret diff --git a/src/kernel/apic.cpp b/src/kernel/apic.cpp index 7515d1c..186c820 100644 --- a/src/kernel/apic.cpp +++ b/src/kernel/apic.cpp @@ -6,11 +6,16 @@ #include "kernel_memory.h" #include "log.h" +static constexpr uint16_t lapic_id = 0x0020; static constexpr uint16_t lapic_spurious = 0x00f0; +static constexpr uint16_t lapic_icr_low = 0x0300; +static constexpr 
uint16_t lapic_icr_high = 0x0310; + static constexpr uint16_t lapic_lvt_timer = 0x0320; static constexpr uint16_t lapic_lvt_lint0 = 0x0350; static constexpr uint16_t lapic_lvt_lint1 = 0x0360; +static constexpr uint16_t lapic_lvt_error = 0x0370; static constexpr uint16_t lapic_timer_init = 0x0380; static constexpr uint16_t lapic_timer_cur = 0x0390; @@ -25,6 +30,7 @@ apic_read(uint32_t volatile *apic, uint16_t offset) static void apic_write(uint32_t volatile *apic, uint16_t offset, uint32_t value) { + log::debug(logs::apic, "LAPIC write: %x = %08lx", offset, value); *(apic + offset/sizeof(uint32_t)) = value; } @@ -52,10 +58,38 @@ lapic::lapic(uintptr_t base, isr spurious) : apic(base), m_divisor(0) { + apic_write(m_base, lapic_lvt_error, static_cast(isr::isrAPICError)); apic_write(m_base, lapic_spurious, static_cast(spurious)); log::info(logs::apic, "LAPIC created, base %lx", m_base); } +uint8_t +lapic::get_id() +{ + return static_cast(apic_read(m_base, lapic_id) >> 24); +} + +void +lapic::send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest) +{ + // Wait until the APIC is ready to send + ipi_wait(); + + apic_write(m_base, lapic_icr_high, static_cast(dest) << 24); + uint32_t command = + static_cast(vector) | + static_cast(mode) << 8; + + apic_write(m_base, lapic_icr_low, command); +} + +void +lapic::ipi_wait() +{ + while (apic_read(m_base, lapic_icr_low) & (1<<12)) + asm volatile ("pause" : : : "memory"); +} + void lapic::calibrate_timer() { diff --git a/src/kernel/apic.h b/src/kernel/apic.h index 54b86ee..9b434ac 100644 --- a/src/kernel/apic.h +++ b/src/kernel/apic.h @@ -29,6 +29,27 @@ public: /// \arg spurious Vector of the spurious interrupt handler lapic(uintptr_t base, isr spurious); + /// Get the local APIC's ID + uint8_t get_id(); + + enum class ipi_mode : uint8_t { + fixed = 0, + smi = 2, + nmi = 4, + init = 5, + startup = 6, + }; + + /// Send an inter-processor interrupt. 
+ /// \arg mode The sending mode + /// \arg vector The interrupt vector + /// \arg dest The APIC ID of the destination + void send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest); + + /// Wait for an IPI to finish sending. This is done automatically + /// before sending another IPI with send_ipi(). + void ipi_wait(); + /// Enable interrupts for the LAPIC timer. /// \arg vector Interrupt vector the timer should use /// \arg repeat If false, this timer is one-off, otherwise repeating diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index 4b0720e..9d5aca7 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -1,11 +1,19 @@ #include #include "kutil/assert.h" #include "kutil/memory.h" +#include "apic.h" #include "cpu.h" -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" +#include "device_manager.h" +#include "gdt.h" +#include "idt.h" +#include "kernel_memory.h" #include "log.h" +#include "msr.h" +#include "objects/vm_area.h" +#include "tss.h" -cpu_data bsp_cpu_data; +cpu_data g_bsp_cpu_data; void cpu_validate() @@ -29,3 +37,70 @@ cpu_validate() #undef CPU_FEATURE_OPT #undef CPU_FEATURE_REQ } + +void +init_cpu(bool bsp) +{ + extern TSS &g_bsp_tss; + extern GDT &g_bsp_gdt; + extern vm_area_guarded &g_kernel_stacks; + + uint8_t id = 0; + + TSS *tss = nullptr; + GDT *gdt = nullptr; + cpu_data *cpu = nullptr; + + if (bsp) { + gdt = &g_bsp_gdt; + tss = &g_bsp_tss; + cpu = &g_bsp_cpu_data; + } else { + g_idt.install(); + + tss = new TSS; + gdt = new GDT {tss}; + cpu = new cpu_data; + + gdt->install(); + + lapic &apic = device_manager::get().get_lapic(); + id = apic.get_id(); + } + + kutil::memset(cpu, 0, sizeof(cpu_data)); + + cpu->self = cpu; + cpu->id = id; + cpu->gdt = gdt; + cpu->tss = tss; + + // Install the GS base pointing to the cpu_data + wrmsr(msr::ia32_gs_base, reinterpret_cast(cpu)); + + using memory::frame_size; + using memory::kernel_stack_pages; + constexpr size_t stack_size = kernel_stack_pages * frame_size; + + uint8_t ist_entries = 
g_idt.used_ist_entries(); + + // Set up the IST stacks + for (unsigned ist = 1; ist < 8; ++ist) { + if (!(ist_entries & (1 << ist))) + continue; + + // Two zero entries at the top for the null frame + uintptr_t stack_bottom = g_kernel_stacks.get_section(); + uintptr_t stack_top = stack_bottom + stack_size - 2 * sizeof(uintptr_t); + + // Pre-realize these stacks, they're no good if they page fault + *reinterpret_cast(stack_top) = 0; + + tss->ist_stack(ist) = stack_top; + } + + // Set up the page attributes table + uint64_t pat = rdmsr(msr::ia32_pat); + pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC + wrmsr(msr::ia32_pat, pat); +} diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index ed5722d..029938d 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -2,9 +2,13 @@ #include +#include "kutil/spinlock.h" + +class GDT; +class process; struct TCB; class thread; -class process; +class TSS; struct cpu_state { @@ -18,15 +22,34 @@ struct cpu_state /// version in 'tasking.inc' struct cpu_data { + cpu_data *self; + uint64_t id; uintptr_t rsp0; uintptr_t rsp3; TCB *tcb; - thread *t; - process *p; + thread *thread; + process *process; + TSS *tss; + GDT *gdt; + + // Values from here on don't need to be in the asm version + kutil::spinlock::node spinner; }; -extern cpu_data bsp_cpu_data; +extern "C" cpu_data * _current_gsbase(); -// We already validated the required options in the bootloader, -// but iterate the options and log about them. +/// Initialize a CPU and set up its cpu_data structure +/// \arg bsp True if the current CPU is the BSP +void init_cpu(bool bsp); + +/// Get the cpu_data struct for the current executing CPU +inline cpu_data & current_cpu() { return *_current_gsbase(); } + +/// Validate the required CPU features are present. Really, the bootloader already +/// validated the required features, but still iterate the options and log about them. void cpu_validate(); + +/// Set up the running CPU. 
This sets GDT, IDT, and necessary MSRs as well as creating +/// the cpu_data structure for this processor. +/// \arg bsp True if this CPU is the BSP +void cpu_initialize(bool bsp); diff --git a/src/kernel/debug.cpp b/src/kernel/debug.cpp index 1cd79e2..ec1aa5e 100644 --- a/src/kernel/debug.cpp +++ b/src/kernel/debug.cpp @@ -13,6 +13,7 @@ void print_regs(const cpu_state &regs) { console *cons = console::get(); + cpu_data &cpu = current_cpu(); uint64_t cr2 = 0; __asm__ __volatile__ ("mov %%cr2, %0" : "=r"(cr2)); @@ -20,8 +21,8 @@ print_regs(const cpu_state &regs) uintptr_t cr3 = 0; __asm__ __volatile__ ( "mov %%cr3, %0" : "=r" (cr3) ); - cons->printf(" process: %llx", bsp_cpu_data.p->koid()); - cons->printf(" thread: %llx\n", bsp_cpu_data.t->koid()); + cons->printf(" process: %llx", cpu.process->koid()); + cons->printf(" thread: %llx\n", cpu.thread->koid()); print_regL("rax", regs.rax); print_regM("rbx", regs.rbx); @@ -43,7 +44,7 @@ print_regs(const cpu_state &regs) cons->puts("\n\n"); print_regL("rbp", regs.rbp); print_regM("rsp", regs.user_rsp); - print_regR("sp0", bsp_cpu_data.rsp0); + print_regR("sp0", cpu.rsp0); print_regL("rip", regs.rip); print_regM("cr3", cr3); diff --git a/src/kernel/device_manager.cpp b/src/kernel/device_manager.cpp index 19e4939..2f8f7d3 100644 --- a/src/kernel/device_manager.cpp +++ b/src/kernel/device_manager.cpp @@ -204,7 +204,8 @@ device_manager::load_apic(const acpi_table_header *header) case 0: { // Local APIC uint8_t uid = kutil::read_from(p+2); uint8_t id = kutil::read_from(p+3); - log::debug(logs::device, " Local APIC uid %x id %x", id); + m_apic_ids.append(id); + log::debug(logs::device, " Local APIC uid %x id %x", uid, id); } break; diff --git a/src/kernel/device_manager.h b/src/kernel/device_manager.h index d18c670..539f1c2 100644 --- a/src/kernel/device_manager.h +++ b/src/kernel/device_manager.h @@ -26,7 +26,7 @@ public: /// Get the LAPIC /// \returns An object representing the local APIC - lapic * get_lapic() { return m_lapic; } + 
lapic & get_lapic() { return *m_lapic; } /// Get an IOAPIC /// \arg i Index of the requested IOAPIC @@ -94,6 +94,9 @@ public: &m_hpets[i] : nullptr; } + /// Get the list of APIC ids for other CPUs + inline const kutil::vector & get_apic_ids() const { return m_apic_ids; } + private: /// Parse the ACPI XSDT and load relevant sub-tables. /// \arg xsdt Pointer to the XSDT from the firmware @@ -122,6 +125,7 @@ private: lapic *m_lapic; kutil::vector m_ioapics; kutil::vector m_hpets; + kutil::vector m_apic_ids; kutil::vector m_pci; kutil::vector m_devices; diff --git a/src/kernel/gdt.cpp b/src/kernel/gdt.cpp index d629eda..389f3e4 100644 --- a/src/kernel/gdt.cpp +++ b/src/kernel/gdt.cpp @@ -1,36 +1,80 @@ #include #include "kutil/assert.h" -#include "kutil/enum_bitfields.h" #include "kutil/memory.h" +#include "kutil/no_construct.h" #include "console.h" -#include "kernel_memory.h" +#include "cpu.h" +#include "gdt.h" #include "log.h" +#include "tss.h" + +extern "C" void gdt_write(const void *gdt_ptr, uint16_t cs, uint16_t ds, uint16_t tr); + +static constexpr uint8_t kern_cs_index = 1; +static constexpr uint8_t kern_ss_index = 2; +static constexpr uint8_t user_cs32_index = 3; +static constexpr uint8_t user_ss_index = 4; +static constexpr uint8_t user_cs64_index = 5; +static constexpr uint8_t tss_index = 6; // Note that this takes TWO GDT entries + +// The BSP's GDT is initialized _before_ global constructors are called, +// so we don't want it to have a global constructor, lest it overwrite +// the previous initialization. 
+static kutil::no_construct __g_bsp_gdt_storage; +GDT &g_bsp_gdt = __g_bsp_gdt_storage.value; -enum class gdt_type : uint8_t +GDT::GDT(TSS *tss) : + m_tss(tss) { - accessed = 0x01, - read_write = 0x02, - conforming = 0x04, - execute = 0x08, - system = 0x10, - ring1 = 0x20, - ring2 = 0x40, - ring3 = 0x60, - present = 0x80 -}; -IS_BITFIELD(gdt_type); + kutil::memset(this, 0, sizeof(GDT)); -struct gdt_descriptor + m_ptr.limit = sizeof(m_entries) - 1; + m_ptr.base = &m_entries[0]; + + // Kernel CS/SS - always 64bit + set(kern_cs_index, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute); + set(kern_ss_index, 0, 0xfffff, true, gdt_type::read_write); + + // User CS32/SS/CS64 - layout expected by SYSRET + set(user_cs32_index, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); + set(user_ss_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write); + set(user_cs64_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); + + set_tss(tss); +} + +GDT & +GDT::current() { - uint16_t limit_low; - uint16_t base_low; - uint8_t base_mid; - gdt_type type; - uint8_t size; - uint8_t base_high; -} __attribute__ ((packed)); + cpu_data &cpu = current_cpu(); + return *cpu.gdt; +} + +void +GDT::install() const +{ + gdt_write( + static_cast(&m_ptr), + kern_cs_index << 3, + kern_ss_index << 3, + tss_index << 3); +} + +void +GDT::set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type) +{ + m_entries[i].limit_low = limit & 0xffff; + m_entries[i].size = (limit >> 16) & 0xf; + m_entries[i].size |= (is64 ? 
0xa0 : 0xc0); + + m_entries[i].base_low = base & 0xffff; + m_entries[i].base_mid = (base >> 16) & 0xff; + m_entries[i].base_high = (base >> 24) & 0xff; + + m_entries[i].type = type | gdt_type::system | gdt_type::present; +} struct tss_descriptor { @@ -44,72 +88,16 @@ struct tss_descriptor uint32_t reserved; } __attribute__ ((packed)); -struct tss_entry -{ - uint32_t reserved0; - - uint64_t rsp[3]; // stack pointers for CPL 0-2 - uint64_t ist[8]; // ist[0] is reserved - - uint64_t reserved1; - uint16_t reserved2; - uint16_t iomap_offset; -} __attribute__ ((packed)); - -struct idt_descriptor -{ - uint16_t base_low; - uint16_t selector; - uint8_t ist; - uint8_t flags; - uint16_t base_mid; - uint32_t base_high; - uint32_t reserved; // must be zero -} __attribute__ ((packed)); - -struct table_ptr -{ - uint16_t limit; - uint64_t base; -} __attribute__ ((packed)); - - -gdt_descriptor g_gdt_table[10]; -idt_descriptor g_idt_table[256]; -table_ptr g_gdtr; -table_ptr g_idtr; -tss_entry g_tss; - - -extern "C" { - void idt_write(); - void idt_load(); - - void gdt_write(uint16_t cs, uint16_t ds, uint16_t tr); - void gdt_load(); -} - void -gdt_set_entry(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type) -{ - g_gdt_table[i].limit_low = limit & 0xffff; - g_gdt_table[i].size = (limit >> 16) & 0xf; - g_gdt_table[i].size |= (is64 ? 
0xa0 : 0xc0); - - g_gdt_table[i].base_low = base & 0xffff; - g_gdt_table[i].base_mid = (base >> 16) & 0xff; - g_gdt_table[i].base_high = (base >> 24) & 0xff; - - g_gdt_table[i].type = type | gdt_type::system | gdt_type::present; -} - -void -tss_set_entry(uint8_t i, uint64_t base, uint64_t limit) +GDT::set_tss(TSS *tss) { tss_descriptor tssd; + + size_t limit = sizeof(TSS); tssd.limit_low = limit & 0xffff; tssd.size = (limit >> 16) & 0xf; + uintptr_t base = reinterpret_cast(tss); tssd.base_00 = base & 0xffff; tssd.base_16 = (base >> 16) & 0xff; tssd.base_24 = (base >> 24) & 0xff; @@ -121,123 +109,26 @@ tss_set_entry(uint8_t i, uint64_t base, uint64_t limit) gdt_type::execute | gdt_type::ring3 | gdt_type::present; - kutil::memcpy(&g_gdt_table[i], &tssd, sizeof(tss_descriptor)); + + kutil::memcpy(&m_entries[tss_index], &tssd, sizeof(tss_descriptor)); } void -idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags) +GDT::dump(unsigned index) const { - g_idt_table[i].base_low = addr & 0xffff; - g_idt_table[i].base_mid = (addr >> 16) & 0xffff; - g_idt_table[i].base_high = (addr >> 32) & 0xffffffff; - g_idt_table[i].selector = selector; - g_idt_table[i].flags = flags; - g_idt_table[i].ist = 0; - g_idt_table[i].reserved = 0; -} - -void -tss_set_stack(unsigned ring, uintptr_t rsp) -{ - kassert(ring < 3, "Bad ring passed to tss_set_stack."); - g_tss.rsp[ring] = rsp; -} - -uintptr_t -tss_get_stack(unsigned ring) -{ - kassert(ring < 3, "Bad ring passed to tss_get_stack."); - return g_tss.rsp[ring]; -} - -void -idt_set_ist(unsigned i, unsigned ist) -{ - g_idt_table[i].ist = ist; -} - -void -tss_set_ist(unsigned ist, uintptr_t rsp) -{ - kassert(ist > 0 && ist < 7, "Bad ist passed to tss_set_ist."); - g_tss.ist[ist] = rsp; -} - -void -ist_increment(unsigned i) -{ - uint8_t ist = g_idt_table[i].ist; - if (ist) - g_tss.ist[ist] += memory::frame_size; -} - -void -ist_decrement(unsigned i) -{ - uint8_t ist = g_idt_table[i].ist; - if (ist) - g_tss.ist[ist] -= 
memory::frame_size; -} - -uintptr_t -tss_get_ist(unsigned ist) -{ - kassert(ist > 0 && ist < 7, "Bad ist passed to tss_get_ist."); - return g_tss.ist[ist]; -} - -void -gdt_init() -{ - kutil::memset(&g_gdt_table, 0, sizeof(g_gdt_table)); - kutil::memset(&g_idt_table, 0, sizeof(g_idt_table)); - - g_gdtr.limit = sizeof(g_gdt_table) - 1; - g_gdtr.base = reinterpret_cast(&g_gdt_table); - - // Kernel CS/SS - always 64bit - gdt_set_entry(1, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute); - gdt_set_entry(2, 0, 0xfffff, true, gdt_type::read_write); - - // User CS32/SS/CS64 - layout expected by SYSRET - gdt_set_entry(3, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); - gdt_set_entry(4, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write); - gdt_set_entry(5, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute); - - kutil::memset(&g_tss, 0, sizeof(tss_entry)); - g_tss.iomap_offset = sizeof(tss_entry); - - uintptr_t tss_base = reinterpret_cast(&g_tss); - - // Note that this takes TWO GDT entries - tss_set_entry(6, tss_base, sizeof(tss_entry)); - - gdt_write(1 << 3, 2 << 3, 6 << 3); - - g_idtr.limit = sizeof(g_idt_table) - 1; - g_idtr.base = reinterpret_cast(&g_idt_table); - - idt_write(); -} - -void -gdt_dump(unsigned index) -{ - const table_ptr &table = g_gdtr; - console *cons = console::get(); unsigned start = 0; - unsigned count = (table.limit + 1) / sizeof(gdt_descriptor); + unsigned count = (m_ptr.limit + 1) / sizeof(descriptor); if (index != -1) { start = index; count = 1; } else { - cons->printf(" GDT: loc:%lx size:%d\n", table.base, table.limit+1); + cons->printf(" GDT: loc:%lx size:%d\n", m_ptr.base, m_ptr.limit+1); } - const gdt_descriptor *gdt = - reinterpret_cast(table.base); + const descriptor *gdt = + reinterpret_cast(m_ptr.base); for (int i = start; i < start+count; ++i) { uint32_t base = @@ -275,51 +166,3 @@ gdt_dump(unsigned index) (gdt[i].size & 0x60) == 0x40 ? 
"32" : "16"); } } - -void -idt_dump(unsigned index) -{ - const table_ptr &table = g_idtr; - - - unsigned start = 0; - unsigned count = (table.limit + 1) / sizeof(idt_descriptor); - if (index != -1) { - start = index; - count = 1; - log::info(logs::boot, "IDT FOR INDEX %02x", index); - } else { - log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", table.base, table.limit+1); - } - - const idt_descriptor *idt = - reinterpret_cast(table.base); - - for (int i = start; i < start+count; ++i) { - uint64_t base = - (static_cast(idt[i].base_high) << 32) | - (static_cast(idt[i].base_mid) << 16) | - idt[i].base_low; - - char const *type; - switch (idt[i].flags & 0xf) { - case 0x5: type = " 32tsk "; break; - case 0x6: type = " 16int "; break; - case 0x7: type = " 16trp "; break; - case 0xe: type = " 32int "; break; - case 0xf: type = " 32trp "; break; - default: type = " ????? "; break; - } - - if (idt[i].flags & 0x80) { - log::debug(logs::boot, - " Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d", i, base, - (idt[i].selector & 0x3), - ((idt[i].selector & 0x4) >> 2), - (idt[i].selector >> 3), - idt[i].ist, - type, - ((idt[i].flags >> 5) & 0x3)); - } - } -} diff --git a/src/kernel/gdt.h b/src/kernel/gdt.h index 9e8a959..984636b 100644 --- a/src/kernel/gdt.h +++ b/src/kernel/gdt.h @@ -1,58 +1,66 @@ #pragma once /// \file gdt.h -/// Definitions relating to system descriptor tables: GDT, IDT, TSS +/// Definitions relating to a CPU's GDT table #include -/// Set up the GDT and TSS, and switch segment registers to point -/// to them. 
-void gdt_init(); +#include "kutil/enum_bitfields.h" -/// Set an entry in the IDT -/// \arg i Index in the IDT (vector of the interrupt this handles) -/// \arg addr Address of the handler -/// \arg selector GDT selector to set when invoking this handler -/// \arg flags Descriptor flags to set -void idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags); +class TSS; -/// Set the stack pointer for a given ring in the TSS -/// \arg ring Ring to set for (0-2) -/// \arg rsp Stack pointer to set -void tss_set_stack(unsigned ring, uintptr_t rsp); +enum class gdt_type : uint8_t +{ + accessed = 0x01, + read_write = 0x02, + conforming = 0x04, + execute = 0x08, + system = 0x10, + ring1 = 0x20, + ring2 = 0x40, + ring3 = 0x60, + present = 0x80 +}; +IS_BITFIELD(gdt_type); -/// Get the stack pointer for a given ring in the TSS -/// \arg ring Ring to get (0-2) -/// \returns Stack pointers for that ring -uintptr_t tss_get_stack(unsigned ring); +class GDT +{ +public: + GDT(TSS *tss); -/// Set the given IDT entry to use the given IST entry -/// \arg i Which IDT entry to set -/// \arg ist Which IST entry to set (1-7) -void idt_set_ist(unsigned i, unsigned ist); + /// Get the currently running CPU's GDT + static GDT & current(); -/// Set the stack pointer for a given IST in the TSS -/// \arg ist Which IST entry to set (1-7) -/// \arg rsp Stack pointer to set -void tss_set_ist(unsigned ist, uintptr_t rsp); + /// Install this GDT to the current CPU + void install() const; -/// Increment the stack pointer for the given vector, -/// if it's using an IST entry -/// \arg i Which IDT entry to use -void ist_increment(unsigned i); + /// Get the address of the pointer + inline const void * pointer() const { return static_cast(&m_ptr); } -/// Decrement the stack pointer for the given vector, -/// if it's using an IST entry -/// \arg i Which IDT entry to use -void ist_decrement(unsigned i); + /// Dump debug information about the GDT to the console. 
+ /// \arg index Which entry to print, or -1 for all entries + void dump(unsigned index = -1) const; -/// Get the stack pointer for a given IST in the TSS -/// \arg ring Which IST entry to get (1-7) -/// \returns Stack pointers for that IST entry -uintptr_t tss_get_ist(unsigned ist); +private: + void set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type); + void set_tss(TSS *tss); -/// Dump information about the current GDT to the screen -/// \arg index Which entry to print, or -1 for all entries -void gdt_dump(unsigned index = -1); + struct descriptor + { + uint16_t limit_low; + uint16_t base_low; + uint8_t base_mid; + gdt_type type; + uint8_t size; + uint8_t base_high; + } __attribute__ ((packed, aligned(8))); -/// Dump information about the current IDT to the screen -/// \arg index Which entry to print, or -1 for all entries -void idt_dump(unsigned index = -1); + struct ptr + { + uint16_t limit; + descriptor *base; + } __attribute__ ((packed, aligned(4))); + + descriptor m_entries[8]; + TSS *m_tss; + + ptr m_ptr; +}; diff --git a/src/kernel/gdt.s b/src/kernel/gdt.s deleted file mode 100644 index c7d56ec..0000000 --- a/src/kernel/gdt.s +++ /dev/null @@ -1,35 +0,0 @@ -extern g_idtr -extern g_gdtr - -global idt_write -idt_write: - lidt [rel g_idtr] - ret - -global idt_load -idt_load: - sidt [rel g_idtr] - ret - -global gdt_write -gdt_write: - lgdt [rel g_gdtr] - mov ax, si ; second arg is data segment - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - mov ss, ax - push qword rdi ; first arg is code segment - lea rax, [rel .next] - push rax - o64 retf -.next: - ltr dx ; third arg is the TSS - ret - -global gdt_load -gdt_load: - sgdt [rel g_gdtr] - ret - diff --git a/src/kernel/gdtidt.s b/src/kernel/gdtidt.s new file mode 100644 index 0000000..33fd4db --- /dev/null +++ b/src/kernel/gdtidt.s @@ -0,0 +1,35 @@ + +global idt_write +idt_write: + lidt [rdi] ; first arg is the IDT pointer location + ret + +global idt_load +idt_load: + sidt [rdi] ; first arg 
is where to write the idtr value + ret + +global gdt_write +gdt_write: + lgdt [rdi] ; first arg is the GDT pointer location + + mov ax, dx ; third arg is data segment + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + push qword rsi ; second arg is code segment + lea rax, [rel .next] + push rax + o64 retf +.next: + ltr cx ; fourth arg is the TSS + ret + +global gdt_load +gdt_load: + sgdt [rdi] ; first arg is where to write the gdtr value + ret + diff --git a/src/kernel/idt.cpp b/src/kernel/idt.cpp new file mode 100644 index 0000000..03d3e1c --- /dev/null +++ b/src/kernel/idt.cpp @@ -0,0 +1,137 @@ +#include "kutil/memory.h" +#include "kutil/no_construct.h" +#include "idt.h" +#include "log.h" + +extern "C" { + void idt_write(const void *idt_ptr); + +#define ISR(i, s, name) extern void name (); +#define EISR(i, s, name) extern void name (); +#define IRQ(i, q, name) extern void name (); +#include "interrupt_isrs.inc" +#undef IRQ +#undef EISR +#undef ISR +} + +// The IDT is initialized _before_ global constructors are called, +// so we don't want it to have a global constructor, lest it overwrite +// the previous initialization. 
+static kutil::no_construct __g_idt_storage; +IDT &g_idt = __g_idt_storage.value; + + +IDT::IDT() +{ + kutil::memset(this, 0, sizeof(IDT)); + m_ptr.limit = sizeof(m_entries) - 1; + m_ptr.base = &m_entries[0]; + +#define ISR(i, s, name) set(i, & name, 0x08, 0x8e); +#define EISR(i, s, name) set(i, & name, 0x08, 0x8e); +#define IRQ(i, q, name) set(i, & name, 0x08, 0x8e); +#include "interrupt_isrs.inc" +#undef IRQ +#undef EISR +#undef ISR +} + +void +IDT::install() const +{ + idt_write(static_cast(&m_ptr)); +} + +void +IDT::add_ist_entries() +{ +#define ISR(i, s, name) if (s) { set_ist(i, s); } +#define EISR(i, s, name) if (s) { set_ist(i, s); } +#define IRQ(i, q, name) +#include "interrupt_isrs.inc" +#undef IRQ +#undef EISR +#undef ISR +} + +uint8_t +IDT::used_ist_entries() const +{ + uint8_t entries = 0; + +#define ISR(i, s, name) if (s) { entries |= (1 << s); } +#define EISR(i, s, name) if (s) { entries |= (1 << s); } +#define IRQ(i, q, name) +#include "interrupt_isrs.inc" +#undef IRQ +#undef EISR +#undef ISR + + return entries; +} + +void +IDT::set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags) +{ + uintptr_t addr = reinterpret_cast(handler); + + m_entries[i].base_low = addr & 0xffff; + m_entries[i].base_mid = (addr >> 16) & 0xffff; + m_entries[i].base_high = (addr >> 32) & 0xffffffff; + m_entries[i].selector = selector; + m_entries[i].flags = flags; + m_entries[i].ist = 0; + m_entries[i].reserved = 0; +} + +void +IDT::set_ist(uint8_t i, uint8_t ist) +{ + m_entries[i].ist = ist; +} + +void +IDT::dump(unsigned index) const +{ + unsigned start = 0; + unsigned count = (m_ptr.limit + 1) / sizeof(descriptor); + if (index != -1) { + start = index; + count = 1; + log::info(logs::boot, "IDT FOR INDEX %02x", index); + } else { + log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", m_ptr.base, m_ptr.limit+1); + } + + const descriptor *idt = + reinterpret_cast(m_ptr.base); + + for (int i = start; i < start+count; ++i) { + uint64_t base = + 
(static_cast(idt[i].base_high) << 32) | + (static_cast(idt[i].base_mid) << 16) | + idt[i].base_low; + + char const *type; + switch (idt[i].flags & 0xf) { + case 0x5: type = " 32tsk "; break; + case 0x6: type = " 16int "; break; + case 0x7: type = " 16trp "; break; + case 0xe: type = " 32int "; break; + case 0xf: type = " 32trp "; break; + default: type = " ????? "; break; + } + + if (idt[i].flags & 0x80) { + log::debug(logs::boot, + " Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d", i, base, + (idt[i].selector & 0x3), + ((idt[i].selector & 0x4) >> 2), + (idt[i].selector >> 3), + idt[i].ist, + type, + ((idt[i].flags >> 5) & 0x3)); + } + } +} diff --git a/src/kernel/idt.h b/src/kernel/idt.h new file mode 100644 index 0000000..fc5ac24 --- /dev/null +++ b/src/kernel/idt.h @@ -0,0 +1,61 @@ +#pragma once +/// \file idt.h +/// Definitions relating to a CPU's IDT table +#include + +class IDT +{ +public: + static constexpr unsigned count = 256; + + IDT(); + + /// Install this IDT to the current CPU + void install() const; + + /// Add the IST entries listed in the ISR table into the IDT. + /// This can't be done until after memory is set up so the + /// stacks can be created. + void add_ist_entries(); + + /// Get the IST entry used by an entry. + /// \arg i Which IDT entry to look in + /// \returns The IST index used by entry i, or 0 for none + inline uint8_t get_ist(unsigned i) const { + if (i >= count) return 0; + return m_entries[i].ist; + } + + /// Get the IST entries that are used by this table, as a bitmap + uint8_t used_ist_entries() const; + + /// Dump debug information about the IDT to the console. 
+ /// \arg index Which entry to print, or -1 for all entries + void dump(unsigned index = -1) const; + +private: + void set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags); + void set_ist(uint8_t i, uint8_t ist); + + struct descriptor + { + uint16_t base_low; + uint16_t selector; + uint8_t ist; + uint8_t flags; + uint16_t base_mid; + uint32_t base_high; + uint32_t reserved; // must be zero + } __attribute__ ((packed, aligned(16))); + + struct ptr + { + uint16_t limit; + descriptor *base; + } __attribute__ ((packed, aligned(4))); + + descriptor m_entries[256]; + ptr m_ptr; +}; + +extern IDT &g_idt; diff --git a/src/kernel/interrupt_isrs.inc b/src/kernel/interrupt_isrs.inc index 2d55794..fe2f1b0 100644 --- a/src/kernel/interrupt_isrs.inc +++ b/src/kernel/interrupt_isrs.inc @@ -240,6 +240,7 @@ IRQ (0xdf, 0xbf, irqBF) ISR (0xe0, 0, isrTimer) ISR (0xe1, 0, isrLINT0) ISR (0xe2, 0, isrLINT1) +ISR (0xe3, 0, isrAPICError) ISR (0xe4, 0, isrAssert) ISR (0xef, 0, isrSpurious) diff --git a/src/kernel/interrupts.cpp b/src/kernel/interrupts.cpp index 97797fd..00d405b 100644 --- a/src/kernel/interrupts.cpp +++ b/src/kernel/interrupts.cpp @@ -8,6 +8,7 @@ #include "debug.h" #include "device_manager.h" #include "gdt.h" +#include "idt.h" #include "interrupts.h" #include "io.h" #include "kernel_memory.h" @@ -15,6 +16,7 @@ #include "objects/process.h" #include "scheduler.h" #include "syscall.h" +#include "tss.h" #include "vm_space.h" static const uint16_t PIC1 = 0x20; @@ -22,19 +24,14 @@ static const uint16_t PIC2 = 0xa0; constexpr uintptr_t apic_eoi_addr = 0xfee000b0 + ::memory::page_offset; +constexpr size_t increment_offset = 0x1000; + extern "C" { void _halt(); void isr_handler(cpu_state*); void irq_handler(cpu_state*); -#define ISR(i, s, name) extern void name (); -#define EISR(i, s, name) extern void name (); -#define IRQ(i, q, name) extern void name (); -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR } isr @@ -60,7 +57,7 @@ get_irq(unsigned 
vector) } } -static void +void disable_legacy_pic() { // Mask all interrupts @@ -80,28 +77,16 @@ disable_legacy_pic() outb(PIC2+1, 0x02); io_wait(); } -void -interrupts_init() -{ -#define ISR(i, s, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#define EISR(i, s, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#define IRQ(i, q, name) idt_set_entry(i, reinterpret_cast(& name), 0x08, 0x8e); -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR - - disable_legacy_pic(); - - log::info(logs::boot, "Interrupts enabled."); -} - void isr_handler(cpu_state *regs) { console *cons = console::get(); uint8_t vector = regs->interrupt & 0xff; - ist_decrement(vector); + + TSS &tss = TSS::current(); + uint8_t ist = g_idt.get_ist(vector); + if (ist) + tss.ist_stack(ist) -= increment_offset; switch (static_cast(vector)) { @@ -150,13 +135,13 @@ isr_handler(cpu_state *regs) switch ((regs->errorcode & 0x07) >> 1) { case 0: cons->printf(" GDT[%x]\n", index); - gdt_dump(index); + GDT::current().dump(index); break; case 1: case 3: cons->printf(" IDT[%x]\n", index); - idt_dump(index); + g_idt.dump(index); break; default: @@ -275,7 +260,9 @@ isr_handler(cpu_state *regs) print_stacktrace(2); _halt(); } - ist_increment(vector); + + if (ist) + tss.ist_stack(ist) += increment_offset; *reinterpret_cast(apic_eoi_addr) = 0; } diff --git a/src/kernel/interrupts.h b/src/kernel/interrupts.h index 793288b..2a8c35f 100644 --- a/src/kernel/interrupts.h +++ b/src/kernel/interrupts.h @@ -29,6 +29,5 @@ extern "C" { void interrupts_disable(); } -/// Fill the IDT with our ISRs, and disable the legacy -/// PIC interrupts. 
-void interrupts_init(); +/// Disable the legacy PIC +void disable_legacy_pic(); diff --git a/src/kernel/main.cpp b/src/kernel/main.cpp index 1bc4285..b8e0b32 100644 --- a/src/kernel/main.cpp +++ b/src/kernel/main.cpp @@ -6,22 +6,28 @@ #include "kutil/assert.h" #include "apic.h" #include "block_device.h" +#include "clock.h" #include "console.h" #include "cpu.h" #include "device_manager.h" #include "gdt.h" +#include "idt.h" #include "interrupts.h" #include "io.h" #include "kernel_args.h" #include "kernel_memory.h" #include "log.h" +#include "msr.h" #include "objects/channel.h" #include "objects/event.h" #include "objects/thread.h" +#include "objects/vm_area.h" #include "scheduler.h" #include "serial.h" #include "symbol_table.h" #include "syscall.h" +#include "tss.h" +#include "vm_space.h" #ifndef GIT_VERSION #define GIT_VERSION @@ -31,18 +37,24 @@ extern "C" { void kernel_main(kernel::args::header *header); void (*__ctors)(void); void (*__ctors_end)(void); + void long_ap_startup(); + void ap_startup(); + void init_ap_trampoline(void*, uintptr_t, void (*)()); } extern void __kernel_assert(const char *, unsigned, const char *); using namespace kernel; +volatile size_t ap_startup_count; + /// Bootstrap the memory managers. -void setup_pat(); void memory_initialize_pre_ctors(args::header &kargs); void memory_initialize_post_ctors(args::header &kargs); process * load_simple_process(args::program &program); +void start_aps(void *kpml4); + /// TODO: not this. this is awful. 
args::framebuffer *fb = nullptr; @@ -77,7 +89,18 @@ kernel_main(args::header *header) logger_init(); cpu_validate(); - setup_pat(); + + log::debug(logs::boot, " jsix header is at: %016lx", header); + log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map); + log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table); + log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services); + log::debug(logs::boot, " Kernel PML4 is at: %016lx", header->pml4); + + uint64_t cr0, cr4; + asm ("mov %%cr0, %0" : "=r"(cr0)); + asm ("mov %%cr4, %0" : "=r"(cr4)); + uint64_t efer = rdmsr(msr::ia32_efer); + log::debug(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0, cr4, efer); bool has_video = false; if (header->video.size > 0) { @@ -95,10 +118,20 @@ kernel_main(args::header *header) logger_clear_immediate(); } - gdt_init(); - interrupts_init(); + extern TSS &g_bsp_tss; + extern GDT &g_bsp_gdt; + + TSS *tss = new (&g_bsp_tss) TSS; + GDT *gdt = new (&g_bsp_gdt) GDT {tss}; + gdt->install(); + + IDT *idt = new (&g_idt) IDT; + idt->install(); + + disable_legacy_pic(); memory_initialize_pre_ctors(*header); + init_cpu(true); run_constructors(); memory_initialize_post_ctors(*header); @@ -116,16 +149,15 @@ kernel_main(args::header *header) } } - log::debug(logs::boot, " jsix header is at: %016lx", header); - log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map); - log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table); - log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services); device_manager &devices = device_manager::get(); devices.parse_acpi(header->acpi_table); devices.init_drivers(); - devices.get_lapic()->calibrate_timer(); + devices.get_lapic().calibrate_timer(); + + start_aps(header->pml4); + interrupts_enable(); /* @@ -164,3 +196,80 @@ kernel_main(args::header *header) sched->start(); } + +void +start_aps(void *kpml4) +{ + using memory::frame_size; + using 
memory::kernel_stack_pages; + + extern size_t ap_startup_code_size; + extern process &g_kernel_process; + extern vm_area_guarded &g_kernel_stacks; + + clock &clk = clock::get(); + lapic &apic = device_manager::get().get_lapic(); + + ap_startup_count = 1; // BSP processor + auto &ids = device_manager::get().get_apic_ids(); + log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1); + + // Since we're using address space outside kernel space, make sure + // the kernel's vm_space is used + cpu_data &cpu = current_cpu(); + cpu.process = &g_kernel_process; + + // Copy the startup code somewhere the real mode trampoline can run + uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses + uint8_t vector = addr >> 12; + vm_area *vma = new vm_area_fixed(addr, 0x1000, vm_flags::write); + vm_space::kernel_space().add(addr, vma); + kutil::memcpy( + reinterpret_cast(addr), + reinterpret_cast(&ap_startup), + ap_startup_code_size); + + static constexpr size_t stack_bytes = kernel_stack_pages * frame_size; + + for (uint8_t id : ids) { + if (id == apic.get_id()) continue; + log::info(logs::boot, "Starting AP %d", id); + + size_t current_count = ap_startup_count; + uintptr_t stack_start = g_kernel_stacks.get_section(); + uintptr_t stack_end = stack_start + stack_bytes - 2 * sizeof(void*); + *reinterpret_cast(stack_end) = 0; // pre-fault the page + + init_ap_trampoline(kpml4, stack_end, long_ap_startup); + + apic.send_ipi(lapic::ipi_mode::init, 0, id); + clk.spinwait(1000); + + apic.send_ipi(lapic::ipi_mode::startup, vector, id); + for (unsigned i = 0; i < 20; ++i) { + if (ap_startup_count > current_count) break; + clk.spinwait(10); + } + + if (ap_startup_count > current_count) + continue; + + apic.send_ipi(lapic::ipi_mode::startup, vector, id); + for (unsigned i = 0; i < 100; ++i) { + if (ap_startup_count > current_count) break; + clk.spinwait(10); + } + } + + log::info(logs::boot, "%d CPUs running", ap_startup_count); + vm_space::kernel_space().remove(vma);
+} + +void +long_ap_startup() +{ + init_cpu(false); + __atomic_add_fetch(&ap_startup_count, 1, __ATOMIC_SEQ_CST); + + while(1) asm("hlt"); +} diff --git a/src/kernel/memory_bootstrap.cpp b/src/kernel/memory_bootstrap.cpp index 5668dc8..0323c02 100644 --- a/src/kernel/memory_bootstrap.cpp +++ b/src/kernel/memory_bootstrap.cpp @@ -39,11 +39,8 @@ frame_allocator &g_frame_allocator = __g_frame_allocator_storage.value; static kutil::no_construct __g_kernel_heap_area_storage; vm_area_untracked &g_kernel_heap_area = __g_kernel_heap_area_storage.value; -vm_area_guarded g_kernel_stacks { - memory::stacks_start, - memory::kernel_stack_pages, - memory::kernel_max_stacks, - vm_flags::write}; +static kutil::no_construct __g_kernel_stacks_storage; +vm_area_guarded &g_kernel_stacks = __g_kernel_stacks_storage.value; vm_area_guarded g_kernel_buffers { memory::buffers_start, @@ -66,6 +63,11 @@ memory_initialize_pre_ctors(args::header &kargs) { using kernel::args::frame_block; + // Clean out any remaining bootloader page table entries + page_table *kpml4 = static_cast(kargs.pml4); + for (unsigned i = 0; i < memory::pml4e_kernel; ++i) + kpml4->entries[i] = 0; + new (&g_kernel_heap) kutil::heap_allocator {heap_start, kernel_max_heap}; frame_block *blocks = reinterpret_cast(memory::bitmap_start); @@ -97,7 +99,6 @@ memory_initialize_pre_ctors(args::header &kargs) } } - page_table *kpml4 = reinterpret_cast(kargs.pml4); process *kp = process::create_kernel_process(kpml4); vm_space &vm = kp->space(); @@ -105,42 +106,24 @@ memory_initialize_pre_ctors(args::header &kargs) vm_area_untracked(kernel_max_heap, vm_flags::write); vm.add(heap_start, heap); + + vm_area *stacks = new (&g_kernel_stacks) vm_area_guarded { + memory::stacks_start, + memory::kernel_stack_pages, + memory::kernel_max_stacks, + vm_flags::write}; + vm.add(memory::stacks_start, stacks); } void memory_initialize_post_ctors(args::header &kargs) { vm_space &vm = vm_space::kernel_space(); - vm.add(memory::stacks_start, &g_kernel_stacks);
vm.add(memory::buffers_start, &g_kernel_buffers); g_frame_allocator.free( reinterpret_cast(kargs.page_tables), kargs.table_count); - - using memory::frame_size; - using memory::kernel_stack_pages; - constexpr size_t stack_size = kernel_stack_pages * frame_size; - - for (int ist = 1; ist <= 3; ++ist) { - uintptr_t bottom = g_kernel_stacks.get_section(); - log::debug(logs::boot, "Installing IST%d stack at %llx", ist, bottom); - - // Pre-realize and xerothese stacks, they're no good - // if they page fault - kutil::memset(reinterpret_cast(bottom), 0, stack_size); - - // Skip two entries to be the null frame - tss_set_ist(ist, bottom + stack_size - 2 * sizeof(uintptr_t)); - } - -#define ISR(i, s, name) if (s) { idt_set_ist(i, s); } -#define EISR(i, s, name) if (s) { idt_set_ist(i, s); } -#define IRQ(i, q, name) -#include "interrupt_isrs.inc" -#undef IRQ -#undef EISR -#undef ISR } static void @@ -198,15 +181,6 @@ log_mtrrs() pat_names[(pat >> (6*8)) & 7], pat_names[(pat >> (7*8)) & 7]); } -void -setup_pat() -{ - uint64_t pat = rdmsr(msr::ia32_pat); - pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC - wrmsr(msr::ia32_pat, pat); - log_mtrrs(); -} - process * load_simple_process(args::program &program) diff --git a/src/kernel/objects/process.cpp b/src/kernel/objects/process.cpp index d956363..64788f9 100644 --- a/src/kernel/objects/process.cpp +++ b/src/kernel/objects/process.cpp @@ -42,7 +42,7 @@ process::~process() s_processes.remove_swap(this); } -process & process::current() { return *bsp_cpu_data.p; } +process & process::current() { return *current_cpu().process; } process & process::kernel_process() { return g_kernel_process; } process * @@ -63,7 +63,7 @@ process::exit(int32_t code) thread->exit(code); } - if (this == bsp_cpu_data.p) + if (this == current_cpu().process) scheduler::get().schedule(); } diff --git a/src/kernel/objects/thread.cpp b/src/kernel/objects/thread.cpp index d1ab4c1..7dcfd2a 100644 --- a/src/kernel/objects/thread.cpp +++ 
b/src/kernel/objects/thread.cpp @@ -9,7 +9,7 @@ extern "C" void kernel_to_user_trampoline(); static constexpr j6_signal_t thread_default_signals = 0; -extern vm_area_guarded g_kernel_stacks; +extern vm_area_guarded &g_kernel_stacks; thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) : kobject(kobject::type::thread, thread_default_signals), @@ -43,13 +43,9 @@ thread::from_tcb(TCB *tcb) return reinterpret_cast(kutil::offset_pointer(tcb, offset)); } -thread & -thread::current() -{ - return *bsp_cpu_data.t; -} +thread & thread::current() { return *current_cpu().thread; } -inline void schedule_if_current(thread *t) { if (t == bsp_cpu_data.t) scheduler::get().schedule(); } +inline void schedule_if_current(thread *t) { if (t == current_cpu().thread) scheduler::get().schedule(); } void thread::wait_on_signals(kobject *obj, j6_signal_t signals) diff --git a/src/kernel/scheduler.cpp b/src/kernel/scheduler.cpp index fa015f2..9027905 100644 --- a/src/kernel/scheduler.cpp +++ b/src/kernel/scheduler.cpp @@ -33,7 +33,9 @@ const uint64_t rflags_int = 0x202; extern uint64_t idle_stack_end; -scheduler::scheduler(lapic *apic) : +extern "C" void task_switch(TCB *tcb); + +scheduler::scheduler(lapic &apic) : m_apic(apic), m_next_pid(1), m_clock(0), @@ -55,10 +57,11 @@ scheduler::scheduler(lapic *apic) : m_runlists[max_priority].push_back(tcb); m_current = tcb; - bsp_cpu_data.rsp0 = tcb->rsp0; - bsp_cpu_data.tcb = tcb; - bsp_cpu_data.p = kp; - bsp_cpu_data.t = idle; + cpu_data &cpu = current_cpu(); + cpu.rsp0 = tcb->rsp0; + cpu.tcb = tcb; + cpu.process = kp; + cpu.thread = idle; } template @@ -113,9 +116,8 @@ void scheduler::start() { log::info(logs::sched, "Starting scheduler."); - wrmsr(msr::ia32_gs_base, reinterpret_cast(&bsp_cpu_data)); - m_apic->enable_timer(isr::isrTimer, false); - m_apic->reset_timer(10); + m_apic.enable_timer(isr::isrTimer, false); + m_apic.reset_timer(10); } void @@ -205,7 +207,7 @@ void scheduler::schedule() { uint8_t priority = m_current->priority; - 
uint32_t remaining = m_apic->stop_timer(); + uint32_t remaining = m_apic.stop_timer(); m_current->time_left = remaining; thread *th = thread::from_tcb(m_current); const bool constant = th->has_state(thread::state::constant); @@ -214,7 +216,7 @@ scheduler::schedule() if (priority < max_priority && !constant) { // Process used its whole timeslice, demote it ++m_current->priority; - log::info(logs::sched, "Scheduler demoting thread %llx, priority %d", + log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d", th->koid(), m_current->priority); } m_current->time_left = quantum(m_current->priority); @@ -247,13 +249,14 @@ scheduler::schedule() auto *next = m_runlists[priority].pop_front(); next->last_ran = m_clock; - m_apic->reset_timer(next->time_left); + m_apic.reset_timer(next->time_left); if (next != m_current) { thread *next_thread = thread::from_tcb(next); - bsp_cpu_data.t = next_thread; - bsp_cpu_data.p = &next_thread->parent(); + cpu_data &cpu = current_cpu(); + cpu.thread = next_thread; + cpu.process = &next_thread->parent(); m_current = next; log::debug(logs::sched, "Scheduler switching threads %llx->%llx", diff --git a/src/kernel/scheduler.h b/src/kernel/scheduler.h index 7c119ab..d3d91c4 100644 --- a/src/kernel/scheduler.h +++ b/src/kernel/scheduler.h @@ -10,13 +10,10 @@ namespace args { struct program; }} +struct cpu_data; class lapic; class process; struct page_table; -struct cpu_state; - -extern "C" void isr_handler(cpu_state*); -extern "C" void task_switch(TCB *next); /// The task scheduler @@ -42,8 +39,8 @@ public: static const uint16_t process_quanta = 10; /// Constructor. - /// \arg apic Pointer to the local APIC object - scheduler(lapic *apic); + /// \arg apic The local APIC object for this CPU + scheduler(lapic &apic); /// Create a new process from a program image in memory. 
/// \arg program The descriptor of the pogram in memory @@ -82,7 +79,6 @@ public: static scheduler & get() { return *s_instance; } private: - friend uintptr_t syscall_dispatch(uintptr_t, cpu_state &); friend class process; static constexpr uint64_t promote_frequency = 10; @@ -96,7 +92,7 @@ private: void prune(uint64_t now); void check_promotions(uint64_t now); - lapic *m_apic; + lapic &m_apic; uint32_t m_next_pid; uint32_t m_tick_count; diff --git a/src/kernel/task.s b/src/kernel/task.s index be201d2..2f3bbc4 100644 --- a/src/kernel/task.s +++ b/src/kernel/task.s @@ -1,6 +1,5 @@ %include "tasking.inc" -extern g_tss global task_switch task_switch: push rbp @@ -18,7 +17,7 @@ task_switch: mov [rax + TCB.rsp], rsp ; Copy off saved user rsp - mov rcx, [gs:CPU_DATA.rsp3] ; rcx: curretn task's saved user rsp + mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp mov [rax + TCB.rsp3], rcx ; Install next task's TCB @@ -31,7 +30,7 @@ task_switch: mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack mov [gs:CPU_DATA.rsp0], rcx - lea rdx, [rel g_tss] ; rdx: address of TSS + mov rdx, [gs:CPU_DATA.tss] ; rdx: address of TSS mov [rdx + TSS.rsp0], rcx ; Update saved user rsp @@ -67,3 +66,8 @@ initialize_main_thread: ; the entrypoint should already be on the stack jmp kernel_to_user_trampoline + +global _current_gsbase +_current_gsbase: + mov rax, [gs:CPU_DATA.self] + ret diff --git a/src/kernel/tasking.inc b/src/kernel/tasking.inc index d3a711a..4c7ffdb 100644 --- a/src/kernel/tasking.inc +++ b/src/kernel/tasking.inc @@ -1,14 +1,20 @@ struc TCB .rsp: resq 1 -.rsp0: resq 1 -.rsp3: resq 1 +.rsp0: resq 1 +.rsp3: resq 1 .pml4: resq 1 endstruc struc CPU_DATA +.self: resq 1 +.id: resq 1 .rsp0: resq 1 .rsp3: resq 1 .tcb: resq 1 +.thread: resq 1 +.process: resq 1 +.tss: resq 1 +.gdt: resq 1 endstruc struc TSS diff --git a/src/kernel/tss.cpp b/src/kernel/tss.cpp new file mode 100644 index 0000000..9c7b057 --- /dev/null +++ b/src/kernel/tss.cpp @@ -0,0 +1,39 @@ 
+#include "kutil/assert.h" +#include "kutil/memory.h" +#include "kutil/no_construct.h" +#include "cpu.h" +#include "tss.h" + +// The BSP's TSS is initialized _before_ global constructors are called, +// so we don't want it to have a global constructor, lest it overwrite +// the previous initialization. +static kutil::no_construct __g_bsp_tss_storage; +TSS &g_bsp_tss = __g_bsp_tss_storage.value; + + +TSS::TSS() +{ + kutil::memset(this, 0, sizeof(TSS)); + m_iomap_offset = sizeof(TSS); +} + +TSS & +TSS::current() +{ + return *current_cpu().tss; +} + +uintptr_t & +TSS::ring_stack(unsigned ring) +{ + kassert(ring < 3, "Bad ring passed to TSS::ring_stack."); + return m_rsp[ring]; +} + +uintptr_t & +TSS::ist_stack(unsigned ist) +{ + kassert(ist > 0 && ist < 8, "Bad ist passed to TSS::ist_stack."); + return m_ist[ist]; +} + diff --git a/src/kernel/tss.h b/src/kernel/tss.h new file mode 100644 index 0000000..d0fe2b1 --- /dev/null +++ b/src/kernel/tss.h @@ -0,0 +1,35 @@ +#pragma once +/// \file tss.h +/// Definitions relating to the TSS +#include + +/// The 64bit TSS table +class TSS +{ +public: + TSS(); + + /// Get the currently running CPU's TSS. + static TSS & current(); + + /// Ring stack accessor. Returns a mutable reference. + /// \arg ring Which ring (0-2) to get the stack for + /// \returns A mutable reference to the stack pointer + uintptr_t & ring_stack(unsigned ring); + + /// IST stack accessor. Returns a mutable reference.
+ /// \arg ist Which IST entry (1-7) to get the stack for + /// \returns A mutable reference to the stack pointer + uintptr_t & ist_stack(unsigned ist); + +private: + uint32_t m_reserved0; + + uintptr_t m_rsp[3]; // stack pointers for CPL 0-2 + uintptr_t m_ist[8]; // ist[0] is reserved + + uint64_t m_reserved1; + uint16_t m_reserved2; + uint16_t m_iomap_offset; +} __attribute__ ((packed)); + diff --git a/src/libraries/cpu/cpu.cpp b/src/libraries/cpu/cpu_id.cpp similarity index 93% rename from src/libraries/cpu/cpu.cpp rename to src/libraries/cpu/cpu_id.cpp index 98395e5..dfd4ae0 100644 --- a/src/libraries/cpu/cpu.cpp +++ b/src/libraries/cpu/cpu_id.cpp @@ -1,5 +1,5 @@ #include -#include "cpu/cpu.h" +#include "cpu/cpu_id.h" namespace cpu { @@ -94,4 +94,13 @@ cpu_id::has_feature(feature feat) return (m_features & (1 << static_cast(feat))) != 0; } +uint8_t +cpu_id::local_apic_id() const +{ + uint32_t eax_unused; + uint32_t ebx; + __cpuid(1, 0, &eax_unused, &ebx); + return static_cast(ebx >> 24); +} + } diff --git a/src/libraries/cpu/include/cpu/cpu.h b/src/libraries/cpu/include/cpu/cpu_id.h similarity index 93% rename from src/libraries/cpu/include/cpu/cpu.h rename to src/libraries/cpu/include/cpu/cpu_id.h index ceace0f..fa7ccd5 100644 --- a/src/libraries/cpu/include/cpu/cpu.h +++ b/src/libraries/cpu/include/cpu/cpu_id.h @@ -1,5 +1,5 @@ #pragma once -/// \file cpu.h Definition of required cpu features for jsix +/// \file cpu_id.h Definition of required cpu features for jsix #include @@ -48,6 +48,9 @@ public: /// \returns A |regs| struct of the values retuned regs get(uint32_t leaf, uint32_t sub = 0) const; + /// Get the local APIC ID of the current CPU + uint8_t local_apic_id() const; + /// Get the name of the cpu vendor (eg, "GenuineIntel") inline const char * vendor_id() const { return m_vendor_id; }