diff --git a/src/kernel/ap_startup.s b/src/kernel/ap_startup.s index e5bb985..ebf5915 100644 --- a/src/kernel/ap_startup.s +++ b/src/kernel/ap_startup.s @@ -137,3 +137,13 @@ init_ap_trampoline: pop rbp ret + +extern long_ap_startup +global ap_idle +ap_idle: + call long_ap_startup + sti +.hang: + hlt + jmp .hang + diff --git a/src/kernel/apic.cpp b/src/kernel/apic.cpp index bd7d8da..a3b7574 100644 --- a/src/kernel/apic.cpp +++ b/src/kernel/apic.cpp @@ -6,6 +6,8 @@ #include "kernel_memory.h" #include "log.h" +uint64_t lapic::s_ticks_per_us = 0; + static constexpr uint16_t lapic_id = 0x0020; static constexpr uint16_t lapic_spurious = 0x00f0; @@ -54,12 +56,12 @@ apic::apic(uintptr_t base) : } -lapic::lapic(uintptr_t base, isr spurious) : +lapic::lapic(uintptr_t base) : apic(base), m_divisor(0) { apic_write(m_base, lapic_lvt_error, static_cast(isr::isrAPICError)); - apic_write(m_base, lapic_spurious, static_cast(spurious)); + apic_write(m_base, lapic_spurious, static_cast(isr::isrSpurious)); log::info(logs::apic, "LAPIC created, base %lx", m_base); } @@ -122,10 +124,10 @@ lapic::calibrate_timer() clock::get().spinwait(us); uint32_t remaining = apic_read(m_base, lapic_timer_cur); - uint32_t ticks_total = initial - remaining; - m_ticks_per_us = ticks_total / us; + uint64_t ticks_total = initial - remaining; + s_ticks_per_us = ticks_total / us; - log::info(logs::apic, "APIC timer ticks %d times per microsecond.", m_ticks_per_us); + log::info(logs::apic, "APIC timer ticks %llu times per microsecond.", s_ticks_per_us); interrupts_enable(); } @@ -145,7 +147,7 @@ lapic::set_divisor(uint8_t divisor) case 64: divbits = 0x9; break; case 128: divbits = 0xa; break; default: - kassert(0, "Invalid divisor passed to lapic::enable_timer"); + kassert(0, "Invalid divisor passed to lapic::set_divisor"); } apic_write(m_base, lapic_timer_div, divbits); diff --git a/src/kernel/apic.h b/src/kernel/apic.h index c81fe7c..f15982a 100644 --- a/src/kernel/apic.h +++ b/src/kernel/apic.h @@ -43,8 
+43,7 @@ class lapic : public: /// Constructor /// \arg base Physicl base address of the APIC's MMIO registers - /// \arg spurious Vector of the spurious interrupt handler - lapic(uintptr_t base, isr spurious); + lapic(uintptr_t base); /// Get the local APIC's ID uint8_t get_id(); @@ -93,19 +92,14 @@ public: void calibrate_timer(); private: - inline uint64_t ticks_to_us(uint32_t ticks) const { - return static_cast(ticks) / m_ticks_per_us; - } - - inline uint64_t us_to_ticks(uint64_t interval) const { - return interval * m_ticks_per_us; - } + inline static uint64_t ticks_to_us(uint64_t ticks) { return ticks / s_ticks_per_us; } + inline static uint64_t us_to_ticks(uint64_t interval) { return interval * s_ticks_per_us; } void set_divisor(uint8_t divisor); void set_repeat(bool repeat); uint32_t m_divisor; - uint32_t m_ticks_per_us; + static uint64_t s_ticks_per_us; }; diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index e5350b5..ec96053 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -1,7 +1,6 @@ #include #include "kutil/assert.h" #include "kutil/memory.h" -#include "apic.h" #include "cpu.h" #include "cpu/cpu_id.h" #include "device_manager.h" diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index e7ef485..3d4fc5e 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -2,9 +2,8 @@ #include -#include "kutil/spinlock.h" - class GDT; +class lapic; class process; struct TCB; class thread; @@ -34,6 +33,9 @@ struct cpu_data TSS *tss; GDT *gdt; + // Members beyond this point do not appear in + // the assembly version + lapic *apic; }; extern "C" cpu_data * _current_gsbase(); @@ -55,4 +57,3 @@ inline cpu_data & current_cpu() { return *_current_gsbase(); } /// Validate the required CPU features are present. Really, the bootloader already /// validated the required features, but still iterate the options and log about them. 
void cpu_validate(); - diff --git a/src/kernel/debug.h b/src/kernel/debug.h index 0f9c8b6..7bcdc82 100644 --- a/src/kernel/debug.h +++ b/src/kernel/debug.h @@ -4,6 +4,8 @@ #include +struct cpu_state; + extern "C" { uintptr_t get_rsp(); uintptr_t get_rip(); diff --git a/src/kernel/device_manager.cpp b/src/kernel/device_manager.cpp index 2f8f7d3..2cb82f0 100644 --- a/src/kernel/device_manager.cpp +++ b/src/kernel/device_manager.cpp @@ -63,7 +63,7 @@ void irq4_callback(void *) device_manager::device_manager() : - m_lapic(nullptr) + m_lapic_base(0) { m_irqs.ensure_capacity(32); m_irqs.set_size(16); @@ -106,6 +106,26 @@ device_manager::parse_acpi(const void *root_table) load_xsdt(memory::to_virtual(acpi2->xsdt_address)); } +const device_manager::apic_nmi * +device_manager::get_lapic_nmi(uint8_t id) const +{ + for (const auto &nmi : m_nmis) { + if (nmi.cpu == 0xff || nmi.cpu == id) + return &nmi; + } + + return nullptr; +} + +const device_manager::irq_override * +device_manager::get_irq_override(uint8_t irq) const +{ + for (const auto &o : m_overrides) + if (o.source == irq) return &o; + + return nullptr; +} + ioapic * device_manager::get_ioapic(int i) { @@ -163,38 +183,38 @@ device_manager::load_apic(const acpi_table_header *header) { const auto *apic = check_get_table(header); - uintptr_t local = apic->local_address; - m_lapic = new lapic(local, isr::isrSpurious); + m_lapic_base = apic->local_address; size_t count = acpi_table_entries(apic, 1); uint8_t const *p = apic->controller_data; uint8_t const *end = p + count; - // Pass one: count IOAPIC objcts - int num_ioapics = 0; + // Pass one: count objects + unsigned num_lapics = 0; + unsigned num_ioapics = 0; + unsigned num_overrides = 0; + unsigned num_nmis = 0; while (p < end) { const uint8_t type = p[0]; const uint8_t length = p[1]; - if (type == 1) num_ioapics++; - p += length; - } - m_ioapics.set_capacity(num_ioapics); - - // Pass two: set up IOAPIC objcts - p = apic->controller_data; - while (p < end) { - const 
uint8_t type = p[0]; - const uint8_t length = p[1]; - if (type == 1) { - uintptr_t base = kutil::read_from(p+4); - uint32_t base_gsr = kutil::read_from(p+8); - m_ioapics.emplace(base, base_gsr); + switch (type) { + case 0: ++num_lapics; break; + case 1: ++num_ioapics; break; + case 2: ++num_overrides; break; + case 4: ++num_nmis; break; + default: break; } + p += length; } - // Pass three: configure APIC objects + m_apic_ids.set_capacity(num_lapics); + m_ioapics.set_capacity(num_ioapics); + m_overrides.set_capacity(num_overrides); + m_nmis.set_capacity(num_nmis); + + // Pass two: configure objects p = apic->controller_data; while (p < end) { const uint8_t type = p[0]; @@ -205,38 +225,41 @@ device_manager::load_apic(const acpi_table_header *header) uint8_t uid = kutil::read_from(p+2); uint8_t id = kutil::read_from(p+3); m_apic_ids.append(id); + log::debug(logs::device, " Local APIC uid %x id %x", uid, id); } break; - case 1: // I/O APIC + case 1: { // I/O APIC + uintptr_t base = kutil::read_from(p+4); + uint32_t base_gsi = kutil::read_from(p+8); + m_ioapics.emplace(base, base_gsi); + + log::debug(logs::device, " IO APIC gsi %x base %lx", base_gsi, base); + } break; case 2: { // Interrupt source override - uint8_t source = kutil::read_from(p+3); - isr gsi = isr::irq00 + kutil::read_from(p+4); - uint16_t flags = kutil::read_from(p+8); + irq_override o; + o.source = kutil::read_from(p+3); + o.gsi = kutil::read_from(p+4); + o.flags = kutil::read_from(p+8); + m_overrides.append(o); log::debug(logs::device, " Intr source override IRQ %d -> %d Pol %d Tri %d", - source, gsi, (flags & 0x3), ((flags >> 2) & 0x3)); - - // TODO: in a multiple-IOAPIC system this might be elsewhere - m_ioapics[0].redirect(source, static_cast(gsi), flags, true); + o.source, o.gsi, (o.flags & 0x3), ((o.flags >> 2) & 0x3)); } break; case 4: {// LAPIC NMI - uint8_t cpu = kutil::read_from(p + 2); - uint8_t num = kutil::read_from(p + 5); - uint16_t flags = kutil::read_from(p + 3); + apic_nmi nmi; + 
nmi.cpu = kutil::read_from(p + 2); + nmi.lint = kutil::read_from(p + 5); + nmi.flags = kutil::read_from(p + 3); + m_nmis.append(nmi); - log::debug(logs::device, " LAPIC NMI Proc %d LINT%d Pol %d Tri %d", - kutil::read_from(p+2), - kutil::read_from(p+5), - kutil::read_from(p+3) & 0x3, - (kutil::read_from(p+3) >> 2) & 0x3); - - m_lapic->enable_lint(num, num == 0 ? isr::isrLINT0 : isr::isrLINT1, true, flags); + log::debug(logs::device, " LAPIC NMI Proc %02x LINT%d Pol %d Tri %d", + nmi.cpu, nmi.lint, nmi.flags & 0x3, (nmi.flags >> 2) & 0x3); } break; @@ -246,17 +269,6 @@ device_manager::load_apic(const acpi_table_header *header) p += length; } - - /* - for (uint8_t i = 0; i < m_ioapics[0].get_num_gsi(); ++i) { - switch (i) { - case 2: break; - default: m_ioapics[0].mask(i, false); - } - } - */ - - m_lapic->enable(); } void diff --git a/src/kernel/device_manager.h b/src/kernel/device_manager.h index 539f1c2..0750bd7 100644 --- a/src/kernel/device_manager.h +++ b/src/kernel/device_manager.h @@ -24,10 +24,6 @@ public: /// \returns A reference to the system device manager static device_manager & get() { return s_instance; } - /// Get the LAPIC - /// \returns An object representing the local APIC - lapic & get_lapic() { return *m_lapic; } - /// Get an IOAPIC /// \arg i Index of the requested IOAPIC /// \returns An object representing the given IOAPIC if it exists, @@ -68,6 +64,39 @@ public: /// \returns True if the interrupt was handled bool dispatch_irq(unsigned irq); + struct apic_nmi + { + uint8_t cpu; + uint8_t lint; + uint16_t flags; + }; + + struct irq_override + { + uint8_t source; + uint16_t flags; + uint32_t gsi; + }; + + /// Get the list of APIC ids for other CPUs + inline const kutil::vector & get_apic_ids() const { return m_apic_ids; } + + /// Get the LAPIC base address + /// \returns The physical base address of the local apic registers + uintptr_t get_lapic_base() const { return m_lapic_base; } + + /// Get the NMI mapping for the given local APIC + /// \arg 
id ID of the local APIC + /// \returns apic_nmi structure describing the NMI configuration, + /// or null if no configuration was provided + const apic_nmi * get_lapic_nmi(uint8_t id) const; + + /// Get the IRQ source override for the given IRQ + /// \arg irq IRQ number (not isr vector) + /// \returns irq_override structure describing that IRQ's + /// configuration, or null if no configuration was provided + const irq_override * get_irq_override(uint8_t irq) const; + /// Register the existance of a block device. /// \arg blockdev Pointer to the block device void register_block_device(block_device *blockdev); @@ -94,9 +123,6 @@ public: &m_hpets[i] : nullptr; } - /// Get the list of APIC ids for other CPUs - inline const kutil::vector & get_apic_ids() const { return m_apic_ids; } - private: /// Parse the ACPI XSDT and load relevant sub-tables. /// \arg xsdt Pointer to the XSDT from the firmware @@ -122,10 +148,13 @@ private: /// that has no callback. void bad_irq(uint8_t irq); - lapic *m_lapic; + uintptr_t m_lapic_base; + kutil::vector m_ioapics; kutil::vector m_hpets; kutil::vector m_apic_ids; + kutil::vector m_nmis; + kutil::vector m_overrides; kutil::vector m_pci; kutil::vector m_devices; diff --git a/src/kernel/main.cpp b/src/kernel/main.cpp index b82329c..577fcc2 100644 --- a/src/kernel/main.cpp +++ b/src/kernel/main.cpp @@ -39,7 +39,8 @@ extern "C" { void (*__ctors_end)(void); void long_ap_startup(cpu_data *cpu); void ap_startup(); - void init_ap_trampoline(void*, cpu_data *, void (*)(cpu_data *)); + void ap_idle(); + void init_ap_trampoline(void*, cpu_data *, void (*)()); } extern void __kernel_assert(const char *, unsigned, const char *); @@ -47,13 +48,14 @@ extern void __kernel_assert(const char *, unsigned, const char *); using namespace kernel; volatile size_t ap_startup_count; +static volatile bool scheduler_ready = false; /// Bootstrap the memory managers. 
void memory_initialize_pre_ctors(args::header &kargs); void memory_initialize_post_ctors(args::header &kargs); process * load_simple_process(args::program &program); -void start_aps(void *kpml4); +unsigned start_aps(lapic &apic, const kutil::vector &ids, void *kpml4); /// TODO: not this. this is awful. args::framebuffer *fb = nullptr; @@ -122,6 +124,7 @@ kernel_main(args::header *header) extern TSS &g_bsp_tss; extern GDT &g_bsp_gdt; extern cpu_data g_bsp_cpu_data; + extern uintptr_t idle_stack_end; IDT *idt = new (&g_idt) IDT; @@ -131,6 +134,7 @@ kernel_main(args::header *header) cpu->self = cpu; cpu->tss = new (&g_bsp_tss) TSS; cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; + cpu->rsp0 = idle_stack_end; cpu_early_init(cpu); disable_legacy_pic(); @@ -160,15 +164,21 @@ kernel_main(args::header *header) devices.parse_acpi(header->acpi_table); // Need the local APIC to get the BSP's id - lapic &apic = device_manager::get().get_lapic(); - cpu->id = apic.get_id(); + uintptr_t apic_base = devices.get_lapic_base(); + + lapic *apic = new lapic(apic_base); + apic->enable(); + + cpu->id = apic->get_id(); + cpu->apic = apic; cpu_init(cpu, true); devices.init_drivers(); - devices.get_lapic().calibrate_timer(); + apic->calibrate_timer(); - start_aps(header->pml4); + const auto &apic_ids = devices.get_apic_ids(); + unsigned num_cpus = start_aps(*apic, apic_ids, header->pml4); idt->add_ist_entries(); interrupts_enable(); @@ -197,7 +207,8 @@ kernel_main(args::header *header) } */ - scheduler *sched = new scheduler(devices.get_lapic()); + scheduler *sched = new scheduler {num_cpus}; + scheduler_ready = true; // Skip program 0, which is the kernel itself for (unsigned i = 1; i < header->num_programs; ++i) @@ -209,8 +220,8 @@ kernel_main(args::header *header) sched->start(); } -void -start_aps(void *kpml4) +unsigned +start_aps(lapic &apic, const kutil::vector &ids, void *kpml4) { using memory::frame_size; using memory::kernel_stack_pages; @@ -220,10 +231,8 @@ start_aps(void *kpml4) 
extern vm_area_guarded &g_kernel_stacks; clock &clk = clock::get(); - lapic &apic = device_manager::get().get_lapic(); ap_startup_count = 1; // BSP processor - auto &ids = device_manager::get().get_apic_ids(); log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1); // Since we're using address space outside kernel space, make sure @@ -245,7 +254,7 @@ start_aps(void *kpml4) // AP idle stacks need less room than normal stacks, so pack multiple // into a normal stack area - static constexpr size_t idle_stack_bytes = 1024; // 2KiB is generous + static constexpr size_t idle_stack_bytes = 2048; // 2KiB is generous static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size; static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes; @@ -258,13 +267,14 @@ start_aps(void *kpml4) apic.send_ipi_broadcast(mode, false, 0); for (uint8_t id : ids) { - if (id == apic.get_id()) continue; + if (id == bsp.id) continue; // Set up the CPU data structures TSS *tss = new TSS; GDT *gdt = new GDT {tss}; cpu_data *cpu = new cpu_data; kutil::memset(cpu, 0, sizeof(cpu_data)); + cpu->self = cpu; cpu->id = id; cpu->index = ++index; @@ -285,7 +295,7 @@ start_aps(void *kpml4) cpu->rsp0 = stack_end; // Set up the trampoline with this CPU's data - init_ap_trampoline(kpml4, cpu, long_ap_startup); + init_ap_trampoline(kpml4, cpu, ap_idle); // Kick it off! 
size_t current_count = ap_startup_count; @@ -315,6 +325,7 @@ start_aps(void *kpml4) log::info(logs::boot, "%d CPUs running", ap_startup_count); vm_space::kernel_space().remove(vma); + return ap_startup_count; } void @@ -322,6 +333,12 @@ long_ap_startup(cpu_data *cpu) { cpu_init(cpu, false); ++ap_startup_count; + while (!scheduler_ready) asm ("pause"); - while(1) asm("hlt"); + uintptr_t apic_base = + device_manager::get().get_lapic_base(); + cpu->apic = new lapic(apic_base); + cpu->apic->enable(); + + scheduler::get().start(); } diff --git a/src/kernel/objects/process.cpp b/src/kernel/objects/process.cpp index 64788f9..46d4be2 100644 --- a/src/kernel/objects/process.cpp +++ b/src/kernel/objects/process.cpp @@ -13,15 +13,11 @@ static kutil::no_construct __g_kernel_process_storage; process &g_kernel_process = __g_kernel_process_storage.value; -kutil::vector process::s_processes; - process::process() : kobject {kobject::type::process}, m_next_handle {1}, m_state {state::running} { - s_processes.append(this); - j6_handle_t self = add_handle(this); kassert(self == self_handle(), "Process self-handle is not 1"); } @@ -39,7 +35,6 @@ process::~process() { for (auto &it : m_handles) if (it.val) it.val->handle_release(); - s_processes.remove_swap(this); } process & process::current() { return *current_cpu().process; } diff --git a/src/kernel/objects/process.h b/src/kernel/objects/process.h index 6ccc63e..053638a 100644 --- a/src/kernel/objects/process.h +++ b/src/kernel/objects/process.h @@ -94,6 +94,4 @@ private: enum class state : uint8_t { running, exited }; state m_state; - - static kutil::vector s_processes; }; diff --git a/src/kernel/objects/thread.cpp b/src/kernel/objects/thread.cpp index 7dcfd2a..6bde889 100644 --- a/src/kernel/objects/thread.cpp +++ b/src/kernel/objects/thread.cpp @@ -221,7 +221,5 @@ thread::create_idle_thread(process &kernel, uint8_t pri, uintptr_t rsp0) thread *idle = new thread(kernel, pri, rsp0); idle->set_state(state::constant); 
idle->set_state(state::ready); - log::info(logs::task, "Created idle thread as koid %llx", idle->koid()); - return idle; } diff --git a/src/kernel/scheduler.cpp b/src/kernel/scheduler.cpp index 9027905..4baff62 100644 --- a/src/kernel/scheduler.cpp +++ b/src/kernel/scheduler.cpp @@ -17,6 +17,7 @@ #include "objects/channel.h" #include "objects/process.h" #include "objects/system.h" +#include "objects/thread.h" #include "objects/vm_area.h" #include "scheduler.h" @@ -25,43 +26,37 @@ #include "kutil/assert.h" - +extern "C" void task_switch(TCB *tcb); scheduler *scheduler::s_instance = nullptr; -const uint64_t rflags_noint = 0x002; -const uint64_t rflags_int = 0x202; - -extern uint64_t idle_stack_end; - -extern "C" void task_switch(TCB *tcb); - -scheduler::scheduler(lapic &apic) : - m_apic(apic), - m_next_pid(1), - m_clock(0), - m_last_promotion(0) +struct run_queue { - kassert(!s_instance, "Multiple schedulers created!"); - s_instance = this; + tcb_node *current = nullptr; + tcb_list ready[scheduler::num_priorities]; + tcb_list blocked; - process *kp = &process::kernel_process(); + uint64_t last_promotion = 0; + uint64_t last_steal = 0; + kutil::spinlock lock; +}; - log::debug(logs::task, "Kernel process koid %llx", kp->koid()); +scheduler::scheduler(unsigned cpus) : + m_next_pid {1}, + m_clock {0} +{ + kassert(!s_instance, "Created multiple schedulers!"); + if (!s_instance) + s_instance = this; - thread *idle = thread::create_idle_thread(*kp, max_priority, - reinterpret_cast(&idle_stack_end)); + m_run_queues.set_size(cpus); +} - log::debug(logs::task, "Idle thread koid %llx", idle->koid()); - - auto *tcb = idle->tcb(); - m_runlists[max_priority].push_back(tcb); - m_current = tcb; - - cpu_data &cpu = current_cpu(); - cpu.rsp0 = tcb->rsp0; - cpu.tcb = tcb; - cpu.process = kp; - cpu.thread = idle; +scheduler::~scheduler() +{ + // Not truly necessary - if the scheduler is going away, the whole + // system is probably going down. But let's be clean. 
+ if (s_instance == this) + s_instance = nullptr; } template @@ -72,20 +67,6 @@ inline T * push(uintptr_t &rsp, size_t size = sizeof(T)) { return p; } -thread * -scheduler::create_process(bool user) -{ - process *p = new process; - thread *th = p->create_thread(default_priority, user); - - TCB *tcb = th->tcb(); - log::debug(logs::task, "Creating thread %llx, priority %d, time slice %d", - th->koid(), tcb->priority, tcb->time_left); - - th->set_state(thread::state::ready); - return th; -} - void scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant) { @@ -115,24 +96,42 @@ scheduler::quantum(int priority) void scheduler::start() { - log::info(logs::sched, "Starting scheduler."); - m_apic.enable_timer(isr::isrTimer, false); - m_apic.reset_timer(10); + cpu_data &cpu = current_cpu(); + run_queue &queue = m_run_queues[cpu.index]; + kutil::scoped_lock lock {queue.lock}; + + process *kp = &process::kernel_process(); + thread *idle = thread::create_idle_thread(*kp, max_priority, cpu.rsp0); + log::debug(logs::task, "CPU%02x idle thread koid %llx", cpu.index, idle->koid()); + + auto *tcb = idle->tcb(); + cpu.process = kp; + cpu.thread = idle; + cpu.tcb = tcb; + + queue.current = tcb; + + log::info(logs::sched, "CPU%02x starting scheduler", cpu.index); + cpu.apic->enable_timer(isr::isrTimer, false); + cpu.apic->reset_timer(10); } void scheduler::add_thread(TCB *t) { - m_blocked.push_back(static_cast(t)); - t->time_left = quantum(t->priority); + cpu_data &cpu = current_cpu(); + run_queue &queue = m_run_queues[cpu.index]; + kutil::scoped_lock lock {queue.lock}; + queue.blocked.push_back(static_cast(t)); + t->time_left = quantum(t->priority); } -void scheduler::prune(uint64_t now) +void scheduler::prune(run_queue &queue, uint64_t now) { // Find processes that are ready or have exited and // move them to the appropriate lists. 
- auto *tcb = m_blocked.front(); + auto *tcb = queue.blocked.front(); while (tcb) { thread *th = thread::from_tcb(tcb); uint8_t priority = tcb->priority; @@ -140,7 +139,7 @@ void scheduler::prune(uint64_t now) bool ready = th->has_state(thread::state::ready); bool exited = th->has_state(thread::state::exited); bool constant = th->has_state(thread::state::constant); - bool current = tcb == m_current; + bool current = tcb == queue.current; ready |= th->wake_on_time(now); @@ -155,7 +154,7 @@ void scheduler::prune(uint64_t now) // page tables if (current) continue; - m_blocked.remove(remove); + queue.blocked.remove(remove); process &p = th->parent(); // thread_exited deletes the thread, and returns true if the process @@ -163,19 +162,19 @@ void scheduler::prune(uint64_t now) if(!current && p.thread_exited(th)) delete &p; } else { - m_blocked.remove(remove); + queue.blocked.remove(remove); log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid()); - m_runlists[remove->priority].push_back(remove); + queue.ready[remove->priority].push_back(remove); } } } void -scheduler::check_promotions(uint64_t now) +scheduler::check_promotions(run_queue &queue, uint64_t now) { - for (auto &pri_list : m_runlists) { + for (auto &pri_list : queue.ready) { for (auto *tcb : pri_list) { - const thread *th = thread::from_tcb(m_current); + const thread *th = thread::from_tcb(tcb); const bool constant = th->has_state(thread::state::constant); if (constant) continue; @@ -190,81 +189,145 @@ scheduler::check_promotions(uint64_t now) if (stale) { // If the thread is stale, promote it - m_runlists[priority].remove(tcb); + queue.ready[priority].remove(tcb); tcb->priority -= 1; tcb->time_left = quantum(tcb->priority); - m_runlists[tcb->priority].push_back(tcb); + queue.ready[tcb->priority].push_back(tcb); log::info(logs::sched, "Scheduler promoting thread %llx, priority %d", th->koid(), tcb->priority); } } } - m_last_promotion = now; + queue.last_promotion = now; +} + 
+static size_t +balance_lists(tcb_list &to, tcb_list &from) +{ + size_t to_len = to.length(); + size_t from_len = from.length(); + + // Only steal from the rich, don't be Dennis Moore + if (from_len <= to_len) + return 0; + + size_t steal = (from_len - to_len) / 2; + for (size_t i = 0; i < steal; ++i) + to.push_front(from.pop_front()); + return steal; +} + +void +scheduler::steal_work(cpu_data &cpu) +{ + // First grab a scheduler-wide lock to avoid deadlock + kutil::scoped_lock steal_lock {m_steal_lock}; + + // Lock this cpu's queue for the whole time while we modify it + run_queue &my_queue = m_run_queues[cpu.index]; + kutil::scoped_lock my_queue_lock {my_queue.lock}; + + const unsigned count = m_run_queues.count(); + for (unsigned i = 0; i < count; ++i) { + if (i == cpu.index) continue; + + run_queue &other_queue = m_run_queues[i]; + kutil::scoped_lock other_queue_lock {other_queue.lock}; + + size_t stolen = 0; + + // Don't steal from max_priority, that's the idle thread + for (unsigned pri = 0; pri < max_priority; ++pri) + stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]); + + stolen += balance_lists(my_queue.blocked, other_queue.blocked); + + if (stolen) + log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x", + cpu.index, stolen, i); + } } void scheduler::schedule() { - uint8_t priority = m_current->priority; - uint32_t remaining = m_apic.stop_timer(); - m_current->time_left = remaining; - thread *th = thread::from_tcb(m_current); + cpu_data &cpu = current_cpu(); + run_queue &queue = m_run_queues[cpu.index]; + lapic &apic = *cpu.apic; + uint32_t remaining = apic.stop_timer(); + + if (m_clock - queue.last_steal > steal_frequency) { + steal_work(cpu); + queue.last_steal = m_clock; + } + + // We need to explicitly lock/unlock here instead of + // using a scoped lock, because the scope doesn't "end" + // for the current thread until it gets scheduled again + kutil::spinlock::waiter waiter; + queue.lock.acquire(&waiter); + + 
queue.current->time_left = remaining; + thread *th = thread::from_tcb(queue.current); + uint8_t priority = queue.current->priority; const bool constant = th->has_state(thread::state::constant); if (remaining == 0) { if (priority < max_priority && !constant) { // Process used its whole timeslice, demote it - ++m_current->priority; + ++queue.current->priority; log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d", - th->koid(), m_current->priority); + th->koid(), queue.current->priority); } - m_current->time_left = quantum(m_current->priority); + queue.current->time_left = quantum(queue.current->priority); } else if (remaining > 0) { // Process gave up CPU, give it a small bonus to its // remaining timeslice. uint32_t bonus = quantum(priority) >> 4; - m_current->time_left += bonus; + queue.current->time_left += bonus; } - m_runlists[priority].remove(m_current); if (th->has_state(thread::state::ready)) { - m_runlists[m_current->priority].push_back(m_current); + queue.ready[queue.current->priority].push_back(queue.current); } else { - m_blocked.push_back(m_current); + queue.blocked.push_back(queue.current); } clock::get().update(); - prune(++m_clock); - if (m_clock - m_last_promotion > promote_frequency) - check_promotions(m_clock); + prune(queue, ++m_clock); + if (m_clock - queue.last_promotion > promote_frequency) + check_promotions(queue, m_clock); priority = 0; - while (m_runlists[priority].empty()) { + while (queue.ready[priority].empty()) { ++priority; kassert(priority < num_priorities, "All runlists are empty"); } - m_current->last_ran = m_clock; + queue.current->last_ran = m_clock; - auto *next = m_runlists[priority].pop_front(); + auto *next = queue.ready[priority].pop_front(); next->last_ran = m_clock; - m_apic.reset_timer(next->time_left); + apic.reset_timer(next->time_left); - if (next != m_current) { - thread *next_thread = thread::from_tcb(next); - - cpu_data &cpu = current_cpu(); - cpu.thread = next_thread; - cpu.process = 
&next_thread->parent(); - m_current = next; - - log::debug(logs::sched, "Scheduler switching threads %llx->%llx", - th->koid(), next_thread->koid()); - log::debug(logs::sched, " priority %d time left %d @ %lld.", - m_current->priority, m_current->time_left, m_clock); - log::debug(logs::sched, " PML4 %llx", m_current->pml4); - - task_switch(m_current); + if (next == queue.current) { + queue.lock.release(&waiter); + return; } + + thread *next_thread = thread::from_tcb(next); + + cpu.thread = next_thread; + cpu.process = &next_thread->parent(); + queue.current = next; + + log::debug(logs::sched, "CPU%02x switching threads %llx->%llx", + cpu.index, th->koid(), next_thread->koid()); + log::debug(logs::sched, " priority %d time left %d @ %lld.", + next->priority, next->time_left, m_clock); + log::debug(logs::sched, " PML4 %llx", next->pml4); + + queue.lock.release(&waiter); + task_switch(queue.current); } diff --git a/src/kernel/scheduler.h b/src/kernel/scheduler.h index d3d91c4..8dd232f 100644 --- a/src/kernel/scheduler.h +++ b/src/kernel/scheduler.h @@ -3,7 +3,8 @@ /// The task scheduler and related definitions #include -#include "objects/thread.h" +#include "kutil/spinlock.h" +#include "kutil/vector.h" namespace kernel { namespace args { @@ -14,6 +15,7 @@ struct cpu_data; class lapic; class process; struct page_table; +struct run_queue; /// The task scheduler @@ -39,8 +41,9 @@ public: static const uint16_t process_quanta = 10; /// Constructor. - /// \arg apic The local APIC object for this CPU - scheduler(lapic &apic); + /// \arg cpus The number of CPUs to schedule for + scheduler(unsigned cpus); + ~scheduler(); /// Create a new process from a program image in memory. /// \arg program The descriptor of the pogram in memory @@ -66,15 +69,11 @@ public: /// Run the scheduler, possibly switching to a new task void schedule(); - /// Get the current TCB. 
- /// \returns A pointer to the current thread's TCB - inline TCB * current() { return m_current; } - /// Start scheduling a new thread. /// \arg t The new thread's TCB void add_thread(TCB *t); - /// Get a reference to the system scheduler + /// Get a reference to the scheduler /// \returns A reference to the global system scheduler static scheduler & get() { return *s_instance; } @@ -82,30 +81,23 @@ private: friend class process; static constexpr uint64_t promote_frequency = 10; + static constexpr uint64_t steal_frequency = 10; - /// Create a new process object. This process will have its pid - /// set but nothing else. - /// \arg user True if this thread will enter userspace - /// \returns The new process' main thread - thread * create_process(bool user); - - void prune(uint64_t now); - void check_promotions(uint64_t now); - - lapic &m_apic; + void prune(run_queue &queue, uint64_t now); + void check_promotions(run_queue &queue, uint64_t now); + void steal_work(cpu_data &cpu); uint32_t m_next_pid; uint32_t m_tick_count; process *m_kernel_process; - tcb_node *m_current; - tcb_list m_runlists[num_priorities]; - tcb_list m_blocked; + + kutil::vector m_run_queues; // TODO: lol a real clock uint64_t m_clock = 0; - uint64_t m_last_promotion; + kutil::spinlock m_steal_lock; static scheduler *s_instance; }; diff --git a/src/kernel/syscall.cpp b/src/kernel/syscall.cpp index 2d90468..6ccc4b9 100644 --- a/src/kernel/syscall.cpp +++ b/src/kernel/syscall.cpp @@ -1,11 +1,10 @@ #include +#include "kutil/memory.h" + #include "console.h" -#include "cpu.h" #include "debug.h" #include "log.h" -#include "msr.h" -#include "scheduler.h" #include "syscall.h" extern "C" { diff --git a/src/libraries/kutil/include/kutil/logger.h b/src/libraries/kutil/include/kutil/logger.h index 03e1684..f260fe2 100644 --- a/src/libraries/kutil/include/kutil/logger.h +++ b/src/libraries/kutil/include/kutil/logger.h @@ -6,6 +6,7 @@ #include #include "kutil/bip_buffer.h" +#include "kutil/spinlock.h" 
namespace kutil { namespace log { @@ -111,6 +112,7 @@ private: uint8_t m_sequence; kutil::bip_buffer m_buffer; + kutil::spinlock m_lock; static logger *s_log; static const char *s_level_names[static_cast(level::max)]; diff --git a/src/libraries/kutil/logger.cpp b/src/libraries/kutil/logger.cpp index 143d2e2..909deee 100644 --- a/src/libraries/kutil/logger.cpp +++ b/src/libraries/kutil/logger.cpp @@ -91,6 +91,8 @@ logger::output(level severity, area_t area, const char *fmt, va_list args) header->bytes += vsnprintf(header->message, sizeof(buffer) - sizeof(entry), fmt, args); + kutil::scoped_lock lock {m_lock}; + if (m_immediate) { buffer[header->bytes] = 0; m_immediate(area, severity, header->message); @@ -117,6 +119,8 @@ logger::output(level severity, area_t area, const char *fmt, va_list args) size_t logger::get_entry(void *buffer, size_t size) { + kutil::scoped_lock lock {m_lock}; + void *out; size_t out_size = m_buffer.get_block(&out); if (out_size == 0 || out == 0)