diff --git a/src/kernel/apic.cpp b/src/kernel/apic.cpp
index effa0c4..0a31cd9 100644
--- a/src/kernel/apic.cpp
+++ b/src/kernel/apic.cpp
@@ -71,7 +71,7 @@ lapic::get_id()
 }
 
 void
-lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest)
+lapic::send_ipi(ipi mode, isr vector, uint8_t dest)
 {
     // Wait until the APIC is ready to send
     ipi_wait();
@@ -85,7 +85,7 @@ lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest)
 }
 
 void
-lapic::send_ipi_broadcast(ipi mode, bool self, uint8_t vector)
+lapic::send_ipi_broadcast(ipi mode, bool self, isr vector)
 {
     // Wait until the APIC is ready to send
     ipi_wait();
diff --git a/src/kernel/apic.h b/src/kernel/apic.h
index 2a25ed2..aee838a 100644
--- a/src/kernel/apic.h
+++ b/src/kernel/apic.h
@@ -5,6 +5,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "interrupts.h"
+
 enum class isr : uint8_t;
 
 /// Base class for other APIC types
@@ -51,13 +53,13 @@ public:
     /// \arg mode    The sending mode
     /// \arg vector  The interrupt vector
     /// \arg dest    The APIC ID of the destination
-    void send_ipi(ipi mode, uint8_t vector, uint8_t dest);
+    void send_ipi(ipi mode, isr vector, uint8_t dest);
 
     /// Send an inter-processor broadcast interrupt to all other CPUs
     /// \arg mode    The sending mode
     /// \arg self    If true, include this CPU in the broadcast
     /// \arg vector  The interrupt vector
-    void send_ipi_broadcast(ipi mode, bool self, uint8_t vector);
+    void send_ipi_broadcast(ipi mode, bool self, isr vector);
 
     /// Wait for an IPI to finish sending. This is done automatically
     /// before sending another IPI with send_ipi().
diff --git a/src/kernel/interrupt_isrs.inc b/src/kernel/interrupt_isrs.inc
index 6d1e700..dbaf484 100644
--- a/src/kernel/interrupt_isrs.inc
+++ b/src/kernel/interrupt_isrs.inc
@@ -242,6 +242,8 @@ ISR (0xe1, 0, isrLINT0)
 ISR (0xe2, 0, isrLINT1)
 ISR (0xe3, 0, isrAPICError)
 
+ISR (0xe4, 0, ipiSchedule)
+
 ISR (0xef, 0, isrSpurious)
 
 ISR (0xf0, 0, isrIgnore0)
diff --git a/src/kernel/interrupts.cpp b/src/kernel/interrupts.cpp
index 61d3f90..91b3839 100644
--- a/src/kernel/interrupts.cpp
+++ b/src/kernel/interrupts.cpp
@@ -150,6 +150,10 @@ isr_handler(cpu_state *regs)
     case isr::isrLINT1:
         break;
 
+    case isr::ipiSchedule:
+        scheduler::get().schedule();
+        break;
+
     case isr::isrSpurious:
         // No EOI for the spurious interrupt
         return;
diff --git a/src/kernel/interrupts.h b/src/kernel/interrupts.h
index 6789008..b3ef75f 100644
--- a/src/kernel/interrupts.h
+++ b/src/kernel/interrupts.h
@@ -10,7 +10,7 @@ enum class isr : uint8_t
 #define ISR(i, s, name)  name = i,
 #define NISR(i, s, name) name = i,
 #define EISR(i, s, name) name = i,
-#define IRQ(i, q, name) name = i,
+#define IRQ(i, q, name)  name = i,
 #include "interrupt_isrs.inc"
 #undef IRQ
 #undef EISR
diff --git a/src/kernel/objects/thread.cpp b/src/kernel/objects/thread.cpp
index 7cf7161..7682e72 100644
--- a/src/kernel/objects/thread.cpp
+++ b/src/kernel/objects/thread.cpp
@@ -53,7 +53,17 @@ thread::block()
 void
 thread::wake(uint64_t value)
 {
+    if (has_state(state::ready))
+        return;
+
     m_wake_value = value;
+    wake_only();
+    scheduler::get().maybe_schedule(tcb());
+}
+
+void
+thread::wake_only()
+{
     m_wake_timeout = 0;
     set_state(state::ready);
 }
diff --git a/src/kernel/objects/thread.h b/src/kernel/objects/thread.h
index 98b22df..737d7ca 100644
--- a/src/kernel/objects/thread.h
+++ b/src/kernel/objects/thread.h
@@ -9,6 +9,7 @@
 
 #include "objects/kobject.h"
 
+struct cpu_data;
 struct page_table;
 
 namespace obj {
@@ -37,6 +38,7 @@ struct TCB
     uint64_t last_ran;
 
     uintptr_t kernel_stack;
+    cpu_data *cpu;
 };
 
 using tcb_list = util::linked_list<TCB>;
@@ -45,15 +47,6 @@ using tcb_node = tcb_list::item_type;
 
 namespace obj {
 
-enum class wait_type : uint8_t
-{
-    none   = 0x00,
-    signal = 0x01,
-    time   = 0x02,
-    object = 0x04,
-};
-is_bitfield(wait_type);
-
 class process;
 
 class thread :
@@ -109,6 +102,10 @@ public:
     /// \arg value  The value that block() should return
     void wake(uint64_t value = 0);
 
+    /// Set this thread as awake, but do not call the scheduler
+    /// or set the wake value.
+    void wake_only();
+
     /// Set a timeout to unblock this thread
     /// \arg time  The clock time at which to wake. 0 for no timeout.
     inline void set_wake_timeout(uint64_t time) { m_wake_timeout = time; }
diff --git a/src/kernel/scheduler.cpp b/src/kernel/scheduler.cpp
index ea80db5..34f550d 100644
--- a/src/kernel/scheduler.cpp
+++ b/src/kernel/scheduler.cpp
@@ -112,8 +112,9 @@ scheduler::add_thread(TCB *t)
     run_queue &queue = m_run_queues[cpu.index];
     util::scoped_lock lock {queue.lock};
 
-    queue.blocked.push_back(static_cast<tcb_node*>(t));
+    t->cpu = &cpu;
     t->time_left = quantum(t->priority);
+    queue.blocked.push_back(static_cast<tcb_node*>(t));
 }
 
 void
@@ -128,7 +129,7 @@ scheduler::prune(run_queue &queue, uint64_t now)
         uint64_t timeout = th->wake_timeout();
 
         if (timeout && timeout <= now)
-            th->wake();
+            th->wake_only();
 
         bool ready = th->has_state(thread::state::ready);
         bool exited = th->has_state(thread::state::exited);
@@ -167,8 +168,8 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
     for (auto &pri_list : queue.ready) {
         for (auto *tcb : pri_list) {
             const thread *th = tcb->thread;
-            const bool constant = th->has_state(thread::state::constant);
-            if (constant)
+
+            if (th->has_state(thread::state::constant))
                 continue;
 
             const uint64_t age = now - tcb->last_ran;
@@ -176,8 +177,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
 
             bool stale =
                 age > quantum(priority) * 2 &&
-                tcb->priority > promote_limit &&
-                !constant;
+                tcb->priority > promote_limit;
 
             if (stale) {
                 // If the thread is stale, promote it
@@ -195,7 +195,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
 }
 
 static size_t
-balance_lists(tcb_list &to, tcb_list &from)
+balance_lists(tcb_list &to, tcb_list &from, cpu_data &new_cpu)
 {
     size_t to_len = to.length();
     size_t from_len = from.length();
@@ -205,17 +205,18 @@ balance_lists(tcb_list &to, tcb_list &from)
         return 0;
 
     size_t steal = (from_len - to_len) / 2;
-    for (size_t i = 0; i < steal; ++i)
-        to.push_front(from.pop_front());
+    for (size_t i = 0; i < steal; ++i) {
+        tcb_node *node = from.pop_front();
+        node->cpu = &new_cpu;
+        to.push_front(node);
+    }
 
     return steal;
 }
 
 void
 scheduler::steal_work(cpu_data &cpu)
 {
-    // Lock this cpu's queue for the whole time while we modify it
     run_queue &my_queue = m_run_queues[cpu.index];
-    util::scoped_lock my_queue_lock {my_queue.lock};
     const unsigned count = m_run_queues.count();
     for (unsigned i = 0; i < count; ++i) {
@@ -228,9 +229,9 @@ scheduler::steal_work(cpu_data &cpu)
 
         // Don't steal from max_priority, that's the idle thread
         for (unsigned pri = 0; pri < max_priority; ++pri)
-            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]);
+            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri], cpu);
 
-        stolen += balance_lists(my_queue.blocked, other_queue.blocked);
+        stolen += balance_lists(my_queue.blocked, other_queue.blocked, cpu);
 
         if (stolen)
             log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
@@ -244,10 +245,18 @@ scheduler::schedule()
     cpu_data &cpu = current_cpu();
     run_queue &queue = m_run_queues[cpu.index];
     lapic &apic = *cpu.apic;
 
     uint32_t remaining = apic.stop_timer();
     uint64_t now = clock::get().value();
 
+    // We need to explicitly lock/unlock here instead of
+    // using a scoped lock, because the scope doesn't "end"
+    // for the current thread until it gets scheduled again,
+    // and _new_ threads start their life at the end of this
+    // function, which screws up RAII
+    util::spinlock::waiter waiter {false, nullptr, "schedule"};
+    queue.lock.acquire(&waiter);
+
     // Only one CPU can be stealing at a time
     if (m_steal_turn == cpu.index &&
         now - queue.last_steal > steal_frequency) {
@@ -256,12 +265,6 @@ scheduler::schedule()
         m_steal_turn = (m_steal_turn + 1) % m_run_queues.count();
     }
 
-    // We need to explicitly lock/unlock here instead of
-    // using a scoped lock, because the scope doesn't "end"
-    // for the current thread until it gets scheduled again
-    util::spinlock::waiter waiter;
-    queue.lock.acquire(&waiter);
-
     queue.current->time_left = remaining;
     thread *th = queue.current->thread;
     uint8_t priority = queue.current->priority;
@@ -325,3 +328,17 @@ scheduler::schedule()
     queue.lock.release(&waiter);
     task_switch(queue.current);
 }
+
+void
+scheduler::maybe_schedule(TCB *t)
+{
+    cpu_data *cpu = t->cpu;
+
+    run_queue &queue = m_run_queues[cpu->index];
+    uint8_t current_pri = queue.current->priority;
+    if (current_pri <= t->priority)
+        return;
+
+    current_cpu().apic->send_ipi(
+        lapic::ipi::fixed, isr::ipiSchedule, cpu->id);
+}
diff --git a/src/kernel/scheduler.h b/src/kernel/scheduler.h
index eb3877d..140dc0d 100644
--- a/src/kernel/scheduler.h
+++ b/src/kernel/scheduler.h
@@ -71,6 +71,10 @@ public:
     /// Run the scheduler, possibly switching to a new task
     void schedule();
 
+    /// Check whether the given thread's CPU is running a more important
+    /// task; if not, send that CPU an IPI to run the scheduler.
+    void maybe_schedule(TCB *t);
+
     /// Start scheduling a new thread.
     /// \arg t  The new thread's TCB
     void add_thread(TCB *t);
diff --git a/src/kernel/smp.cpp b/src/kernel/smp.cpp
index 4f6f1e5..7e5612f 100644
--- a/src/kernel/smp.cpp
+++ b/src/kernel/smp.cpp
@@ -3,6 +3,7 @@
 #include "apic.h"
 #include "clock.h"
 #include "device_manager.h"
+#include "interrupts.h"
 #include "logger.h"
 #include "memory.h"
 #include "objects/vm_area.h"
@@ -51,7 +52,7 @@ start(cpu_data &bsp, void *kpml4)
     // Copy the startup code somewhere the real mode trampoline can run
     uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses
-    uint8_t vector = addr >> 12;
+    isr vector = static_cast<isr>(addr >> 12);
     obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write);
     vm_space::kernel_space().add(addr, vma);
     memcpy(
@@ -70,7 +71,7 @@ start(cpu_data &bsp, void *kpml4)
     lapic &apic = *bsp.apic;
 
     lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert;
-    apic.send_ipi_broadcast(mode, false, 0);
+    apic.send_ipi_broadcast(mode, false, static_cast<isr>(0));
 
     for (uint8_t id : ids) {
         if (id == bsp.id) continue;
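
Reviewer notes (sketches only, not part of the patch):

1. The uint8_t -> isr change on send_ipi()/send_ipi_broadcast() makes
interrupt vectors a distinct type, so a stray byte no longer compiles
silently as a vector. The two smp.cpp call sites that genuinely want raw
numbers (the SIPI trampoline page, and the INIT broadcast where the
vector field is ignored) now say so with an explicit static_cast. A
minimal standalone sketch of the pattern -- the enum value matches the
new interrupt_isrs.inc entry, but this send_ipi and main are
illustrative, not the kernel's:

    #include <cstdint>
    #include <cstdio>

    // Stand-in for the kernel's isr enum; 0xe4 mirrors the new
    // ISR (0xe4, 0, ipiSchedule) entry. The real send_ipi is a
    // lapic member function.
    enum class isr : uint8_t { ipiSchedule = 0xe4 };

    static void send_ipi(isr vector)
    {
        std::printf("IPI vector %#x\n", static_cast<unsigned>(vector));
    }

    int main()
    {
        send_ipi(isr::ipiSchedule);        // self-documenting call site
        send_ipi(static_cast<isr>(0x08));  // deliberately raw, like the
                                           // SIPI startup vector in smp.cpp
        // send_ipi(0xe4);                 // no longer compiles: enum class
    }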
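
2. The preemption test in maybe_schedule() leans on the scheduler's
ordering convention: a numerically lower priority is more important
(max_priority is reserved for the idle thread), so the IPI goes out only
when the waking thread strictly outranks what its home CPU is currently
running. A standalone sketch of just that decision -- should_preempt is
hypothetical, not a function in the patch:

    #include <cassert>
    #include <cstdint>

    // Mirrors the early return in scheduler::maybe_schedule(): lower
    // values are more important, so preempt only when the running
    // task's priority value is strictly greater than the waker's.
    static bool should_preempt(uint8_t running_pri, uint8_t waking_pri)
    {
        return running_pri > waking_pri;
    }

    int main()
    {
        assert(!should_preempt(2, 2)); // equal rank: no IPI
        assert(!should_preempt(1, 5)); // running task outranks the waker
        assert(should_preempt(5, 1));  // waker outranks: send ipiSchedule
    }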
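
3. On hoisting the queue lock in schedule() and keeping it manual: as the
patch's comment says, a scoped lock is wrong there because the function
doesn't return normally -- a newly created thread begins execution at
the tail of schedule() without ever having entered the top of that
scope. A toy illustration of the explicit acquire/release pattern
(toy_lock stands in for util::spinlock; none of these names are from the
kernel):

    #include <cassert>

    // Explicit acquire/release, so the unlock can be placed just
    // before task_switch() rather than in a destructor that a
    // brand-new thread would reach without the matching constructor
    // ever having run on its stack.
    struct toy_lock
    {
        bool held = false;
        void acquire() { assert(!held); held = true; }
        void release() { assert(held);  held = false; }
    };

    static toy_lock queue_lock;

    static void toy_schedule()
    {
        queue_lock.acquire();
        // ... account for the outgoing task, pick queue.current ...
        queue_lock.release();  // explicit: runs before the switch
        // task_switch(queue.current); // a thread starting life here
                                       // never ran the code above it
    }

    int main()
    {
        toy_schedule();
        assert(!queue_lock.held);
    }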