[kernel] Add an IPI to tell a CPU to run the scheduler

When waking a thread, if that thread has a more urgent priority than the
thread currently running on its CPU, send that CPU an IPI to tell it to
run its scheduler.

Related changes in this commit:

- Add the ipiSchedule ISR (vector 0xe4) and handle it in
  isr_handler().
- Change the APIC's send_ipi* functions to take an isr enum instead of
  a uint8_t for their vector parameter.
- Add to each thread's TCB a pointer to the cpu_data structure of the
  CPU it is currently assigned to.
- Add the maybe_schedule() call to the scheduler, which sends the
  schedule IPI to the given thread's CPU only when that CPU is running a
  less-urgent thread.
- Acquire the run queue lock earlier in schedule(), so that it is taken
  once instead of in steal_work() and then again in schedule().
This commit is contained in:
Justin C. Miller
2022-02-26 14:04:14 -08:00
parent 40274f5fac
commit 982442eb00
10 changed files with 73 additions and 36 deletions

View File

@@ -71,7 +71,7 @@ lapic::get_id()
} }
void void
lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest) lapic::send_ipi(ipi mode, isr vector, uint8_t dest)
{ {
// Wait until the APIC is ready to send // Wait until the APIC is ready to send
ipi_wait(); ipi_wait();
@@ -85,7 +85,7 @@ lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest)
} }
void void
lapic::send_ipi_broadcast(ipi mode, bool self, uint8_t vector) lapic::send_ipi_broadcast(ipi mode, bool self, isr vector)
{ {
// Wait until the APIC is ready to send // Wait until the APIC is ready to send
ipi_wait(); ipi_wait();

View File

@@ -5,6 +5,8 @@
#include <stdint.h> #include <stdint.h>
#include <util/enum_bitfields.h> #include <util/enum_bitfields.h>
#include "interrupts.h"
enum class isr : uint8_t; enum class isr : uint8_t;
/// Base class for other APIC types /// Base class for other APIC types
@@ -51,13 +53,13 @@ public:
/// \arg mode The sending mode /// \arg mode The sending mode
/// \arg vector The interrupt vector /// \arg vector The interrupt vector
/// \arg dest The APIC ID of the destination /// \arg dest The APIC ID of the destination
void send_ipi(ipi mode, uint8_t vector, uint8_t dest); void send_ipi(ipi mode, isr vector, uint8_t dest);
/// Send an inter-processor broadcast interrupt to all other CPUs /// Send an inter-processor broadcast interrupt to all other CPUs
/// \arg mode The sending mode /// \arg mode The sending mode
/// \arg self If true, include this CPU in the broadcast /// \arg self If true, include this CPU in the broadcast
/// \arg vector The interrupt vector /// \arg vector The interrupt vector
void send_ipi_broadcast(ipi mode, bool self, uint8_t vector); void send_ipi_broadcast(ipi mode, bool self, isr vector);
/// Wait for an IPI to finish sending. This is done automatically /// Wait for an IPI to finish sending. This is done automatically
/// before sending another IPI with send_ipi(). /// before sending another IPI with send_ipi().

View File

@@ -242,6 +242,8 @@ ISR (0xe1, 0, isrLINT0)
ISR (0xe2, 0, isrLINT1) ISR (0xe2, 0, isrLINT1)
ISR (0xe3, 0, isrAPICError) ISR (0xe3, 0, isrAPICError)
ISR (0xe4, 0, ipiSchedule)
ISR (0xef, 0, isrSpurious) ISR (0xef, 0, isrSpurious)
ISR (0xf0, 0, isrIgnore0) ISR (0xf0, 0, isrIgnore0)

View File

@@ -150,6 +150,10 @@ isr_handler(cpu_state *regs)
case isr::isrLINT1: case isr::isrLINT1:
break; break;
case isr::ipiSchedule:
scheduler::get().schedule();
break;
case isr::isrSpurious: case isr::isrSpurious:
// No EOI for the spurious interrupt // No EOI for the spurious interrupt
return; return;

View File

@@ -53,7 +53,17 @@ thread::block()
void void
thread::wake(uint64_t value) thread::wake(uint64_t value)
{ {
if (has_state(state::ready))
return;
m_wake_value = value; m_wake_value = value;
wake_only();
scheduler::get().maybe_schedule(tcb());
}
void
thread::wake_only()
{
m_wake_timeout = 0; m_wake_timeout = 0;
set_state(state::ready); set_state(state::ready);
} }

View File

@@ -9,6 +9,7 @@
#include "objects/kobject.h" #include "objects/kobject.h"
struct cpu_data;
struct page_table; struct page_table;
namespace obj { namespace obj {
@@ -37,6 +38,7 @@ struct TCB
uint64_t last_ran; uint64_t last_ran;
uintptr_t kernel_stack; uintptr_t kernel_stack;
cpu_data *cpu;
}; };
using tcb_list = util::linked_list<TCB>; using tcb_list = util::linked_list<TCB>;
@@ -45,15 +47,6 @@ using tcb_node = tcb_list::item_type;
namespace obj { namespace obj {
enum class wait_type : uint8_t
{
none = 0x00,
signal = 0x01,
time = 0x02,
object = 0x04,
};
is_bitfield(wait_type);
class process; class process;
class thread : class thread :
@@ -109,6 +102,10 @@ public:
/// \arg value The value that block() should return /// \arg value The value that block() should return
void wake(uint64_t value = 0); void wake(uint64_t value = 0);
/// Set this thread as awake, but do not call the scheduler
/// or set the wake value.
void wake_only();
/// Set a timeout to unblock this thread /// Set a timeout to unblock this thread
/// \arg time The clock time at which to wake. 0 for no timeout. /// \arg time The clock time at which to wake. 0 for no timeout.
inline void set_wake_timeout(uint64_t time) { m_wake_timeout = time; } inline void set_wake_timeout(uint64_t time) { m_wake_timeout = time; }

View File

@@ -112,8 +112,9 @@ scheduler::add_thread(TCB *t)
run_queue &queue = m_run_queues[cpu.index]; run_queue &queue = m_run_queues[cpu.index];
util::scoped_lock lock {queue.lock}; util::scoped_lock lock {queue.lock};
queue.blocked.push_back(static_cast<tcb_node*>(t)); t->cpu = &cpu;
t->time_left = quantum(t->priority); t->time_left = quantum(t->priority);
queue.blocked.push_back(static_cast<tcb_node*>(t));
} }
void void
@@ -128,7 +129,7 @@ scheduler::prune(run_queue &queue, uint64_t now)
uint64_t timeout = th->wake_timeout(); uint64_t timeout = th->wake_timeout();
if (timeout && timeout <= now) if (timeout && timeout <= now)
th->wake(); th->wake_only();
bool ready = th->has_state(thread::state::ready); bool ready = th->has_state(thread::state::ready);
bool exited = th->has_state(thread::state::exited); bool exited = th->has_state(thread::state::exited);
@@ -167,8 +168,8 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
for (auto &pri_list : queue.ready) { for (auto &pri_list : queue.ready) {
for (auto *tcb : pri_list) { for (auto *tcb : pri_list) {
const thread *th = tcb->thread; const thread *th = tcb->thread;
const bool constant = th->has_state(thread::state::constant);
if (constant) if (th->has_state(thread::state::constant))
continue; continue;
const uint64_t age = now - tcb->last_ran; const uint64_t age = now - tcb->last_ran;
@@ -176,8 +177,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
bool stale = bool stale =
age > quantum(priority) * 2 && age > quantum(priority) * 2 &&
tcb->priority > promote_limit && tcb->priority > promote_limit;
!constant;
if (stale) { if (stale) {
// If the thread is stale, promote it // If the thread is stale, promote it
@@ -195,7 +195,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
} }
static size_t static size_t
balance_lists(tcb_list &to, tcb_list &from) balance_lists(tcb_list &to, tcb_list &from, cpu_data &new_cpu)
{ {
size_t to_len = to.length(); size_t to_len = to.length();
size_t from_len = from.length(); size_t from_len = from.length();
@@ -205,17 +205,18 @@ balance_lists(tcb_list &to, tcb_list &from)
return 0; return 0;
size_t steal = (from_len - to_len) / 2; size_t steal = (from_len - to_len) / 2;
for (size_t i = 0; i < steal; ++i) for (size_t i = 0; i < steal; ++i) {
to.push_front(from.pop_front()); tcb_node *node = from.pop_front();
node->cpu = &new_cpu;
to.push_front(node);
}
return steal; return steal;
} }
void void
scheduler::steal_work(cpu_data &cpu) scheduler::steal_work(cpu_data &cpu)
{ {
// Lock this cpu's queue for the whole time while we modify it
run_queue &my_queue = m_run_queues[cpu.index]; run_queue &my_queue = m_run_queues[cpu.index];
util::scoped_lock my_queue_lock {my_queue.lock};
const unsigned count = m_run_queues.count(); const unsigned count = m_run_queues.count();
for (unsigned i = 0; i < count; ++i) { for (unsigned i = 0; i < count; ++i) {
@@ -228,9 +229,9 @@ scheduler::steal_work(cpu_data &cpu)
// Don't steal from max_priority, that's the idle thread // Don't steal from max_priority, that's the idle thread
for (unsigned pri = 0; pri < max_priority; ++pri) for (unsigned pri = 0; pri < max_priority; ++pri)
stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]); stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri], cpu);
stolen += balance_lists(my_queue.blocked, other_queue.blocked); stolen += balance_lists(my_queue.blocked, other_queue.blocked, cpu);
if (stolen) if (stolen)
log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x", log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
@@ -244,10 +245,18 @@ scheduler::schedule()
cpu_data &cpu = current_cpu(); cpu_data &cpu = current_cpu();
run_queue &queue = m_run_queues[cpu.index]; run_queue &queue = m_run_queues[cpu.index];
lapic &apic = *cpu.apic; lapic &apic = *cpu.apic;
uint32_t remaining = apic.stop_timer();
uint32_t remaining = apic.stop_timer();
uint64_t now = clock::get().value(); uint64_t now = clock::get().value();
// We need to explicitly lock/unlock here instead of
// using a scoped lock, because the scope doesn't "end"
// for the current thread until it gets scheduled again,
// and _new_ threads start their life at the end of this
// function, which screws up RAII
util::spinlock::waiter waiter {false, nullptr, "schedule"};
queue.lock.acquire(&waiter);
// Only one CPU can be stealing at a time // Only one CPU can be stealing at a time
if (m_steal_turn == cpu.index && if (m_steal_turn == cpu.index &&
now - queue.last_steal > steal_frequency) { now - queue.last_steal > steal_frequency) {
@@ -256,12 +265,6 @@ scheduler::schedule()
m_steal_turn = (m_steal_turn + 1) % m_run_queues.count(); m_steal_turn = (m_steal_turn + 1) % m_run_queues.count();
} }
// We need to explicitly lock/unlock here instead of
// using a scoped lock, because the scope doesn't "end"
// for the current thread until it gets scheduled again
util::spinlock::waiter waiter;
queue.lock.acquire(&waiter);
queue.current->time_left = remaining; queue.current->time_left = remaining;
thread *th = queue.current->thread; thread *th = queue.current->thread;
uint8_t priority = queue.current->priority; uint8_t priority = queue.current->priority;
@@ -325,3 +328,17 @@ scheduler::schedule()
queue.lock.release(&waiter); queue.lock.release(&waiter);
task_switch(queue.current); task_switch(queue.current);
} }
/// Ask the CPU that thread `t` is assigned to to run its scheduler, but
/// only if `t` is more urgent than the thread that CPU is currently running.
/// \arg t  The TCB of the thread to consider; its `cpu` field selects both
///         the run queue that is inspected and the destination of the IPI.
void
scheduler::maybe_schedule(TCB *t)
{
// NOTE(review): t->cpu is dereferenced unchecked; presumably it is always
// set (add_thread() assigns it) before a thread can be woken -- confirm.
cpu_data *cpu = t->cpu;
run_queue &queue = m_run_queues[cpu->index];
// NOTE(review): queue.current is read here without acquiring queue.lock;
// verify that a stale read is acceptable for this best-effort check.
uint8_t current_pri = queue.current->priority;
// A numerically lower priority value is treated as more urgent here: do
// nothing unless t's value is strictly lower than the running thread's.
if (current_pri <= t->priority)
return;
// Use the calling CPU's local APIC to send a fixed-mode IPI on the
// ipiSchedule vector to the target CPU, identified by cpu->id.
current_cpu().apic->send_ipi(
lapic::ipi::fixed, isr::ipiSchedule, cpu->id);
}

View File

@@ -71,6 +71,10 @@ public:
/// Run the scheduler, possibly switching to a new task /// Run the scheduler, possibly switching to a new task
void schedule(); void schedule();
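/// Check whether the CPU that thread t is assigned to is currently running
/// a task at least as urgent as t. If not, send that CPU an IPI telling it
/// to run its scheduler.
/// \arg t The TCB of the thread that may need to preempt its CPU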
void maybe_schedule(TCB *t);
/// Start scheduling a new thread. /// Start scheduling a new thread.
/// \arg t The new thread's TCB /// \arg t The new thread's TCB
void add_thread(TCB *t); void add_thread(TCB *t);

View File

@@ -3,6 +3,7 @@
#include "apic.h" #include "apic.h"
#include "clock.h" #include "clock.h"
#include "device_manager.h" #include "device_manager.h"
#include "interrupts.h"
#include "logger.h" #include "logger.h"
#include "memory.h" #include "memory.h"
#include "objects/vm_area.h" #include "objects/vm_area.h"
@@ -51,7 +52,7 @@ start(cpu_data &bsp, void *kpml4)
// Copy the startup code somewhere the real mode trampoline can run // Copy the startup code somewhere the real mode trampoline can run
uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses
uint8_t vector = addr >> 12; isr vector = static_cast<isr>(addr >> 12);
obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write); obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write);
vm_space::kernel_space().add(addr, vma); vm_space::kernel_space().add(addr, vma);
memcpy( memcpy(
@@ -70,7 +71,7 @@ start(cpu_data &bsp, void *kpml4)
lapic &apic = *bsp.apic; lapic &apic = *bsp.apic;
lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert; lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert;
apic.send_ipi_broadcast(mode, false, 0); apic.send_ipi_broadcast(mode, false, static_cast<isr>(0));
for (uint8_t id : ids) { for (uint8_t id : ids) {
if (id == bsp.id) continue; if (id == bsp.id) continue;