[kernel] Add an IPI to tell a CPU to run the scheduler
When waking another thread, if that thread has a more urgent priority than the
current thread on the same CPU, send that CPU an IPI to tell it to run its
scheduler.

Related changes in this commit:

- Add the ipiSchedule isr (vector 0xe4) and its handler in isr_handler().
- Change the APIC's send_ipi* functions to take an isr enum rather than an int
  for their vector parameter.
- Thread TCBs now contain a pointer to their current CPU's cpu_data structure.
- Add the maybe_schedule() call to the scheduler, which sends the schedule IPI
  to the given thread's CPU only when that CPU is running a less-urgent thread.
- Take the run queue lock earlier in schedule(), instead of taking the lock in
  steal_work() and again in schedule().
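The heart of the change is the wake-path priority check. Below is a minimal
standalone sketch of that logic; the types and the IPI helper are illustrative
stand-ins, not the kernel's real cpu_data, TCB, or lapic API, and (as in the
diff that follows) a lower priority value means more urgent.

#include <cstdint>
#include <cstdio>

// Assumed, simplified shapes of the kernel's TCB and per-CPU data.
struct TCB { uint8_t priority; };
struct cpu_info { uint8_t id; TCB *current; };

// Stand-in for lapic::send_ipi(lapic::ipi::fixed, isr::ipiSchedule, dest).
static void send_schedule_ipi(uint8_t dest_cpu)
{
    std::printf("IPI ipiSchedule (vector 0xe4) -> CPU%02x\n",
        static_cast<unsigned>(dest_cpu));
}

// The core of maybe_schedule(): only interrupt the target CPU when the
// thread we just woke is more urgent than what that CPU is running now.
static void maybe_schedule(cpu_info &target, TCB *woken)
{
    if (target.current->priority <= woken->priority)
        return; // target already runs something at least as urgent
    send_schedule_ipi(target.id);
}

int main()
{
    TCB idle {20};
    TCB woken {2};
    cpu_info cpu1 {1, &idle};
    maybe_schedule(cpu1, &woken); // sends the IPI: priority 2 beats 20

    TCB busy {1};
    cpu1.current = &busy;
    maybe_schedule(cpu1, &woken); // no IPI: CPU1 is running priority 1
}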
@@ -71,7 +71,7 @@ lapic::get_id()
 }

 void
-lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest)
+lapic::send_ipi(ipi mode, isr vector, uint8_t dest)
 {
     // Wait until the APIC is ready to send
     ipi_wait();
@@ -85,7 +85,7 @@ lapic::send_ipi(ipi mode, uint8_t vector, uint8_t dest)
 }

 void
-lapic::send_ipi_broadcast(ipi mode, bool self, uint8_t vector)
+lapic::send_ipi_broadcast(ipi mode, bool self, isr vector)
 {
     // Wait until the APIC is ready to send
     ipi_wait();
@@ -5,6 +5,8 @@
 #include <stdint.h>
 #include <util/enum_bitfields.h>

+#include "interrupts.h"
+
 enum class isr : uint8_t;

 /// Base class for other APIC types
@@ -51,13 +53,13 @@ public:
     /// \arg mode The sending mode
     /// \arg vector The interrupt vector
     /// \arg dest The APIC ID of the destination
-    void send_ipi(ipi mode, uint8_t vector, uint8_t dest);
+    void send_ipi(ipi mode, isr vector, uint8_t dest);

     /// Send an inter-processor broadcast interrupt to all other CPUs
     /// \arg mode The sending mode
     /// \arg self If true, include this CPU in the broadcast
     /// \arg vector The interrupt vector
-    void send_ipi_broadcast(ipi mode, bool self, uint8_t vector);
+    void send_ipi_broadcast(ipi mode, bool self, isr vector);

     /// Wait for an IPI to finish sending. This is done automatically
     /// before sending another IPI with send_ipi().
@@ -242,6 +242,8 @@ ISR (0xe1, 0, isrLINT0)
 ISR (0xe2, 0, isrLINT1)
 ISR (0xe3, 0, isrAPICError)

+ISR (0xe4, 0, ipiSchedule)
+
 ISR (0xef, 0, isrSpurious)

 ISR (0xf0, 0, isrIgnore0)
@@ -150,6 +150,10 @@ isr_handler(cpu_state *regs)
     case isr::isrLINT1:
         break;

+    case isr::ipiSchedule:
+        scheduler::get().schedule();
+        break;
+
     case isr::isrSpurious:
         // No EOI for the spurious interrupt
         return;
@@ -10,7 +10,7 @@ enum class isr : uint8_t
 #define ISR(i, s, name) name = i,
 #define NISR(i, s, name) name = i,
 #define EISR(i, s, name) name = i,
 #define IRQ(i, q, name) name = i,
 #include "interrupt_isrs.inc"
 #undef IRQ
 #undef EISR
@@ -53,7 +53,17 @@ thread::block()
 void
 thread::wake(uint64_t value)
 {
+    if (has_state(state::ready))
+        return;
+
     m_wake_value = value;
+    wake_only();
+    scheduler::get().maybe_schedule(tcb());
+}
+
+void
+thread::wake_only()
+{
     m_wake_timeout = 0;
     set_state(state::ready);
 }
@@ -9,6 +9,7 @@

 #include "objects/kobject.h"

+struct cpu_data;
 struct page_table;

 namespace obj {
@@ -37,6 +38,7 @@ struct TCB
     uint64_t last_ran;

     uintptr_t kernel_stack;
+    cpu_data *cpu;
 };

 using tcb_list = util::linked_list<TCB>;
@@ -45,15 +47,6 @@ using tcb_node = tcb_list::item_type;

 namespace obj {

-enum class wait_type : uint8_t
-{
-    none = 0x00,
-    signal = 0x01,
-    time = 0x02,
-    object = 0x04,
-};
-is_bitfield(wait_type);
-
 class process;

 class thread :
@@ -109,6 +102,10 @@ public:
     /// \arg value The value that block() should return
     void wake(uint64_t value = 0);

+    /// Set this thread as awake, but do not call the scheduler
+    /// or set the wake value.
+    void wake_only();
+
     /// Set a timeout to unblock this thread
     /// \arg time The clock time at which to wake. 0 for no timeout.
     inline void set_wake_timeout(uint64_t time) { m_wake_timeout = time; }
@@ -112,8 +112,9 @@ scheduler::add_thread(TCB *t)
     run_queue &queue = m_run_queues[cpu.index];
     util::scoped_lock lock {queue.lock};

-    queue.blocked.push_back(static_cast<tcb_node*>(t));
+    t->cpu = &cpu;
     t->time_left = quantum(t->priority);
+    queue.blocked.push_back(static_cast<tcb_node*>(t));
 }

 void
@@ -128,7 +129,7 @@ scheduler::prune(run_queue &queue, uint64_t now)

         uint64_t timeout = th->wake_timeout();
         if (timeout && timeout <= now)
-            th->wake();
+            th->wake_only();

         bool ready = th->has_state(thread::state::ready);
         bool exited = th->has_state(thread::state::exited);
@@ -167,8 +168,8 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
     for (auto &pri_list : queue.ready) {
         for (auto *tcb : pri_list) {
             const thread *th = tcb->thread;
-            const bool constant = th->has_state(thread::state::constant);
-            if (constant)
+            if (th->has_state(thread::state::constant))
                 continue;

             const uint64_t age = now - tcb->last_ran;
@@ -176,8 +177,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)

             bool stale =
                 age > quantum(priority) * 2 &&
-                tcb->priority > promote_limit &&
-                !constant;
+                tcb->priority > promote_limit;

             if (stale) {
                 // If the thread is stale, promote it
@@ -195,7 +195,7 @@ scheduler::check_promotions(run_queue &queue, uint64_t now)
 }

 static size_t
-balance_lists(tcb_list &to, tcb_list &from)
+balance_lists(tcb_list &to, tcb_list &from, cpu_data &new_cpu)
 {
     size_t to_len = to.length();
     size_t from_len = from.length();
@@ -205,17 +205,18 @@ balance_lists(tcb_list &to, tcb_list &from)
         return 0;

     size_t steal = (from_len - to_len) / 2;
-    for (size_t i = 0; i < steal; ++i)
-        to.push_front(from.pop_front());
+    for (size_t i = 0; i < steal; ++i) {
+        tcb_node *node = from.pop_front();
+        node->cpu = &new_cpu;
+        to.push_front(node);
+    }
     return steal;
 }

 void
 scheduler::steal_work(cpu_data &cpu)
 {
-    // Lock this cpu's queue for the whole time while we modify it
     run_queue &my_queue = m_run_queues[cpu.index];
-    util::scoped_lock my_queue_lock {my_queue.lock};

     const unsigned count = m_run_queues.count();
     for (unsigned i = 0; i < count; ++i) {
@@ -228,9 +229,9 @@ scheduler::steal_work(cpu_data &cpu)

         // Don't steal from max_priority, that's the idle thread
         for (unsigned pri = 0; pri < max_priority; ++pri)
-            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]);
+            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri], cpu);

-        stolen += balance_lists(my_queue.blocked, other_queue.blocked);
+        stolen += balance_lists(my_queue.blocked, other_queue.blocked, cpu);

         if (stolen)
             log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
@@ -244,10 +245,18 @@ scheduler::schedule()
     cpu_data &cpu = current_cpu();
     run_queue &queue = m_run_queues[cpu.index];
     lapic &apic = *cpu.apic;
-    uint32_t remaining = apic.stop_timer();

+    uint32_t remaining = apic.stop_timer();
     uint64_t now = clock::get().value();

+    // We need to explicitly lock/unlock here instead of
+    // using a scoped lock, because the scope doesn't "end"
+    // for the current thread until it gets scheduled again,
+    // and _new_ threads start their life at the end of this
+    // function, which screws up RAII
+    util::spinlock::waiter waiter {false, nullptr, "schedule"};
+    queue.lock.acquire(&waiter);
+
     // Only one CPU can be stealing at a time
     if (m_steal_turn == cpu.index &&
         now - queue.last_steal > steal_frequency) {
@@ -256,12 +265,6 @@ scheduler::schedule()
         m_steal_turn = (m_steal_turn + 1) % m_run_queues.count();
     }

-    // We need to explicitly lock/unlock here instead of
-    // using a scoped lock, because the scope doesn't "end"
-    // for the current thread until it gets scheduled again
-    util::spinlock::waiter waiter;
-    queue.lock.acquire(&waiter);
-
     queue.current->time_left = remaining;
     thread *th = queue.current->thread;
     uint8_t priority = queue.current->priority;
@@ -325,3 +328,17 @@ scheduler::schedule()
     queue.lock.release(&waiter);
     task_switch(queue.current);
 }
+
+void
+scheduler::maybe_schedule(TCB *t)
+{
+    cpu_data *cpu = t->cpu;
+
+    run_queue &queue = m_run_queues[cpu->index];
+    uint8_t current_pri = queue.current->priority;
+    if (current_pri <= t->priority)
+        return;
+
+    current_cpu().apic->send_ipi(
+        lapic::ipi::fixed, isr::ipiSchedule, cpu->id);
+}
@@ -71,6 +71,10 @@ public:
     /// Run the scheduler, possibly switching to a new task
     void schedule();

+    /// Check if the CPU is running a more important task. If not,
+    /// run the scheduler.
+    void maybe_schedule(TCB *t);
+
     /// Start scheduling a new thread.
     /// \arg t The new thread's TCB
     void add_thread(TCB *t);
@@ -3,6 +3,7 @@
 #include "apic.h"
 #include "clock.h"
 #include "device_manager.h"
+#include "interrupts.h"
 #include "logger.h"
 #include "memory.h"
 #include "objects/vm_area.h"
@@ -51,7 +52,7 @@ start(cpu_data &bsp, void *kpml4)

     // Copy the startup code somwhere the real mode trampoline can run
     uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses
-    uint8_t vector = addr >> 12;
+    isr vector = static_cast<isr>(addr >> 12);
     obj::vm_area *vma = new obj::vm_area_fixed(addr, 0x1000, vm_flags::write);
     vm_space::kernel_space().add(addr, vma);
     memcpy(
@@ -70,7 +71,7 @@ start(cpu_data &bsp, void *kpml4)

     lapic &apic = *bsp.apic;
     lapic::ipi mode = lapic::ipi::init | lapic::ipi::level | lapic::ipi::assert;
-    apic.send_ipi_broadcast(mode, false, 0);
+    apic.send_ipi_broadcast(mode, false, static_cast<isr>(0));

     for (uint8_t id : ids) {
         if (id == bsp.id) continue;