Files
jsix/src/kernel/scheduler.cpp
Justin C. Miller f0025dbc47 [kernel] Schedule threads on other CPUs
Now that the other CPUs have been brought up, add support for scheduling
tasks on them. The scheduler now maintains separate ready/blocked lists
per CPU, and CPUs will attempt to balance load via periodic work
stealing.

Other changes as a result of this:
- The device manager no longer creates a local APIC object, but instead
  just gathers relevant info from the ACPI tables. Each CPU creates its
  own local APIC object. This also spurred the APIC timer calibration to
  become a static value, as all APICs are assumed to be symmetrical.
- Fixed a bug where the scheduler was popping the current task off of
  its ready list, however the current task is never on the ready list
  (except the idle task was first set up as both current and ready).
  This was causing the lists to get into bad states. Now a task can only
  ever be current or in a ready or blocked list.
- Got rid of the unused static process::s_processes list of all
  processes, instead of trying to synchronize it via locks.
- Added spinlocks for synchronization to the scheduler and logger
  objects.
2021-02-15 12:56:22 -08:00

334 lines
8.5 KiB
C++

#include <stddef.h>
#include <j6/init.h>
#include "apic.h"
#include "clock.h"
#include "console.h"
#include "cpu.h"
#include "debug.h"
#include "device_manager.h"
#include "gdt.h"
#include "interrupts.h"
#include "io.h"
#include "kernel_memory.h"
#include "log.h"
#include "msr.h"
#include "objects/channel.h"
#include "objects/process.h"
#include "objects/system.h"
#include "objects/thread.h"
#include "objects/vm_area.h"
#include "scheduler.h"
// here for the framebuffer hack
#include "kernel_args.h"
#include "kutil/assert.h"
// Context-switch routine with C linkage, defined outside this file.
// schedule() calls it with the TCB of the thread chosen to run next.
extern "C" void task_switch(TCB *tcb);
// The registered scheduler instance: set by the scheduler constructor and
// reset to nullptr by its destructor.
scheduler *scheduler::s_instance = nullptr;
/// Scheduling state for a single CPU. The scheduler holds one run_queue
/// per CPU, indexed by the CPU's index.
struct run_queue
{
    /// TCB of the thread currently selected to run on this CPU.
    tcb_node *current {nullptr};

    /// Runnable threads, one list per priority level.
    tcb_list ready[scheduler::num_priorities];

    /// Threads that are not on a ready list; prune() moves them to a
    /// ready list or reaps them.
    tcb_list blocked;

    /// Scheduler clock value recorded by the last check_promotions() pass.
    uint64_t last_promotion {0};

    /// Scheduler clock value recorded after the last steal_work() pass.
    uint64_t last_steal {0};

    /// Held while `current` and the ready/blocked lists are modified.
    kutil::spinlock lock;
};
/// Create the scheduler and register it as the global instance.
/// \arg cpus  Number of run queues to create (one per CPU)
scheduler::scheduler(unsigned cpus) :
    m_next_pid {1},
    m_clock {0}
{
    // There must only ever be one scheduler.
    kassert(!s_instance, "Created multiple schedulers!");

    // Register only if nobody got there first, so an earlier instance is
    // never silently replaced.
    if (s_instance == nullptr)
        s_instance = this;

    m_run_queues.set_size(cpus);
}
/// Unregister this scheduler as the global instance, if it is the one
/// currently registered.
scheduler::~scheduler()
{
    // A scheduler being destroyed most likely means the whole system is
    // shutting down, so this is tidiness rather than necessity.
    if (s_instance != this)
        return;

    s_instance = nullptr;
}
/// Reserve room for an object on a downward-growing stack.
/// \arg rsp   Stack pointer value. On return it has been lowered by `size`
///            and then rounded down to an 8-byte boundary.
/// \arg size  Number of bytes to reserve; defaults to sizeof(T)
/// \returns   Pointer to the reserved space. This is the address computed
///            before the alignment step, so it is `size` bytes below the
///            incoming `rsp` and may sit above the final `rsp`.
template <typename T>
inline T * push(uintptr_t &rsp, size_t size = sizeof(T)) {
    constexpr uintptr_t align_mask = sizeof(uint64_t) - 1;

    const uintptr_t slot = rsp - size;
    rsp = slot & ~align_mask; // Align the stack
    return reinterpret_cast<T*>(slot);
}
void
scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant)
{
thread *th = process::kernel_process().create_thread(priority, false);
auto *tcb = th->tcb();
th->add_thunk_kernel(reinterpret_cast<uintptr_t>(task));
tcb->time_left = quantum(priority);
if (constant)
th->set_state(thread::state::constant);
th->set_state(thread::state::ready);
log::debug(logs::task, "Creating kernel task: thread %llx pri %d", th->koid(), tcb->priority);
log::debug(logs::task, " RSP0 %016lx", tcb->rsp0);
log::debug(logs::task, " RSP %016lx", tcb->rsp);
log::debug(logs::task, " PML4 %016lx", tcb->pml4);
}
/// Compute the timeslice length for a priority level.
/// \arg priority  Priority level, used as a shift count
/// \returns       quantum_micros shifted left by `priority`, i.e. the
///                base quantum doubled once per priority step
uint32_t
scheduler::quantum(int priority)
{
    const auto scaled = quantum_micros << priority;
    return scaled;
}
void
scheduler::start()
{
cpu_data &cpu = current_cpu();
run_queue &queue = m_run_queues[cpu.index];
kutil::scoped_lock lock {queue.lock};
process *kp = &process::kernel_process();
thread *idle = thread::create_idle_thread(*kp, max_priority, cpu.rsp0);
log::debug(logs::task, "CPU%02x idle thread koid %llx", cpu.index, idle->koid());
auto *tcb = idle->tcb();
cpu.process = kp;
cpu.thread = idle;
cpu.tcb = tcb;
queue.current = tcb;
log::info(logs::sched, "CPU%02x starting scheduler", cpu.index);
cpu.apic->enable_timer(isr::isrTimer, false);
cpu.apic->reset_timer(10);
}
/// Hand a thread to the scheduler on the calling CPU.
/// The thread is given a full timeslice for its priority and placed on this
/// CPU's blocked list; prune() moves it to a ready list once it is ready.
/// \arg t  TCB of the thread to add
void
scheduler::add_thread(TCB *t)
{
    run_queue &local = m_run_queues[current_cpu().index];
    kutil::scoped_lock guard {local.lock};

    t->time_left = quantum(t->priority);
    local.blocked.push_back(static_cast<tcb_node*>(t));
}
/// Walk a queue's blocked list, moving threads that have become ready onto
/// the ready list for their priority and reaping threads that have exited.
/// \arg queue  The run queue to process
/// \arg now    Current scheduler clock value, passed to each thread's
///             wake_on_time() check
void scheduler::prune(run_queue &queue, uint64_t now)
{
    auto *tcb = queue.blocked.front();
    while (tcb) {
        thread *th = thread::from_tcb(tcb);

        bool ready = th->has_state(thread::state::ready);
        const bool exited = th->has_state(thread::state::exited);
        const bool current = tcb == queue.current;

        // A timed wait that has expired also counts as ready.
        ready |= th->wake_on_time(now);

        // Advance before this node is possibly unlinked from the list.
        auto *remove = tcb;
        tcb = tcb->next();

        if (!exited && !ready)
            continue;

        if (exited) {
            // If the current thread has exited, wait until the next call
            // to prune() to delete it, because we may be deleting our current
            // page tables
            if (current)
                continue;

            queue.blocked.remove(remove);
            process &p = th->parent();

            // thread_exited deletes the thread, and returns true if the process
            // should also now be deleted
            if (p.thread_exited(th))
                delete &p;
        } else {
            queue.blocked.remove(remove);
            log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid());
            queue.ready[remove->priority].push_back(remove);
        }
    }
}
/// Promote ready threads that have waited too long without running.
/// A thread is promoted (its priority value is decreased by one and it gets
/// a fresh timeslice) when it has not run for more than two quanta of its
/// current priority, its priority is still above promote_limit, and it does
/// not have the `constant` state.
/// \arg queue  The run queue whose ready lists are examined
/// \arg now    Current scheduler clock value; recorded as the queue's
///             last_promotion time
void
scheduler::check_promotions(run_queue &queue, uint64_t now)
{
    for (auto &pri_list : queue.ready) {
        // Walk the list by hand and fetch the successor before touching the
        // node: promoting a thread unlinks it from this list and appends it
        // to another, after which its next() no longer follows this list.
        auto *tcb = pri_list.front();
        while (tcb) {
            auto *candidate = tcb;
            tcb = tcb->next();

            // Check the state of the thread being examined, not the
            // thread that is currently running.
            const thread *th = thread::from_tcb(candidate);
            if (th->has_state(thread::state::constant))
                continue;

            const uint64_t age = now - candidate->last_ran;
            const uint8_t priority = candidate->priority;

            const bool stale =
                age > quantum(priority) * 2 &&
                priority > promote_limit;

            if (!stale)
                continue;

            // If the thread is stale, promote it. It is removed from the
            // list it was found on, and lands on a list this loop has
            // already finished with, so it is not examined twice.
            pri_list.remove(candidate);
            candidate->priority -= 1;
            candidate->time_left = quantum(candidate->priority);
            queue.ready[candidate->priority].push_back(candidate);

            log::info(logs::sched, "Scheduler promoting thread %llx, priority %d",
                th->koid(), candidate->priority);
        }
    }

    queue.last_promotion = now;
}
/// Even out two task lists by moving entries from the longer source list
/// to the destination list.
/// \arg to    List receiving tasks
/// \arg from  List giving up tasks
/// \returns   Number of tasks moved: half the length difference (rounded
///            down), or 0 when `from` is not longer than `to`
static size_t
balance_lists(tcb_list &to, tcb_list &from)
{
    const size_t have = to.length();
    const size_t available = from.length();

    // Never take from a list that is no longer than our own.
    if (available <= have)
        return 0;

    const size_t steal = (available - have) / 2;

    for (size_t left = steal; left > 0; --left)
        to.push_front(from.pop_front());

    return steal;
}
/// Balance this CPU's run queue against every other CPU's queue by taking
/// tasks from queues that are longer than ours.
/// \arg cpu  The CPU doing the stealing; its index selects the destination
///           run queue
void
scheduler::steal_work(cpu_data &cpu)
{
    // First grab a scheduler-wide lock to avoid deadlock: this is the only
    // place in this file where two queue locks are held at once.
    kutil::scoped_lock steal_lock {m_steal_lock};

    // Lock this cpu's queue for the whole time while we modify it
    run_queue &my_queue = m_run_queues[cpu.index];
    kutil::scoped_lock my_queue_lock {my_queue.lock};

    const unsigned count = m_run_queues.count();
    for (unsigned i = 0; i < count; ++i) {
        if (i == cpu.index)
            continue;

        run_queue &other_queue = m_run_queues[i];
        kutil::scoped_lock other_queue_lock {other_queue.lock};

        size_t stolen = 0;

        // Don't steal from max_priority, that's the idle thread
        for (unsigned pri = 0; pri < max_priority; ++pri)
            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]);

        stolen += balance_lists(my_queue.blocked, other_queue.blocked);

        // The count is converted to int so the argument matches the %2d
        // conversion in the format string.
        if (stolen)
            log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
                cpu.index, static_cast<int>(stolen), i);
    }
}
// Pick the next thread to run on the calling CPU and switch to it.
//
// In order: stop the APIC timer, periodically steal work from other CPUs,
// account for the outgoing thread's timeslice (demoting it if it used the
// whole slice), put the outgoing thread back on a ready or blocked list,
// prune the blocked list, periodically promote stale threads, then pop the
// first thread from the lowest-numbered non-empty ready list and switch to
// it via task_switch().
void
scheduler::schedule()
{
cpu_data &cpu = current_cpu();
run_queue &queue = m_run_queues[cpu.index];
lapic &apic = *cpu.apic;
// The value returned by stop_timer() is stored below as the outgoing
// thread's remaining timeslice.
uint32_t remaining = apic.stop_timer();
// steal_work() takes this queue's lock itself, and is called here before
// the lock is acquired below.
if (m_clock - queue.last_steal > steal_frequency) {
steal_work(cpu);
queue.last_steal = m_clock;
}
// We need to explicitly lock/unlock here instead of
// using a scoped lock, because the scope doesn't "end"
// for the current thread until it gets scheduled again
kutil::spinlock::waiter waiter;
queue.lock.acquire(&waiter);
queue.current->time_left = remaining;
thread *th = thread::from_tcb(queue.current);
uint8_t priority = queue.current->priority;
const bool constant = th->has_state(thread::state::constant);
if (remaining == 0) {
// Threads already at max_priority, or with the constant state, keep
// their priority.
if (priority < max_priority && !constant) {
// Process used its whole timeslice, demote it
++queue.current->priority;
log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d",
th->koid(), queue.current->priority);
}
// Either way it starts over with a full slice for its (possibly new)
// priority.
queue.current->time_left = quantum(queue.current->priority);
} else if (remaining > 0) {
// Process gave up CPU, give it a small bonus to its
// remaining timeslice.
uint32_t bonus = quantum(priority) >> 4;
queue.current->time_left += bonus;
}
// The outgoing thread goes back onto a list before the next one is
// chosen, so it can be picked again below if it is still the best choice.
if (th->has_state(thread::state::ready)) {
queue.ready[queue.current->priority].push_back(queue.current);
} else {
queue.blocked.push_back(queue.current);
}
clock::get().update();
// NOTE(review): m_clock is incremented while holding only this CPU's queue
// lock — confirm how m_clock is declared and whether that is safe when
// several CPUs run schedule() concurrently.
prune(queue, ++m_clock);
if (m_clock - queue.last_promotion > promote_frequency)
check_promotions(queue, m_clock);
// Find the first non-empty ready list, starting from priority 0.
priority = 0;
while (queue.ready[priority].empty()) {
++priority;
kassert(priority < num_priorities, "All runlists are empty");
}
queue.current->last_ran = m_clock;
auto *next = queue.ready[priority].pop_front();
next->last_ran = m_clock;
// Re-arm the timer with the chosen thread's remaining timeslice.
apic.reset_timer(next->time_left);
// The outgoing thread was chosen again: nothing to switch.
if (next == queue.current) {
queue.lock.release(&waiter);
return;
}
thread *next_thread = thread::from_tcb(next);
cpu.thread = next_thread;
cpu.process = &next_thread->parent();
queue.current = next;
log::debug(logs::sched, "CPU%02x switching threads %llx->%llx",
cpu.index, th->koid(), next_thread->koid());
log::debug(logs::sched, " priority %d time left %d @ %lld.",
next->priority, next->time_left, m_clock);
log::debug(logs::sched, " PML4 %llx", next->pml4);
// NOTE(review): the queue lock is released before task_switch() runs. At
// this point the outgoing thread is already on a ready/blocked list —
// confirm that another CPU's steal_work() cannot take and run it before
// this CPU has finished switching away from it.
queue.lock.release(&waiter);
task_switch(queue.current);
}