The idle threads for the APs have intentionally tiny stacks. Logging is currently an absolute hog of stack space, so avoid logging on the idle stacks as much as possible. Eventually we should instead just reclaim the physical pages used by most of the stack instead of making them tiny.
332 lines
8.3 KiB
C++
332 lines
8.3 KiB
C++
#include <stddef.h>
|
|
|
|
#include <j6/init.h>
|
|
|
|
#include "apic.h"
|
|
#include "clock.h"
|
|
#include "console.h"
|
|
#include "cpu.h"
|
|
#include "debug.h"
|
|
#include "device_manager.h"
|
|
#include "gdt.h"
|
|
#include "interrupts.h"
|
|
#include "io.h"
|
|
#include "kernel_memory.h"
|
|
#include "log.h"
|
|
#include "msr.h"
|
|
#include "objects/channel.h"
|
|
#include "objects/process.h"
|
|
#include "objects/system.h"
|
|
#include "objects/thread.h"
|
|
#include "objects/vm_area.h"
|
|
#include "scheduler.h"
|
|
|
|
// here for the framebuffer hack
|
|
#include "kernel_args.h"
|
|
|
|
#include "kutil/assert.h"
|
|
|
|
// Declared with C linkage; no definition is visible in this file.
// schedule() calls it with the TCB of the thread chosen to run next.
// NOTE(review): presumably the low-level context-switch routine - confirm.
extern "C" void task_switch(TCB *tcb);
|
|
// The registered scheduler instance: set by the scheduler constructor and
// reset to nullptr by the destructor.
scheduler *scheduler::s_instance = nullptr;
|
|
|
|
struct run_queue
|
|
{
|
|
tcb_node *current = nullptr;
|
|
tcb_list ready[scheduler::num_priorities];
|
|
tcb_list blocked;
|
|
|
|
uint64_t last_promotion = 0;
|
|
uint64_t last_steal = 0;
|
|
kutil::spinlock lock;
|
|
};
|
|
|
|
scheduler::scheduler(unsigned cpus) :
|
|
m_next_pid {1},
|
|
m_clock {0}
|
|
{
|
|
kassert(!s_instance, "Created multiple schedulers!");
|
|
if (!s_instance)
|
|
s_instance = this;
|
|
|
|
m_run_queues.set_size(cpus);
|
|
}
|
|
|
|
/// Unregister this scheduler if it is the global instance.
scheduler::~scheduler()
{
    // Some other scheduler (or none) is registered: nothing to undo.
    if (s_instance != this)
        return;

    // If the scheduler is being destroyed the system is most likely going
    // down anyway, but clear the global pointer for tidiness.
    s_instance = nullptr;
}
|
|
|
|
/// Reserve space for a T on a downward-growing stack.
/// \arg rsp   Stack pointer value; updated in place to the new top of stack
/// \arg size  Number of bytes to reserve, defaults to sizeof(T)
/// \returns   Pointer to the reserved space, which is the new value of rsp
///
/// The stack pointer is rounded down to an 8-byte boundary *before* the
/// pointer is taken, so the returned pointer is always 8-byte aligned and
/// still refers to at least `size` bytes below the previous stack pointer.
template <typename T>
inline T * push(uintptr_t &rsp, size_t size = sizeof(T)) {
    rsp -= size;
    rsp &= ~(sizeof(uint64_t)-1); // Align the stack
    return reinterpret_cast<T*>(rsp);
}
|
|
|
|
void
|
|
scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant)
|
|
{
|
|
thread *th = process::kernel_process().create_thread(priority, false);
|
|
auto *tcb = th->tcb();
|
|
|
|
th->add_thunk_kernel(reinterpret_cast<uintptr_t>(task));
|
|
|
|
tcb->time_left = quantum(priority);
|
|
if (constant)
|
|
th->set_state(thread::state::constant);
|
|
|
|
th->set_state(thread::state::ready);
|
|
|
|
log::debug(logs::task, "Creating kernel task: thread %llx pri %d", th->koid(), tcb->priority);
|
|
log::debug(logs::task, " RSP0 %016lx", tcb->rsp0);
|
|
log::debug(logs::task, " RSP %016lx", tcb->rsp);
|
|
log::debug(logs::task, " PML4 %016lx", tcb->pml4);
|
|
}
|
|
|
|
uint32_t
|
|
scheduler::quantum(int priority)
|
|
{
|
|
return quantum_micros << priority;
|
|
}
|
|
|
|
void
|
|
scheduler::start()
|
|
{
|
|
cpu_data &cpu = current_cpu();
|
|
run_queue &queue = m_run_queues[cpu.index];
|
|
kutil::scoped_lock lock {queue.lock};
|
|
|
|
process *kp = &process::kernel_process();
|
|
thread *idle = thread::create_idle_thread(*kp, max_priority, cpu.rsp0);
|
|
|
|
auto *tcb = idle->tcb();
|
|
cpu.process = kp;
|
|
cpu.thread = idle;
|
|
cpu.tcb = tcb;
|
|
|
|
queue.current = tcb;
|
|
|
|
cpu.apic->enable_timer(isr::isrTimer, false);
|
|
cpu.apic->reset_timer(10);
|
|
}
|
|
|
|
void
|
|
scheduler::add_thread(TCB *t)
|
|
{
|
|
cpu_data &cpu = current_cpu();
|
|
run_queue &queue = m_run_queues[cpu.index];
|
|
kutil::scoped_lock lock {queue.lock};
|
|
|
|
queue.blocked.push_back(static_cast<tcb_node*>(t));
|
|
t->time_left = quantum(t->priority);
|
|
}
|
|
|
|
/// Walk a run queue's blocked list, moving threads that have become ready
/// onto the ready list for their priority, and cleaning up threads that
/// have exited.
/// \arg queue  The run queue to prune
/// \arg now    Current scheduler clock value, passed to thread::wake_on_time()
void scheduler::prune(run_queue &queue, uint64_t now)
{
    auto *tcb = queue.blocked.front();
    while (tcb) {
        thread *th = thread::from_tcb(tcb);

        bool ready = th->has_state(thread::state::ready);
        const bool exited = th->has_state(thread::state::exited);
        const bool current = tcb == queue.current;

        // Always give the thread the chance to wake on a timeout, even if
        // it is already marked ready.
        ready |= th->wake_on_time(now);

        // Advance before possibly unlinking this node from the list
        auto *remove = tcb;
        tcb = tcb->next();

        if (!exited && !ready)
            continue;

        if (exited) {
            // If the current thread has exited, wait until the next call
            // to prune() to delete it, because we may be deleting our current
            // page tables
            if (current) continue;

            queue.blocked.remove(remove);
            process &p = th->parent();

            // thread_exited deletes the thread, and returns true if the process
            // should also now be deleted
            if (p.thread_exited(th))
                delete &p;
        } else {
            queue.blocked.remove(remove);
            log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid());
            queue.ready[remove->priority].push_back(remove);
        }
    }
}
|
|
|
|
void
|
|
scheduler::check_promotions(run_queue &queue, uint64_t now)
|
|
{
|
|
for (auto &pri_list : queue.ready) {
|
|
for (auto *tcb : pri_list) {
|
|
const thread *th = thread::from_tcb(queue.current);
|
|
const bool constant = th->has_state(thread::state::constant);
|
|
if (constant)
|
|
continue;
|
|
|
|
const uint64_t age = now - tcb->last_ran;
|
|
const uint8_t priority = tcb->priority;
|
|
|
|
bool stale =
|
|
age > quantum(priority) * 2 &&
|
|
tcb->priority > promote_limit &&
|
|
!constant;
|
|
|
|
if (stale) {
|
|
// If the thread is stale, promote it
|
|
queue.ready[priority].remove(tcb);
|
|
tcb->priority -= 1;
|
|
tcb->time_left = quantum(tcb->priority);
|
|
queue.ready[tcb->priority].push_back(tcb);
|
|
log::info(logs::sched, "Scheduler promoting thread %llx, priority %d",
|
|
th->koid(), tcb->priority);
|
|
}
|
|
}
|
|
}
|
|
|
|
queue.last_promotion = now;
|
|
}
|
|
|
|
static size_t
|
|
balance_lists(tcb_list &to, tcb_list &from)
|
|
{
|
|
size_t to_len = to.length();
|
|
size_t from_len = from.length();
|
|
|
|
// Only steal from the rich, don't be Dennis Moore
|
|
if (from_len <= to_len)
|
|
return 0;
|
|
|
|
size_t steal = (from_len - to_len) / 2;
|
|
for (size_t i = 0; i < steal; ++i)
|
|
to.push_front(from.pop_front());
|
|
return steal;
|
|
}
|
|
|
|
void
|
|
scheduler::steal_work(cpu_data &cpu)
|
|
{
|
|
// First grab a scheduler-wide lock to avoid deadlock
|
|
kutil::scoped_lock steal_lock {m_steal_lock};
|
|
|
|
// Lock this cpu's queue for the whole time while we modify it
|
|
run_queue &my_queue = m_run_queues[cpu.index];
|
|
kutil::scoped_lock my_queue_lock {my_queue.lock};
|
|
|
|
const unsigned count = m_run_queues.count();
|
|
for (unsigned i = 0; i < count; ++i) {
|
|
if (i == cpu.index) continue;
|
|
|
|
run_queue &other_queue = m_run_queues[i];
|
|
kutil::scoped_lock other_queue_lock {other_queue.lock};
|
|
|
|
size_t stolen = 0;
|
|
|
|
// Don't steal from max_priority, that's the idle thread
|
|
for (unsigned pri = 0; pri < max_priority; ++pri)
|
|
stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]);
|
|
|
|
stolen += balance_lists(my_queue.blocked, other_queue.blocked);
|
|
|
|
if (stolen)
|
|
log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
|
|
cpu.index, stolen, i);
|
|
}
|
|
}
|
|
|
|
void
|
|
scheduler::schedule()
|
|
{
|
|
cpu_data &cpu = current_cpu();
|
|
run_queue &queue = m_run_queues[cpu.index];
|
|
lapic &apic = *cpu.apic;
|
|
uint32_t remaining = apic.stop_timer();
|
|
|
|
if (m_clock - queue.last_steal > steal_frequency) {
|
|
steal_work(cpu);
|
|
queue.last_steal = m_clock;
|
|
}
|
|
|
|
// We need to explicitly lock/unlock here instead of
|
|
// using a scoped lock, because the scope doesn't "end"
|
|
// for the current thread until it gets scheduled again
|
|
kutil::spinlock::waiter waiter;
|
|
queue.lock.acquire(&waiter);
|
|
|
|
queue.current->time_left = remaining;
|
|
thread *th = thread::from_tcb(queue.current);
|
|
uint8_t priority = queue.current->priority;
|
|
const bool constant = th->has_state(thread::state::constant);
|
|
|
|
if (remaining == 0) {
|
|
if (priority < max_priority && !constant) {
|
|
// Process used its whole timeslice, demote it
|
|
++queue.current->priority;
|
|
log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d",
|
|
th->koid(), queue.current->priority);
|
|
}
|
|
queue.current->time_left = quantum(queue.current->priority);
|
|
} else if (remaining > 0) {
|
|
// Process gave up CPU, give it a small bonus to its
|
|
// remaining timeslice.
|
|
uint32_t bonus = quantum(priority) >> 4;
|
|
queue.current->time_left += bonus;
|
|
}
|
|
|
|
if (th->has_state(thread::state::ready)) {
|
|
queue.ready[queue.current->priority].push_back(queue.current);
|
|
} else {
|
|
queue.blocked.push_back(queue.current);
|
|
}
|
|
|
|
clock::get().update();
|
|
prune(queue, ++m_clock);
|
|
if (m_clock - queue.last_promotion > promote_frequency)
|
|
check_promotions(queue, m_clock);
|
|
|
|
priority = 0;
|
|
while (queue.ready[priority].empty()) {
|
|
++priority;
|
|
kassert(priority < num_priorities, "All runlists are empty");
|
|
}
|
|
|
|
queue.current->last_ran = m_clock;
|
|
|
|
auto *next = queue.ready[priority].pop_front();
|
|
next->last_ran = m_clock;
|
|
apic.reset_timer(next->time_left);
|
|
|
|
if (next == queue.current) {
|
|
queue.lock.release(&waiter);
|
|
return;
|
|
}
|
|
|
|
thread *next_thread = thread::from_tcb(next);
|
|
|
|
cpu.thread = next_thread;
|
|
cpu.process = &next_thread->parent();
|
|
queue.current = next;
|
|
|
|
log::debug(logs::sched, "CPU%02x switching threads %llx->%llx",
|
|
cpu.index, th->koid(), next_thread->koid());
|
|
log::debug(logs::sched, " priority %d time left %d @ %lld.",
|
|
next->priority, next->time_left, m_clock);
|
|
log::debug(logs::sched, " PML4 %llx", next->pml4);
|
|
|
|
queue.lock.release(&waiter);
|
|
task_switch(queue.current);
|
|
}
|