#include <stddef.h>
#include <stdint.h>

#include "apic.h"
#include "assert.h"
#include "clock.h"
#include "cpu.h"
#include "debug.h"
#include "device_manager.h"
#include "gdt.h"
#include "interrupts.h"
#include "io.h"
#include "logger.h"
#include "msr.h"
#include "objects/channel.h"
#include "objects/process.h"
#include "objects/system.h"
#include "objects/thread.h"
#include "objects/vm_area.h"
#include "scheduler.h"

using obj::process;
using obj::thread;

extern "C" void task_switch(TCB *tcb);

scheduler *scheduler::s_instance = nullptr;

struct run_queue
{
    tcb_node *current = nullptr;
    tcb_node *prev = nullptr;

    tcb_list ready[scheduler::num_priorities];
    tcb_list blocked;

    uint64_t last_promotion = 0;
    uint64_t last_steal = 0;

    util::spinlock lock;
};

scheduler::scheduler(unsigned cpus) :
    m_add_index {0}
{
    kassert(!s_instance, "Created multiple schedulers!");
    if (!s_instance)
        s_instance = this;

    m_run_queues.set_size(cpus);
}

scheduler::~scheduler()
{
    // Not truly necessary - if the scheduler is going away, the whole
    // system is probably going down. But let's be clean.
    if (s_instance == this)
        s_instance = nullptr;
}

template <typename T>
inline T * push(uintptr_t &rsp, size_t size = sizeof(T))
{
    rsp -= size;
    T *p = reinterpret_cast<T*>(rsp);
    rsp &= ~(sizeof(uint64_t)-1); // Align the stack
    return p;
}

void scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant)
{
    thread *th = process::kernel_process().create_thread(0, priority);
    auto *tcb = th->tcb();

    th->add_thunk_kernel(reinterpret_cast<uintptr_t>(task));
    tcb->time_left = quantum(priority);

    if (constant)
        th->set_state(thread::state::constant);
    th->set_state(thread::state::ready);

    log::debug(logs::task, "Creating kernel task: thread %llx pri %d", th->koid(), tcb->priority);
    log::debug(logs::task, "  RSP0 %016lx", tcb->rsp0);
    log::debug(logs::task, "   RSP %016lx", tcb->rsp);
    log::debug(logs::task, "  PML4 %016lx", tcb->pml4);
}

uint32_t scheduler::quantum(int priority)
{
    return quantum_micros << priority;
}

void scheduler::start()
{
    cpu_data &cpu = current_cpu();
    run_queue &queue = m_run_queues[cpu.index];

    {
        util::scoped_lock lock {queue.lock};
        thread *idle = cpu.thread;
        queue.current = idle->tcb();
    }

    cpu.apic->enable_timer(isr::isrTimer, false);
    cpu.apic->reset_timer(10);
}

void scheduler::add_thread(TCB *t)
{
    cpu_data *cpu = g_cpu_data[m_add_index++ % g_num_cpus];
    run_queue &queue = m_run_queues[cpu->index];

    util::scoped_lock lock {queue.lock};

    t->cpu = cpu;
    t->time_left = quantum(t->priority);
    queue.blocked.push_back(static_cast<tcb_node*>(t));
}
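// A worked example of the timeslice math above (illustration only -- the
// real value of quantum_micros is defined elsewhere, and 1000µs is just an
// assumed figure): quantum() shifts the base timeslice left by the priority
// value, so numerically higher (less urgent) priorities run less often but
// for longer when they do:
//
//   quantum(0) == 1000 << 0 ==  1000µs
//   quantum(2) == 1000 << 2 ==  4000µs
//   quantum(5) == 1000 << 5 == 32000µs
//
// add_thread() then spreads new threads across CPUs round-robin via
// m_add_index, parking each on its queue's blocked list until prune()
// notices it has become ready.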
void scheduler::prune(run_queue &queue, uint64_t now)
{
    // Find threads that are ready or have exited and move them to the
    // appropriate lists.
    auto *tcb = queue.blocked.front();
    while (tcb) {
        thread *th = tcb->thread;

        uint64_t timeout = th->wake_timeout();
        if (timeout && timeout <= now)
            th->wake_only();

        bool ready = th->has_state(thread::state::ready);
        bool exited = th->has_state(thread::state::exited);
        bool current = tcb == queue.current;

        auto *remove = tcb;
        tcb = tcb->next();

        if (!exited && !ready)
            continue;

        if (exited) {
            // If the current thread has exited, wait until the next call
            // to prune() to delete it, because we may be deleting our
            // current page tables
            if (current)
                continue;

            queue.blocked.remove(remove);
            process &p = th->parent();

            // thread_exited deletes the thread, and returns true if the
            // process should also now be deleted
            if (p.thread_exited(th))
                delete &p;
        } else {
            queue.blocked.remove(remove);
            log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid());
            queue.ready[remove->priority].push_back(remove);
        }
    }
}

void scheduler::check_promotions(run_queue &queue, uint64_t now)
{
    for (auto &pri_list : queue.ready) {
        for (auto *tcb : pri_list) {
            const thread *th = tcb->thread;
            if (th->has_state(thread::state::constant))
                continue;

            const uint64_t age = now - tcb->last_ran;
            const uint8_t priority = tcb->priority;

            bool stale =
                age > quantum(priority) * 2 &&
                tcb->priority > promote_limit;

            if (stale) {
                // The thread has waited too long without running, promote it
                queue.ready[priority].remove(tcb);
                tcb->priority -= 1;
                tcb->time_left = quantum(tcb->priority);
                queue.ready[tcb->priority].push_back(tcb);
                log::info(logs::sched, "Scheduler promoting thread %llx, priority %d",
                    th->koid(), tcb->priority);
            }
        }
    }

    queue.last_promotion = now;
}

static size_t balance_lists(tcb_list &to, tcb_list &from, cpu_data &new_cpu)
{
    size_t to_len = to.length();
    size_t from_len = from.length();

    // Only steal from the rich, don't be Dennis Moore
    if (from_len <= to_len)
        return 0;

    size_t steal = (from_len - to_len) / 2;
    for (size_t i = 0; i < steal; ++i) {
        tcb_node *node = from.pop_front();
        node->cpu = &new_cpu;
        to.push_front(node);
    }

    return steal;
}

void scheduler::steal_work(cpu_data &cpu)
{
    run_queue &my_queue = m_run_queues[cpu.index];

    const unsigned count = m_run_queues.count();
    for (unsigned i = 0; i < count; ++i) {
        if (i == cpu.index)
            continue;

        run_queue &other_queue = m_run_queues[i];
        util::scoped_lock other_queue_lock {other_queue.lock};

        size_t stolen = 0;

        // Don't steal from max_priority, that's the idle thread
        for (unsigned pri = 0; pri < max_priority; ++pri)
            stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri], cpu);

        stolen += balance_lists(my_queue.blocked, other_queue.blocked, cpu);

        other_queue_lock.release();

        if (stolen)
            log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
                cpu.index, stolen, i);
    }
}
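// In summary, the feedback loop implemented by schedule() below:
//  - a thread that exhausts its quantum (remaining == 0) is demoted one
//    priority level (numerically higher), unless marked constant;
//  - a thread that yields early keeps its leftover time plus a bonus of
//    quantum(priority) >> 4, i.e. one sixteenth of a full quantum;
//  - check_promotions() walks waiting threads back up (numerically lower)
//    once they have sat unscheduled for more than twice their quantum.
// For example, with the assumed 1000µs base above, a priority-2 thread that
// yields with 1000µs left is topped up by 4000/16 = 250µs.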
void scheduler::schedule()
{
    cpu_data &cpu = current_cpu();
    run_queue &queue = m_run_queues[cpu.index];
    lapic &apic = *cpu.apic;

    uint32_t remaining = apic.stop_timer();
    uint64_t now = clock::get().value();

    // We need to explicitly lock/unlock here instead of using a scoped
    // lock, because the scope doesn't "end" for the current thread until
    // it gets scheduled again, and _new_ threads start their life at the
    // end of this function, which screws up RAII.
    util::spinlock::waiter waiter {false, nullptr, "schedule"};
    queue.lock.acquire(&waiter);

    // Only one CPU can be stealing at a time
    if (m_steal_turn == cpu.index &&
        now - queue.last_steal > steal_frequency) {
        steal_work(cpu);
        queue.last_steal = now;
        m_steal_turn = (m_steal_turn + 1) % m_run_queues.count();
    }

    queue.current->time_left = remaining;

    thread *th = queue.current->thread;
    uint8_t priority = queue.current->priority;
    const bool constant = th->has_state(thread::state::constant);

    if (remaining == 0) {
        if (priority < max_priority && !constant) {
            // Thread used its whole timeslice, demote it
            ++queue.current->priority;
            log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d",
                th->koid(), queue.current->priority);
        }
        queue.current->time_left = quantum(queue.current->priority);
    } else {
        // Thread gave up the CPU early; grant a small bonus to its
        // remaining timeslice.
        uint32_t bonus = quantum(priority) >> 4;
        queue.current->time_left += bonus;
    }

    if (th->has_state(thread::state::ready))
        queue.ready[queue.current->priority].push_back(queue.current);
    else
        queue.blocked.push_back(queue.current);

    clock::get().update();
    prune(queue, now);
    if (now - queue.last_promotion > promote_frequency)
        check_promotions(queue, now);

    priority = 0;
    while (queue.ready[priority].empty()) {
        ++priority;
        kassert(priority < num_priorities, "All runlists are empty");
    }

    queue.current->last_ran = now;
    auto *next = queue.ready[priority].pop_front();
    next->last_ran = now;
    apic.reset_timer(next->time_left);

    if (next == queue.current) {
        queue.lock.release(&waiter);
        return;
    }

    queue.prev = queue.current;

    thread *next_thread = next->thread;
    cpu.thread = next_thread;
    cpu.process = &next_thread->parent();
    queue.current = next;

    log::debug(logs::sched, "CPU%02x switching threads %llx->%llx",
        cpu.index, th->koid(), next_thread->koid());
    log::debug(logs::sched, "    priority %d time left %d @ %lld.",
        next->priority, next->time_left, now);
    log::debug(logs::sched, "    PML4 %llx", next->pml4);

    queue.lock.release(&waiter);
    task_switch(queue.current);
}

void scheduler::maybe_schedule(TCB *t)
{
    cpu_data *cpu = t->cpu;
    run_queue &queue = m_run_queues[cpu->index];

    // Lower numeric priority is more urgent: only interrupt the target CPU
    // if the new thread outranks the one it is currently running.
    uint8_t current_pri = queue.current->priority;
    if (current_pri <= t->priority)
        return;

    current_cpu().apic->send_ipi(
        lapic::ipi::fixed, isr::ipiSchedule, cpu->id);
}
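// Bring-up sketch (an assumed sequence pieced together from the interfaces
// above, not taken from the rest of the kernel): the boot CPU constructs
// the singleton and seeds it with kernel tasks, then each CPU calls start()
// to arm its local APIC timer, whose interrupt drives schedule().
//
//   scheduler sched {g_num_cpus};          // asserts on a second instance
//   sched.create_kernel_task(
//       some_task,                         // hypothetical void (*)() worker
//       scheduler::max_priority - 1,       // low urgency: long, infrequent slices
//       false);                            // not constant: subject to demotion
//   sched.start();                         // queue this CPU's idle thread, start timer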