[kernel] Change to one-shot timer scheduler

Instead of many timer interrupts and decrementing a process' remaining
quanta, change to setting a single timer for when a process should be
preempted. If it uses its whole timeslice, demote it. If it uses less
than half before blocking, promote it. Determine timeslice based on
priority as well.

This change also required changing the apic timer interface to be purely
interval (in microseconds) based instead of its previous interval/tick
hybrid.
This commit is contained in:
Justin C. Miller
2020-06-03 20:56:59 -07:00
parent ea1224e213
commit a10aca573d
7 changed files with 125 additions and 77 deletions

View File

@@ -5,6 +5,15 @@
#include "log.h"
#include "page_manager.h"
// Local APIC register offsets, as passed to apic_read / apic_write
// together with the LAPIC base pointer.
static constexpr uint16_t lapic_spurious = 0x00f0;   // spurious interrupt vector; bit 0x100 is toggled by enable()/disable()
static constexpr uint16_t lapic_lvt_timer = 0x0320;  // LVT entry for the timer (vector, plus 0x20000 for repeat mode)
static constexpr uint16_t lapic_lvt_lint0 = 0x0350;  // LVT entry for the LINT0 pin
static constexpr uint16_t lapic_lvt_lint1 = 0x0360;  // LVT entry for the LINT1 pin
static constexpr uint16_t lapic_timer_init = 0x0380; // timer initial count; written to (re)start the countdown
static constexpr uint16_t lapic_timer_cur = 0x0390;  // timer current count; read to find the remaining ticks
static constexpr uint16_t lapic_timer_div = 0x03e0;  // timer divide configuration
static uint32_t
apic_read(uint32_t volatile *apic, uint16_t offset)
@@ -44,9 +53,10 @@ apic::apic(uint32_t *base) :
lapic::lapic(uint32_t *base, isr spurious) :
apic(base)
apic(base),
m_divisor(0)
{
apic_write(m_base, 0xf0, static_cast<uint32_t>(spurious));
apic_write(m_base, lapic_spurious, static_cast<uint32_t>(spurious));
log::info(logs::apic, "LAPIC created, base %lx", m_base);
}
@@ -62,7 +72,9 @@ lapic::calibrate_timer()
outb(0x43, command);
const uint32_t initial = -1u;
enable_timer_internal(isr::isrSpurious, 1, initial, false);
enable_timer(isr::isrSpurious);
set_divisor(1);
apic_write(m_base, lapic_timer_init, initial);
const int iterations = 5;
for (int i=0; i<iterations; ++i) {
@@ -81,16 +93,16 @@ lapic::calibrate_timer()
}
}
uint32_t remain = stop_timer();
uint32_t ticks_total = initial - remain;
uint32_t remaining = apic_read(m_base, lapic_timer_cur);
uint32_t ticks_total = initial - remaining;
m_ticks_per_us = ticks_total / (iterations * 33000);
log::info(logs::apic, "APIC timer ticks %d times per nanosecond.", m_ticks_per_us);
interrupts_enable();
}
uint32_t
lapic::enable_timer_internal(isr vector, uint8_t divisor, uint32_t count, bool repeat)
void
lapic::set_divisor(uint8_t divisor)
{
uint32_t divbits = 0;
@@ -107,39 +119,45 @@ lapic::enable_timer_internal(isr vector, uint8_t divisor, uint32_t count, bool r
kassert(0, "Invalid divisor passed to lapic::enable_timer");
}
uint32_t lvte = static_cast<uint8_t>(vector);
if (repeat)
lvte |= 0x20000;
apic_write(m_base, lapic_timer_div, divbits);
m_divisor = divisor;
}
log::debug(logs::apic, "Enabling APIC timer count %ld, divisor %d, isr %02x",
count, divisor, vector);
apic_write(m_base, 0x320, lvte);
apic_write(m_base, 0x3e0, divbits);
uint32_t
lapic::enable_timer_internal(isr vector, uint8_t divisor, uint32_t count, bool repeat)
{
reset_timer(count);
return count;
}
uint32_t
lapic::enable_timer(isr vector, uint64_t interval, bool repeat)
void
lapic::enable_timer(isr vector, bool repeat)
{
uint64_t ticks = interval * m_ticks_per_us;
uint32_t lvte = static_cast<uint8_t>(vector);
if (repeat)
lvte |= 0x20000;
apic_write(m_base, lapic_lvt_timer, lvte);
log::debug(logs::apic, "Enabling APIC timer at isr %02x", vector);
}
uint32_t
lapic::reset_timer(uint64_t interval)
{
uint64_t remaining = ticks_to_us(apic_read(m_base, lapic_timer_cur));
uint64_t ticks = us_to_ticks(interval);
int divisor = 1;
while (ticks > -1u) {
ticks /= 2;
divisor *= 2;
while (ticks > 0xffffffffull) {
ticks >>= 1;
divisor <<= 1;
}
return enable_timer_internal(vector, divisor, static_cast<uint32_t>(ticks), repeat);
}
if (divisor != m_divisor)
set_divisor(divisor);
uint32_t
lapic::reset_timer(uint32_t count)
{
uint32_t remaining = apic_read(m_base, 0x390);
apic_write(m_base, 0x380, count);
apic_write(m_base, lapic_timer_init, ticks);
return remaining;
}
@@ -148,7 +166,7 @@ lapic::enable_lint(uint8_t num, isr vector, bool nmi, uint16_t flags)
{
kassert(num == 0 || num == 1, "Invalid LINT passed to lapic::enable_lint.");
uint16_t off = num ? 0x360 : 0x350;
uint16_t off = num ? lapic_lvt_lint1 : lapic_lvt_lint0;
uint32_t lvte = static_cast<uint8_t>(vector);
uint16_t polarity = flags & 0x3;
@@ -169,16 +187,16 @@ lapic::enable_lint(uint8_t num, isr vector, bool nmi, uint16_t flags)
void
lapic::enable()
{
apic_write(m_base, 0xf0,
apic_read(m_base, 0xf0) | 0x100);
apic_write(m_base, lapic_spurious,
apic_read(m_base, lapic_spurious) | 0x100);
log::debug(logs::apic, "LAPIC enabled!");
}
void
lapic::disable()
{
apic_write(m_base, 0xf0,
apic_read(m_base, 0xf0) & ~0x100);
apic_write(m_base, lapic_spurious,
apic_read(m_base, lapic_spurious) & ~0x100);
log::debug(logs::apic, "LAPIC disabled.");
}

View File

@@ -32,18 +32,16 @@ public:
/// Enable interrupts for the LAPIC timer.
/// \arg vector Interrupt vector the timer should use
/// \arg interval The timer interval, in microseconds
/// \arg repeat If false, this timer is one-off, otherwise repeating
/// \returns The count of ticks the timer is set for
uint32_t enable_timer(isr vector, uint64_t interval, bool repeat = true);
void enable_timer(isr vector, bool repeat = true);
/// Reset the timer countdown.
/// \arg count The count of ticks before an interrupt, or 0 to stop the timer
/// \returns The count of ticks that were remaining before reset
uint32_t reset_timer(uint32_t count);
/// \arg interval The interval in us before an interrupt, or 0 to stop the timer
/// \returns The interval in us that was remaining before reset
uint32_t reset_timer(uint64_t interval);
/// Stop the timer.
/// \returns The count of ticks remaining before an interrupt was to happen
/// \returns The interval in us remaining before an interrupt was to happen
inline uint32_t stop_timer() { return reset_timer(0); }
/// Enable interrupts for the LAPIC LINT0 pin.
@@ -60,8 +58,19 @@ public:
void calibrate_timer();
private:
inline uint64_t ticks_to_us(uint32_t ticks) const {
return static_cast<uint64_t>(ticks) / m_ticks_per_us;
}
/// Convert an interval in microseconds into undivided timer ticks.
/// \arg interval  Interval, in microseconds
/// \returns       The interval scaled by the calibrated ticks-per-microsecond rate
inline uint64_t us_to_ticks(uint64_t interval) const {
	const uint64_t rate = m_ticks_per_us;
	return rate * interval;
}
void set_divisor(uint8_t divisor);
void set_repeat(bool repeat);
uint32_t enable_timer_internal(isr vector, uint8_t divisor, uint32_t count, bool repeat);
uint32_t m_divisor;
uint32_t m_ticks_per_us;
};

View File

@@ -210,7 +210,7 @@ isr_handler(cpu_state *regs)
break;
case isr::isrTimer:
scheduler::get().tick();
scheduler::get().schedule();
break;
case isr::isrLINT0:

View File

@@ -173,7 +173,7 @@ kernel_main(args::header *header)
syscall_enable();
scheduler *sched = new (&scheduler::get()) scheduler(devices.get_lapic());
sched->create_kernel_task(-1, logger_task);
sched->create_kernel_task(-1, logger_task, scheduler::max_priority-1, process_flags::const_pri);
for (auto &ird : initrds) {
for (auto &f : ird.files()) {

View File

@@ -52,7 +52,7 @@ struct process
process_flags flags;
uint16_t quanta;
uint16_t _reserved;
uint8_t priority;

View File

@@ -34,23 +34,21 @@ scheduler::scheduler(lapic *apic) :
m_clock(0)
{
auto *idle = new process_node;
uint8_t last_pri = num_priorities - 1;
// The kernel idle task, also the thread we're in now
idle->pid = 0;
idle->ppid = 0;
idle->priority = last_pri;
idle->priority = max_priority;
idle->rsp = 0; // This will get set when we switch away
idle->rsp3 = 0; // Never used for the idle task
idle->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
idle->pml4 = page_manager::get_pml4();
idle->quanta = process_quanta;
idle->flags =
process_flags::running |
process_flags::ready |
process_flags::const_pri;
m_runlists[last_pri].push_back(idle);
m_runlists[max_priority].push_back(idle);
m_current = idle;
bsp_cpu_data.rsp0 = idle->rsp0;
@@ -161,7 +159,6 @@ scheduler::load_process(const char *name, const void *data, size_t size)
stack[6] = ss;
proc->rsp3 = initial_stack;
proc->quanta = process_quanta;
proc->flags =
process_flags::running |
process_flags::ready |
@@ -176,7 +173,7 @@ scheduler::load_process(const char *name, const void *data, size_t size)
}
void
scheduler::create_kernel_task(pid_t pid, void (*task)())
scheduler::create_kernel_task(pid_t pid, void (*task)(), uint8_t priority, process_flags flags)
{
auto *proc = create_process(pid);
@@ -188,11 +185,12 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
proc->add_fake_task_return(
reinterpret_cast<uintptr_t>(task));
proc->priority = priority;
proc->pml4 = page_manager::get()->get_kernel_pml4();
proc->quanta = process_quanta;
proc->flags =
process_flags::running |
process_flags::ready;
process_flags::ready |
flags;
m_runlists[default_priority].push_back(proc);
@@ -202,16 +200,25 @@ scheduler::create_kernel_task(pid_t pid, void (*task)())
log::debug(logs::task, " PML4 %016lx", proc->pml4);
}
/// Timeslice length for a priority level: the base quantum scaled by
/// (priority + 1), so numerically higher levels get longer slices.
/// \arg priority  The priority level to compute the timeslice for
/// \returns       The timeslice, in the same unit as quantum_micros
uint32_t
scheduler::quantum(int priority)
{
	const int scale = priority + 1;
	return quantum_micros * scale;
}
void
scheduler::start()
{
log::info(logs::task, "Starting scheduler.");
wrmsr(msr::ia32_gs_base, reinterpret_cast<uintptr_t>(&bsp_cpu_data));
m_tick_count = m_apic->enable_timer(isr::isrTimer, quantum_micros, false);
m_apic->enable_timer(isr::isrTimer, false);
schedule();
}
void scheduler::prune(uint64_t now)
{
// TODO: Promote processes that haven't been scheduled in too long
// Find processes that aren't ready or aren't running and
// move them to the appropriate lists.
for (auto &pri_list : m_runlists) {
@@ -263,8 +270,24 @@ void
scheduler::schedule()
{
pid_t lastpid = m_current->pid;
uint8_t priority = m_current->priority;
uint32_t remaining = m_apic->stop_timer();
m_runlists[m_current->priority].remove(m_current);
if (!(m_current->flags && process_flags::const_pri)) {
if (priority < max_priority && !remaining) {
// Process used its whole timeslice, demote it
++m_current->priority;
log::debug(logs::task, "Scheduler demoting process %d, priority %d",
m_current->pid, m_current->priority);
} else if (priority > 0 && remaining > quantum(priority)/2) {
// Process used less than half its timeslice, promote it
--m_current->priority;
log::debug(logs::task, "Scheduler promoting process %d, priority %d",
m_current->pid, m_current->priority);
}
}
m_runlists[priority].remove(m_current);
if (m_current->flags && process_flags::ready) {
m_runlists[m_current->priority].push_back(m_current);
} else {
@@ -273,31 +296,23 @@ scheduler::schedule()
prune(++m_clock);
uint8_t pri = 0;
while (m_runlists[pri].empty()) {
++pri;
kassert(pri < num_priorities, "All runlists are empty");
priority = 0;
while (m_runlists[priority].empty()) {
++priority;
kassert(priority < num_priorities, "All runlists are empty");
}
m_current = m_runlists[pri].pop_front();
m_current = m_runlists[priority].pop_front();
if (lastpid != m_current->pid) {
task_switch(m_current);
bool loading = m_current->flags && process_flags::loading;
log::debug(logs::task, "Scheduler switched to process %d, priority %d%s @ %lld.",
m_current->pid, m_current->priority, loading ? " (loading)" : "", m_clock);
}
m_current->pid, priority, loading ? " (loading)" : "", m_clock);
}
void
scheduler::tick()
{
if (--m_current->quanta == 0) {
m_current->quanta = process_quanta;
schedule();
}
m_apic->reset_timer(m_tick_count);
m_apic->reset_timer(quantum(priority));
}
process_node *

View File

@@ -19,10 +19,11 @@ class scheduler
{
public:
static const uint8_t num_priorities = 8;
static const uint8_t max_priority = num_priorities - 1;
static const uint8_t default_priority = num_priorities / 2;
/// How long the timer quantum is
static const uint64_t quantum_micros = 1000;
/// How long the base timer quantum is, in us
static const uint64_t quantum_micros = 5000;
/// How many quanta a process gets before being rescheduled
static const uint16_t process_quanta = 10;
@@ -40,7 +41,16 @@ public:
/// Create a new kernel task
/// \arg pid Pid to use for this task, must be negative
/// \arg proc Function to run as a kernel task
void create_kernel_task(pid_t pid, void (*task)());
/// \arg priority Priority to start the process with
/// \arg flags Flags to add to the process
void create_kernel_task(
pid_t pid,
void (*task)(),
uint8_t priority,
process_flags flags = process_flags::none);
/// Get the quantum for a given priority.
uint32_t quantum(int priority);
/// Start the scheduler working. This may involve starting
/// timer interrupts or other preemption methods.
@@ -64,7 +74,6 @@ public:
private:
friend uintptr_t syscall_dispatch(uintptr_t, cpu_state &);
friend void isr_handler(cpu_state*);
friend class process;
/// Create a new process object. This process will have its pid
@@ -73,9 +82,6 @@ private:
/// \returns The new process object
process_node * create_process(pid_t pid = 0);
/// Handle a timer tick
void tick();
void prune(uint64_t now);
lapic *m_apic;