[kernel] Schedule threads on other CPUs

Now that the other CPUs have been brought up, add support for scheduling
tasks on them. The scheduler now maintains separate ready/blocked lists
per CPU, and CPUs will attempt to balance load via periodic work
stealing.

Other changes as a result of this:
- The device manager no longer creates a local APIC object, but instead
  just gathers relevant info from the APCI tables. Each CPU creates its
  own local APIC object. This also spurred the APIC timer calibration to
  become a static value, as all APICs are assumed to be symmetrical.
- Fixed a bug where the scheduler was popping the current task off of
  its ready list, however the current task is never on the ready list
  (except the idle task was first set up as both current and ready).
  This was causing the lists to get into bad states. Now a task can only
  ever be current or in a ready or blocked list.
- Got rid of the unused static process::s_processes list of all
  processes, instead of trying to synchronize it via locks.
- Added spinlocks for synchronization to the scheduler and logger
  objects.
This commit is contained in:
Justin C. Miller
2021-02-15 12:56:22 -08:00
parent 2a347942bc
commit f0025dbc47
17 changed files with 337 additions and 220 deletions

View File

@@ -137,3 +137,13 @@ init_ap_trampoline:
pop rbp pop rbp
ret ret
extern long_ap_startup
global ap_idle
ap_idle:
call long_ap_startup
sti
.hang:
hlt
jmp .hang

View File

@@ -6,6 +6,8 @@
#include "kernel_memory.h" #include "kernel_memory.h"
#include "log.h" #include "log.h"
uint64_t lapic::s_ticks_per_us = 0;
static constexpr uint16_t lapic_id = 0x0020; static constexpr uint16_t lapic_id = 0x0020;
static constexpr uint16_t lapic_spurious = 0x00f0; static constexpr uint16_t lapic_spurious = 0x00f0;
@@ -54,12 +56,12 @@ apic::apic(uintptr_t base) :
} }
lapic::lapic(uintptr_t base, isr spurious) : lapic::lapic(uintptr_t base) :
apic(base), apic(base),
m_divisor(0) m_divisor(0)
{ {
apic_write(m_base, lapic_lvt_error, static_cast<uint32_t>(isr::isrAPICError)); apic_write(m_base, lapic_lvt_error, static_cast<uint32_t>(isr::isrAPICError));
apic_write(m_base, lapic_spurious, static_cast<uint32_t>(spurious)); apic_write(m_base, lapic_spurious, static_cast<uint32_t>(isr::isrSpurious));
log::info(logs::apic, "LAPIC created, base %lx", m_base); log::info(logs::apic, "LAPIC created, base %lx", m_base);
} }
@@ -122,10 +124,10 @@ lapic::calibrate_timer()
clock::get().spinwait(us); clock::get().spinwait(us);
uint32_t remaining = apic_read(m_base, lapic_timer_cur); uint32_t remaining = apic_read(m_base, lapic_timer_cur);
uint32_t ticks_total = initial - remaining; uint64_t ticks_total = initial - remaining;
m_ticks_per_us = ticks_total / us; s_ticks_per_us = ticks_total / us;
log::info(logs::apic, "APIC timer ticks %d times per microsecond.", m_ticks_per_us); log::info(logs::apic, "APIC timer ticks %d times per microsecond.", s_ticks_per_us);
interrupts_enable(); interrupts_enable();
} }
@@ -145,7 +147,7 @@ lapic::set_divisor(uint8_t divisor)
case 64: divbits = 0x9; break; case 64: divbits = 0x9; break;
case 128: divbits = 0xa; break; case 128: divbits = 0xa; break;
default: default:
kassert(0, "Invalid divisor passed to lapic::enable_timer"); kassert(0, "Invalid divisor passed to lapic::set_divisor");
} }
apic_write(m_base, lapic_timer_div, divbits); apic_write(m_base, lapic_timer_div, divbits);

View File

@@ -43,8 +43,7 @@ class lapic :
public: public:
/// Constructor /// Constructor
/// \arg base Physicl base address of the APIC's MMIO registers /// \arg base Physicl base address of the APIC's MMIO registers
/// \arg spurious Vector of the spurious interrupt handler lapic(uintptr_t base);
lapic(uintptr_t base, isr spurious);
/// Get the local APIC's ID /// Get the local APIC's ID
uint8_t get_id(); uint8_t get_id();
@@ -93,19 +92,14 @@ public:
void calibrate_timer(); void calibrate_timer();
private: private:
inline uint64_t ticks_to_us(uint32_t ticks) const { inline static uint64_t ticks_to_us(uint64_t ticks) { return ticks / s_ticks_per_us; }
return static_cast<uint64_t>(ticks) / m_ticks_per_us; inline static uint64_t us_to_ticks(uint64_t interval) { return interval * s_ticks_per_us; }
}
inline uint64_t us_to_ticks(uint64_t interval) const {
return interval * m_ticks_per_us;
}
void set_divisor(uint8_t divisor); void set_divisor(uint8_t divisor);
void set_repeat(bool repeat); void set_repeat(bool repeat);
uint32_t m_divisor; uint32_t m_divisor;
uint32_t m_ticks_per_us; static uint64_t s_ticks_per_us;
}; };

View File

@@ -1,7 +1,6 @@
#include <stdint.h> #include <stdint.h>
#include "kutil/assert.h" #include "kutil/assert.h"
#include "kutil/memory.h" #include "kutil/memory.h"
#include "apic.h"
#include "cpu.h" #include "cpu.h"
#include "cpu/cpu_id.h" #include "cpu/cpu_id.h"
#include "device_manager.h" #include "device_manager.h"

View File

@@ -2,9 +2,8 @@
#include <stdint.h> #include <stdint.h>
#include "kutil/spinlock.h"
class GDT; class GDT;
class lapic;
class process; class process;
struct TCB; struct TCB;
class thread; class thread;
@@ -34,6 +33,9 @@ struct cpu_data
TSS *tss; TSS *tss;
GDT *gdt; GDT *gdt;
// Members beyond this point do not appear in
// the assembly version
lapic *apic;
}; };
extern "C" cpu_data * _current_gsbase(); extern "C" cpu_data * _current_gsbase();
@@ -55,4 +57,3 @@ inline cpu_data & current_cpu() { return *_current_gsbase(); }
/// Validate the required CPU features are present. Really, the bootloader already /// Validate the required CPU features are present. Really, the bootloader already
/// validated the required features, but still iterate the options and log about them. /// validated the required features, but still iterate the options and log about them.
void cpu_validate(); void cpu_validate();

View File

@@ -4,6 +4,8 @@
#include <stdint.h> #include <stdint.h>
struct cpu_state;
extern "C" { extern "C" {
uintptr_t get_rsp(); uintptr_t get_rsp();
uintptr_t get_rip(); uintptr_t get_rip();

View File

@@ -63,7 +63,7 @@ void irq4_callback(void *)
device_manager::device_manager() : device_manager::device_manager() :
m_lapic(nullptr) m_lapic_base(0)
{ {
m_irqs.ensure_capacity(32); m_irqs.ensure_capacity(32);
m_irqs.set_size(16); m_irqs.set_size(16);
@@ -106,6 +106,26 @@ device_manager::parse_acpi(const void *root_table)
load_xsdt(memory::to_virtual(acpi2->xsdt_address)); load_xsdt(memory::to_virtual(acpi2->xsdt_address));
} }
const device_manager::apic_nmi *
device_manager::get_lapic_nmi(uint8_t id) const
{
for (const auto &nmi : m_nmis) {
if (nmi.cpu == 0xff || nmi.cpu == id)
return &nmi;
}
return nullptr;
}
const device_manager::irq_override *
device_manager::get_irq_override(uint8_t irq) const
{
for (const auto &o : m_overrides)
if (o.source == irq) return &o;
return nullptr;
}
ioapic * ioapic *
device_manager::get_ioapic(int i) device_manager::get_ioapic(int i)
{ {
@@ -163,38 +183,38 @@ device_manager::load_apic(const acpi_table_header *header)
{ {
const auto *apic = check_get_table<acpi_apic>(header); const auto *apic = check_get_table<acpi_apic>(header);
uintptr_t local = apic->local_address; m_lapic_base = apic->local_address;
m_lapic = new lapic(local, isr::isrSpurious);
size_t count = acpi_table_entries(apic, 1); size_t count = acpi_table_entries(apic, 1);
uint8_t const *p = apic->controller_data; uint8_t const *p = apic->controller_data;
uint8_t const *end = p + count; uint8_t const *end = p + count;
// Pass one: count IOAPIC objcts // Pass one: count objcts
int num_ioapics = 0; unsigned num_lapics = 0;
unsigned num_ioapics = 0;
unsigned num_overrides = 0;
unsigned num_nmis = 0;
while (p < end) { while (p < end) {
const uint8_t type = p[0]; const uint8_t type = p[0];
const uint8_t length = p[1]; const uint8_t length = p[1];
if (type == 1) num_ioapics++;
p += length;
}
m_ioapics.set_capacity(num_ioapics); switch (type) {
case 0: ++num_lapics; break;
// Pass two: set up IOAPIC objcts case 1: ++num_ioapics; break;
p = apic->controller_data; case 2: ++num_overrides; break;
while (p < end) { case 4: ++num_nmis; break;
const uint8_t type = p[0]; default: break;
const uint8_t length = p[1];
if (type == 1) {
uintptr_t base = kutil::read_from<uint32_t>(p+4);
uint32_t base_gsr = kutil::read_from<uint32_t>(p+8);
m_ioapics.emplace(base, base_gsr);
} }
p += length; p += length;
} }
// Pass three: configure APIC objects m_apic_ids.set_capacity(num_lapics);
m_ioapics.set_capacity(num_ioapics);
m_overrides.set_capacity(num_overrides);
m_nmis.set_capacity(num_nmis);
// Pass two: configure objects
p = apic->controller_data; p = apic->controller_data;
while (p < end) { while (p < end) {
const uint8_t type = p[0]; const uint8_t type = p[0];
@@ -205,38 +225,41 @@ device_manager::load_apic(const acpi_table_header *header)
uint8_t uid = kutil::read_from<uint8_t>(p+2); uint8_t uid = kutil::read_from<uint8_t>(p+2);
uint8_t id = kutil::read_from<uint8_t>(p+3); uint8_t id = kutil::read_from<uint8_t>(p+3);
m_apic_ids.append(id); m_apic_ids.append(id);
log::debug(logs::device, " Local APIC uid %x id %x", uid, id); log::debug(logs::device, " Local APIC uid %x id %x", uid, id);
} }
break; break;
case 1: // I/O APIC case 1: { // I/O APIC
uintptr_t base = kutil::read_from<uint32_t>(p+4);
uint32_t base_gsi = kutil::read_from<uint32_t>(p+8);
m_ioapics.emplace(base, base_gsi);
log::debug(logs::device, " IO APIC gsi %x base %x", base_gsi, base);
}
break; break;
case 2: { // Interrupt source override case 2: { // Interrupt source override
uint8_t source = kutil::read_from<uint8_t>(p+3); irq_override o;
isr gsi = isr::irq00 + kutil::read_from<uint32_t>(p+4); o.source = kutil::read_from<uint8_t>(p+3);
uint16_t flags = kutil::read_from<uint16_t>(p+8); o.gsi = kutil::read_from<uint32_t>(p+4);
o.flags = kutil::read_from<uint16_t>(p+8);
m_overrides.append(o);
log::debug(logs::device, " Intr source override IRQ %d -> %d Pol %d Tri %d", log::debug(logs::device, " Intr source override IRQ %d -> %d Pol %d Tri %d",
source, gsi, (flags & 0x3), ((flags >> 2) & 0x3)); o.source, o.gsi, (o.flags & 0x3), ((o.flags >> 2) & 0x3));
// TODO: in a multiple-IOAPIC system this might be elsewhere
m_ioapics[0].redirect(source, static_cast<isr>(gsi), flags, true);
} }
break; break;
case 4: {// LAPIC NMI case 4: {// LAPIC NMI
uint8_t cpu = kutil::read_from<uint8_t>(p + 2); apic_nmi nmi;
uint8_t num = kutil::read_from<uint8_t>(p + 5); nmi.cpu = kutil::read_from<uint8_t>(p + 2);
uint16_t flags = kutil::read_from<uint16_t>(p + 3); nmi.lint = kutil::read_from<uint8_t>(p + 5);
nmi.flags = kutil::read_from<uint16_t>(p + 3);
m_nmis.append(nmi);
log::debug(logs::device, " LAPIC NMI Proc %d LINT%d Pol %d Tri %d", log::debug(logs::device, " LAPIC NMI Proc %02x LINT%d Pol %d Tri %d",
kutil::read_from<uint8_t>(p+2), nmi.cpu, nmi.lint, nmi.flags & 0x3, (nmi.flags >> 2) & 0x3);
kutil::read_from<uint8_t>(p+5),
kutil::read_from<uint16_t>(p+3) & 0x3,
(kutil::read_from<uint16_t>(p+3) >> 2) & 0x3);
m_lapic->enable_lint(num, num == 0 ? isr::isrLINT0 : isr::isrLINT1, true, flags);
} }
break; break;
@@ -246,17 +269,6 @@ device_manager::load_apic(const acpi_table_header *header)
p += length; p += length;
} }
/*
for (uint8_t i = 0; i < m_ioapics[0].get_num_gsi(); ++i) {
switch (i) {
case 2: break;
default: m_ioapics[0].mask(i, false);
}
}
*/
m_lapic->enable();
} }
void void

View File

@@ -24,10 +24,6 @@ public:
/// \returns A reference to the system device manager /// \returns A reference to the system device manager
static device_manager & get() { return s_instance; } static device_manager & get() { return s_instance; }
/// Get the LAPIC
/// \returns An object representing the local APIC
lapic & get_lapic() { return *m_lapic; }
/// Get an IOAPIC /// Get an IOAPIC
/// \arg i Index of the requested IOAPIC /// \arg i Index of the requested IOAPIC
/// \returns An object representing the given IOAPIC if it exists, /// \returns An object representing the given IOAPIC if it exists,
@@ -68,6 +64,39 @@ public:
/// \returns True if the interrupt was handled /// \returns True if the interrupt was handled
bool dispatch_irq(unsigned irq); bool dispatch_irq(unsigned irq);
struct apic_nmi
{
uint8_t cpu;
uint8_t lint;
uint16_t flags;
};
struct irq_override
{
uint8_t source;
uint16_t flags;
uint32_t gsi;
};
/// Get the list of APIC ids for other CPUs
inline const kutil::vector<uint8_t> & get_apic_ids() const { return m_apic_ids; }
/// Get the LAPIC base address
/// \returns The physical base address of the local apic registers
uintptr_t get_lapic_base() const { return m_lapic_base; }
/// Get the NMI mapping for the given local APIC
/// \arg id ID of the local APIC
/// \returns apic_nmi structure describing the NMI configuration,
/// or null if no configuration was provided
const apic_nmi * get_lapic_nmi(uint8_t id) const;
/// Get the IRQ source override for the given IRQ
/// \arg irq IRQ number (not isr vector)
/// \returns irq_override structure describing that IRQ's
/// configuration, or null if no configuration was provided
const irq_override * get_irq_override(uint8_t irq) const;
/// Register the existance of a block device. /// Register the existance of a block device.
/// \arg blockdev Pointer to the block device /// \arg blockdev Pointer to the block device
void register_block_device(block_device *blockdev); void register_block_device(block_device *blockdev);
@@ -94,9 +123,6 @@ public:
&m_hpets[i] : nullptr; &m_hpets[i] : nullptr;
} }
/// Get the list of APIC ids for other CPUs
inline const kutil::vector<uint8_t> & get_apic_ids() const { return m_apic_ids; }
private: private:
/// Parse the ACPI XSDT and load relevant sub-tables. /// Parse the ACPI XSDT and load relevant sub-tables.
/// \arg xsdt Pointer to the XSDT from the firmware /// \arg xsdt Pointer to the XSDT from the firmware
@@ -122,10 +148,13 @@ private:
/// that has no callback. /// that has no callback.
void bad_irq(uint8_t irq); void bad_irq(uint8_t irq);
lapic *m_lapic; uintptr_t m_lapic_base;
kutil::vector<ioapic> m_ioapics; kutil::vector<ioapic> m_ioapics;
kutil::vector<hpet> m_hpets; kutil::vector<hpet> m_hpets;
kutil::vector<uint8_t> m_apic_ids; kutil::vector<uint8_t> m_apic_ids;
kutil::vector<apic_nmi> m_nmis;
kutil::vector<irq_override> m_overrides;
kutil::vector<pci_group> m_pci; kutil::vector<pci_group> m_pci;
kutil::vector<pci_device> m_devices; kutil::vector<pci_device> m_devices;

View File

@@ -39,7 +39,8 @@ extern "C" {
void (*__ctors_end)(void); void (*__ctors_end)(void);
void long_ap_startup(cpu_data *cpu); void long_ap_startup(cpu_data *cpu);
void ap_startup(); void ap_startup();
void init_ap_trampoline(void*, cpu_data *, void (*)(cpu_data *)); void ap_idle();
void init_ap_trampoline(void*, cpu_data *, void (*)());
} }
extern void __kernel_assert(const char *, unsigned, const char *); extern void __kernel_assert(const char *, unsigned, const char *);
@@ -47,13 +48,14 @@ extern void __kernel_assert(const char *, unsigned, const char *);
using namespace kernel; using namespace kernel;
volatile size_t ap_startup_count; volatile size_t ap_startup_count;
static bool scheduler_ready = false;
/// Bootstrap the memory managers. /// Bootstrap the memory managers.
void memory_initialize_pre_ctors(args::header &kargs); void memory_initialize_pre_ctors(args::header &kargs);
void memory_initialize_post_ctors(args::header &kargs); void memory_initialize_post_ctors(args::header &kargs);
process * load_simple_process(args::program &program); process * load_simple_process(args::program &program);
void start_aps(void *kpml4); unsigned start_aps(lapic &apic, const kutil::vector<uint8_t> &ids, void *kpml4);
/// TODO: not this. this is awful. /// TODO: not this. this is awful.
args::framebuffer *fb = nullptr; args::framebuffer *fb = nullptr;
@@ -122,6 +124,7 @@ kernel_main(args::header *header)
extern TSS &g_bsp_tss; extern TSS &g_bsp_tss;
extern GDT &g_bsp_gdt; extern GDT &g_bsp_gdt;
extern cpu_data g_bsp_cpu_data; extern cpu_data g_bsp_cpu_data;
extern uintptr_t idle_stack_end;
IDT *idt = new (&g_idt) IDT; IDT *idt = new (&g_idt) IDT;
@@ -131,6 +134,7 @@ kernel_main(args::header *header)
cpu->self = cpu; cpu->self = cpu;
cpu->tss = new (&g_bsp_tss) TSS; cpu->tss = new (&g_bsp_tss) TSS;
cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss};
cpu->rsp0 = idle_stack_end;
cpu_early_init(cpu); cpu_early_init(cpu);
disable_legacy_pic(); disable_legacy_pic();
@@ -160,15 +164,21 @@ kernel_main(args::header *header)
devices.parse_acpi(header->acpi_table); devices.parse_acpi(header->acpi_table);
// Need the local APIC to get the BSP's id // Need the local APIC to get the BSP's id
lapic &apic = device_manager::get().get_lapic(); uintptr_t apic_base = devices.get_lapic_base();
cpu->id = apic.get_id();
lapic *apic = new lapic(apic_base);
apic->enable();
cpu->id = apic->get_id();
cpu->apic = apic;
cpu_init(cpu, true); cpu_init(cpu, true);
devices.init_drivers(); devices.init_drivers();
devices.get_lapic().calibrate_timer(); apic->calibrate_timer();
start_aps(header->pml4); const auto &apic_ids = devices.get_apic_ids();
unsigned num_cpus = start_aps(*apic, apic_ids, header->pml4);
idt->add_ist_entries(); idt->add_ist_entries();
interrupts_enable(); interrupts_enable();
@@ -197,7 +207,8 @@ kernel_main(args::header *header)
} }
*/ */
scheduler *sched = new scheduler(devices.get_lapic()); scheduler *sched = new scheduler {num_cpus};
scheduler_ready = true;
// Skip program 0, which is the kernel itself // Skip program 0, which is the kernel itself
for (unsigned i = 1; i < header->num_programs; ++i) for (unsigned i = 1; i < header->num_programs; ++i)
@@ -209,8 +220,8 @@ kernel_main(args::header *header)
sched->start(); sched->start();
} }
void unsigned
start_aps(void *kpml4) start_aps(lapic &apic, const kutil::vector<uint8_t> &ids, void *kpml4)
{ {
using memory::frame_size; using memory::frame_size;
using memory::kernel_stack_pages; using memory::kernel_stack_pages;
@@ -220,10 +231,8 @@ start_aps(void *kpml4)
extern vm_area_guarded &g_kernel_stacks; extern vm_area_guarded &g_kernel_stacks;
clock &clk = clock::get(); clock &clk = clock::get();
lapic &apic = device_manager::get().get_lapic();
ap_startup_count = 1; // BSP processor ap_startup_count = 1; // BSP processor
auto &ids = device_manager::get().get_apic_ids();
log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1); log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1);
// Since we're using address space outside kernel space, make sure // Since we're using address space outside kernel space, make sure
@@ -245,7 +254,7 @@ start_aps(void *kpml4)
// AP idle stacks need less room than normal stacks, so pack multiple // AP idle stacks need less room than normal stacks, so pack multiple
// into a normal stack area // into a normal stack area
static constexpr size_t idle_stack_bytes = 1024; // 2KiB is generous static constexpr size_t idle_stack_bytes = 2048; // 2KiB is generous
static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size; static constexpr size_t full_stack_bytes = kernel_stack_pages * frame_size;
static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes; static constexpr size_t idle_stacks_per = full_stack_bytes / idle_stack_bytes;
@@ -258,13 +267,14 @@ start_aps(void *kpml4)
apic.send_ipi_broadcast(mode, false, 0); apic.send_ipi_broadcast(mode, false, 0);
for (uint8_t id : ids) { for (uint8_t id : ids) {
if (id == apic.get_id()) continue; if (id == bsp.id) continue;
// Set up the CPU data structures // Set up the CPU data structures
TSS *tss = new TSS; TSS *tss = new TSS;
GDT *gdt = new GDT {tss}; GDT *gdt = new GDT {tss};
cpu_data *cpu = new cpu_data; cpu_data *cpu = new cpu_data;
kutil::memset(cpu, 0, sizeof(cpu_data)); kutil::memset(cpu, 0, sizeof(cpu_data));
cpu->self = cpu; cpu->self = cpu;
cpu->id = id; cpu->id = id;
cpu->index = ++index; cpu->index = ++index;
@@ -285,7 +295,7 @@ start_aps(void *kpml4)
cpu->rsp0 = stack_end; cpu->rsp0 = stack_end;
// Set up the trampoline with this CPU's data // Set up the trampoline with this CPU's data
init_ap_trampoline(kpml4, cpu, long_ap_startup); init_ap_trampoline(kpml4, cpu, ap_idle);
// Kick it off! // Kick it off!
size_t current_count = ap_startup_count; size_t current_count = ap_startup_count;
@@ -315,6 +325,7 @@ start_aps(void *kpml4)
log::info(logs::boot, "%d CPUs running", ap_startup_count); log::info(logs::boot, "%d CPUs running", ap_startup_count);
vm_space::kernel_space().remove(vma); vm_space::kernel_space().remove(vma);
return ap_startup_count;
} }
void void
@@ -322,6 +333,12 @@ long_ap_startup(cpu_data *cpu)
{ {
cpu_init(cpu, false); cpu_init(cpu, false);
++ap_startup_count; ++ap_startup_count;
while (!scheduler_ready) asm ("pause");
while(1) asm("hlt"); uintptr_t apic_base =
device_manager::get().get_lapic_base();
cpu->apic = new lapic(apic_base);
cpu->apic->enable();
scheduler::get().start();
} }

View File

@@ -13,15 +13,11 @@ static kutil::no_construct<process> __g_kernel_process_storage;
process &g_kernel_process = __g_kernel_process_storage.value; process &g_kernel_process = __g_kernel_process_storage.value;
kutil::vector<process*> process::s_processes;
process::process() : process::process() :
kobject {kobject::type::process}, kobject {kobject::type::process},
m_next_handle {1}, m_next_handle {1},
m_state {state::running} m_state {state::running}
{ {
s_processes.append(this);
j6_handle_t self = add_handle(this); j6_handle_t self = add_handle(this);
kassert(self == self_handle(), "Process self-handle is not 1"); kassert(self == self_handle(), "Process self-handle is not 1");
} }
@@ -39,7 +35,6 @@ process::~process()
{ {
for (auto &it : m_handles) for (auto &it : m_handles)
if (it.val) it.val->handle_release(); if (it.val) it.val->handle_release();
s_processes.remove_swap(this);
} }
process & process::current() { return *current_cpu().process; } process & process::current() { return *current_cpu().process; }

View File

@@ -94,6 +94,4 @@ private:
enum class state : uint8_t { running, exited }; enum class state : uint8_t { running, exited };
state m_state; state m_state;
static kutil::vector<process*> s_processes;
}; };

View File

@@ -221,7 +221,5 @@ thread::create_idle_thread(process &kernel, uint8_t pri, uintptr_t rsp0)
thread *idle = new thread(kernel, pri, rsp0); thread *idle = new thread(kernel, pri, rsp0);
idle->set_state(state::constant); idle->set_state(state::constant);
idle->set_state(state::ready); idle->set_state(state::ready);
log::info(logs::task, "Created idle thread as koid %llx", idle->koid());
return idle; return idle;
} }

View File

@@ -17,6 +17,7 @@
#include "objects/channel.h" #include "objects/channel.h"
#include "objects/process.h" #include "objects/process.h"
#include "objects/system.h" #include "objects/system.h"
#include "objects/thread.h"
#include "objects/vm_area.h" #include "objects/vm_area.h"
#include "scheduler.h" #include "scheduler.h"
@@ -25,43 +26,37 @@
#include "kutil/assert.h" #include "kutil/assert.h"
extern "C" void task_switch(TCB *tcb);
scheduler *scheduler::s_instance = nullptr; scheduler *scheduler::s_instance = nullptr;
const uint64_t rflags_noint = 0x002; struct run_queue
const uint64_t rflags_int = 0x202;
extern uint64_t idle_stack_end;
extern "C" void task_switch(TCB *tcb);
scheduler::scheduler(lapic &apic) :
m_apic(apic),
m_next_pid(1),
m_clock(0),
m_last_promotion(0)
{ {
kassert(!s_instance, "Multiple schedulers created!"); tcb_node *current = nullptr;
s_instance = this; tcb_list ready[scheduler::num_priorities];
tcb_list blocked;
process *kp = &process::kernel_process(); uint64_t last_promotion = 0;
uint64_t last_steal = 0;
kutil::spinlock lock;
};
log::debug(logs::task, "Kernel process koid %llx", kp->koid()); scheduler::scheduler(unsigned cpus) :
m_next_pid {1},
m_clock {0}
{
kassert(!s_instance, "Created multiple schedulers!");
if (!s_instance)
s_instance = this;
thread *idle = thread::create_idle_thread(*kp, max_priority, m_run_queues.set_size(cpus);
reinterpret_cast<uintptr_t>(&idle_stack_end)); }
log::debug(logs::task, "Idle thread koid %llx", idle->koid()); scheduler::~scheduler()
{
auto *tcb = idle->tcb(); // Not truly necessary - if the scheduler is going away, the whole
m_runlists[max_priority].push_back(tcb); // system is probably going down. But let's be clean.
m_current = tcb; if (s_instance == this)
s_instance = nullptr;
cpu_data &cpu = current_cpu();
cpu.rsp0 = tcb->rsp0;
cpu.tcb = tcb;
cpu.process = kp;
cpu.thread = idle;
} }
template <typename T> template <typename T>
@@ -72,20 +67,6 @@ inline T * push(uintptr_t &rsp, size_t size = sizeof(T)) {
return p; return p;
} }
thread *
scheduler::create_process(bool user)
{
process *p = new process;
thread *th = p->create_thread(default_priority, user);
TCB *tcb = th->tcb();
log::debug(logs::task, "Creating thread %llx, priority %d, time slice %d",
th->koid(), tcb->priority, tcb->time_left);
th->set_state(thread::state::ready);
return th;
}
void void
scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant) scheduler::create_kernel_task(void (*task)(), uint8_t priority, bool constant)
{ {
@@ -115,24 +96,42 @@ scheduler::quantum(int priority)
void void
scheduler::start() scheduler::start()
{ {
log::info(logs::sched, "Starting scheduler."); cpu_data &cpu = current_cpu();
m_apic.enable_timer(isr::isrTimer, false); run_queue &queue = m_run_queues[cpu.index];
m_apic.reset_timer(10); kutil::scoped_lock lock {queue.lock};
process *kp = &process::kernel_process();
thread *idle = thread::create_idle_thread(*kp, max_priority, cpu.rsp0);
log::debug(logs::task, "CPU%02x idle thread koid %llx", cpu.index, idle->koid());
auto *tcb = idle->tcb();
cpu.process = kp;
cpu.thread = idle;
cpu.tcb = tcb;
queue.current = tcb;
log::info(logs::sched, "CPU%02x starting scheduler", cpu.index);
cpu.apic->enable_timer(isr::isrTimer, false);
cpu.apic->reset_timer(10);
} }
void void
scheduler::add_thread(TCB *t) scheduler::add_thread(TCB *t)
{ {
m_blocked.push_back(static_cast<tcb_node*>(t)); cpu_data &cpu = current_cpu();
t->time_left = quantum(t->priority); run_queue &queue = m_run_queues[cpu.index];
kutil::scoped_lock lock {queue.lock};
queue.blocked.push_back(static_cast<tcb_node*>(t));
t->time_left = quantum(t->priority);
} }
void scheduler::prune(uint64_t now) void scheduler::prune(run_queue &queue, uint64_t now)
{ {
// Find processes that are ready or have exited and // Find processes that are ready or have exited and
// move them to the appropriate lists. // move them to the appropriate lists.
auto *tcb = m_blocked.front(); auto *tcb = queue.blocked.front();
while (tcb) { while (tcb) {
thread *th = thread::from_tcb(tcb); thread *th = thread::from_tcb(tcb);
uint8_t priority = tcb->priority; uint8_t priority = tcb->priority;
@@ -140,7 +139,7 @@ void scheduler::prune(uint64_t now)
bool ready = th->has_state(thread::state::ready); bool ready = th->has_state(thread::state::ready);
bool exited = th->has_state(thread::state::exited); bool exited = th->has_state(thread::state::exited);
bool constant = th->has_state(thread::state::constant); bool constant = th->has_state(thread::state::constant);
bool current = tcb == m_current; bool current = tcb == queue.current;
ready |= th->wake_on_time(now); ready |= th->wake_on_time(now);
@@ -155,7 +154,7 @@ void scheduler::prune(uint64_t now)
// page tables // page tables
if (current) continue; if (current) continue;
m_blocked.remove(remove); queue.blocked.remove(remove);
process &p = th->parent(); process &p = th->parent();
// thread_exited deletes the thread, and returns true if the process // thread_exited deletes the thread, and returns true if the process
@@ -163,19 +162,19 @@ void scheduler::prune(uint64_t now)
if(!current && p.thread_exited(th)) if(!current && p.thread_exited(th))
delete &p; delete &p;
} else { } else {
m_blocked.remove(remove); queue.blocked.remove(remove);
log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid()); log::debug(logs::sched, "Prune: readying unblocked thread %llx", th->koid());
m_runlists[remove->priority].push_back(remove); queue.ready[remove->priority].push_back(remove);
} }
} }
} }
void void
scheduler::check_promotions(uint64_t now) scheduler::check_promotions(run_queue &queue, uint64_t now)
{ {
for (auto &pri_list : m_runlists) { for (auto &pri_list : queue.ready) {
for (auto *tcb : pri_list) { for (auto *tcb : pri_list) {
const thread *th = thread::from_tcb(m_current); const thread *th = thread::from_tcb(queue.current);
const bool constant = th->has_state(thread::state::constant); const bool constant = th->has_state(thread::state::constant);
if (constant) if (constant)
continue; continue;
@@ -190,81 +189,145 @@ scheduler::check_promotions(uint64_t now)
if (stale) { if (stale) {
// If the thread is stale, promote it // If the thread is stale, promote it
m_runlists[priority].remove(tcb); queue.ready[priority].remove(tcb);
tcb->priority -= 1; tcb->priority -= 1;
tcb->time_left = quantum(tcb->priority); tcb->time_left = quantum(tcb->priority);
m_runlists[tcb->priority].push_back(tcb); queue.ready[tcb->priority].push_back(tcb);
log::info(logs::sched, "Scheduler promoting thread %llx, priority %d", log::info(logs::sched, "Scheduler promoting thread %llx, priority %d",
th->koid(), tcb->priority); th->koid(), tcb->priority);
} }
} }
} }
m_last_promotion = now; queue.last_promotion = now;
}
static size_t
balance_lists(tcb_list &to, tcb_list &from)
{
size_t to_len = to.length();
size_t from_len = from.length();
// Only steal from the rich, don't be Dennis Moore
if (from_len <= to_len)
return 0;
size_t steal = (from_len - to_len) / 2;
for (size_t i = 0; i < steal; ++i)
to.push_front(from.pop_front());
return steal;
}
void
scheduler::steal_work(cpu_data &cpu)
{
// First grab a scheduler-wide lock to avoid deadlock
kutil::scoped_lock steal_lock {m_steal_lock};
// Lock this cpu's queue for the whole time while we modify it
run_queue &my_queue = m_run_queues[cpu.index];
kutil::scoped_lock my_queue_lock {my_queue.lock};
const unsigned count = m_run_queues.count();
for (unsigned i = 0; i < count; ++i) {
if (i == cpu.index) continue;
run_queue &other_queue = m_run_queues[i];
kutil::scoped_lock other_queue_lock {other_queue.lock};
size_t stolen = 0;
// Don't steal from max_priority, that's the idle thread
for (unsigned pri = 0; pri < max_priority; ++pri)
stolen += balance_lists(my_queue.ready[pri], other_queue.ready[pri]);
stolen += balance_lists(my_queue.blocked, other_queue.blocked);
if (stolen)
log::debug(logs::sched, "CPU%02x stole %2d tasks from CPU%02x",
cpu.index, stolen, i);
}
} }
void void
scheduler::schedule() scheduler::schedule()
{ {
uint8_t priority = m_current->priority; cpu_data &cpu = current_cpu();
uint32_t remaining = m_apic.stop_timer(); run_queue &queue = m_run_queues[cpu.index];
m_current->time_left = remaining; lapic &apic = *cpu.apic;
thread *th = thread::from_tcb(m_current); uint32_t remaining = apic.stop_timer();
if (m_clock - queue.last_steal > steal_frequency) {
steal_work(cpu);
queue.last_steal = m_clock;
}
// We need to explicitly lock/unlock here instead of
// using a scoped lock, because the scope doesn't "end"
// for the current thread until it gets scheduled again
kutil::spinlock::waiter waiter;
queue.lock.acquire(&waiter);
queue.current->time_left = remaining;
thread *th = thread::from_tcb(queue.current);
uint8_t priority = queue.current->priority;
const bool constant = th->has_state(thread::state::constant); const bool constant = th->has_state(thread::state::constant);
if (remaining == 0) { if (remaining == 0) {
if (priority < max_priority && !constant) { if (priority < max_priority && !constant) {
// Process used its whole timeslice, demote it // Process used its whole timeslice, demote it
++m_current->priority; ++queue.current->priority;
log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d", log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d",
th->koid(), m_current->priority); th->koid(), queue.current->priority);
} }
m_current->time_left = quantum(m_current->priority); queue.current->time_left = quantum(queue.current->priority);
} else if (remaining > 0) { } else if (remaining > 0) {
// Process gave up CPU, give it a small bonus to its // Process gave up CPU, give it a small bonus to its
// remaining timeslice. // remaining timeslice.
uint32_t bonus = quantum(priority) >> 4; uint32_t bonus = quantum(priority) >> 4;
m_current->time_left += bonus; queue.current->time_left += bonus;
} }
m_runlists[priority].remove(m_current);
if (th->has_state(thread::state::ready)) { if (th->has_state(thread::state::ready)) {
m_runlists[m_current->priority].push_back(m_current); queue.ready[queue.current->priority].push_back(queue.current);
} else { } else {
m_blocked.push_back(m_current); queue.blocked.push_back(queue.current);
} }
clock::get().update(); clock::get().update();
prune(++m_clock); prune(queue, ++m_clock);
if (m_clock - m_last_promotion > promote_frequency) if (m_clock - queue.last_promotion > promote_frequency)
check_promotions(m_clock); check_promotions(queue, m_clock);
priority = 0; priority = 0;
while (m_runlists[priority].empty()) { while (queue.ready[priority].empty()) {
++priority; ++priority;
kassert(priority < num_priorities, "All runlists are empty"); kassert(priority < num_priorities, "All runlists are empty");
} }
m_current->last_ran = m_clock; queue.current->last_ran = m_clock;
auto *next = m_runlists[priority].pop_front(); auto *next = queue.ready[priority].pop_front();
next->last_ran = m_clock; next->last_ran = m_clock;
m_apic.reset_timer(next->time_left); apic.reset_timer(next->time_left);
if (next != m_current) { if (next == queue.current) {
thread *next_thread = thread::from_tcb(next); queue.lock.release(&waiter);
return;
cpu_data &cpu = current_cpu();
cpu.thread = next_thread;
cpu.process = &next_thread->parent();
m_current = next;
log::debug(logs::sched, "Scheduler switching threads %llx->%llx",
th->koid(), next_thread->koid());
log::debug(logs::sched, " priority %d time left %d @ %lld.",
m_current->priority, m_current->time_left, m_clock);
log::debug(logs::sched, " PML4 %llx", m_current->pml4);
task_switch(m_current);
} }
thread *next_thread = thread::from_tcb(next);
cpu.thread = next_thread;
cpu.process = &next_thread->parent();
queue.current = next;
log::debug(logs::sched, "CPU%02x switching threads %llx->%llx",
cpu.index, th->koid(), next_thread->koid());
log::debug(logs::sched, " priority %d time left %d @ %lld.",
next->priority, next->time_left, m_clock);
log::debug(logs::sched, " PML4 %llx", next->pml4);
queue.lock.release(&waiter);
task_switch(queue.current);
} }

View File

@@ -3,7 +3,8 @@
/// The task scheduler and related definitions /// The task scheduler and related definitions
#include <stdint.h> #include <stdint.h>
#include "objects/thread.h" #include "kutil/spinlock.h"
#include "kutil/vector.h"
namespace kernel { namespace kernel {
namespace args { namespace args {
@@ -14,6 +15,7 @@ struct cpu_data;
class lapic; class lapic;
class process; class process;
struct page_table; struct page_table;
struct run_queue;
/// The task scheduler /// The task scheduler
@@ -39,8 +41,9 @@ public:
static const uint16_t process_quanta = 10; static const uint16_t process_quanta = 10;
/// Constructor. /// Constructor.
/// \arg apic The local APIC object for this CPU /// \arg cpus The number of CPUs to schedule for
scheduler(lapic &apic); scheduler(unsigned cpus);
~scheduler();
/// Create a new process from a program image in memory. /// Create a new process from a program image in memory.
/// \arg program The descriptor of the pogram in memory /// \arg program The descriptor of the pogram in memory
@@ -66,15 +69,11 @@ public:
/// Run the scheduler, possibly switching to a new task /// Run the scheduler, possibly switching to a new task
void schedule(); void schedule();
/// Get the current TCB.
/// \returns A pointer to the current thread's TCB
inline TCB * current() { return m_current; }
/// Start scheduling a new thread. /// Start scheduling a new thread.
/// \arg t The new thread's TCB /// \arg t The new thread's TCB
void add_thread(TCB *t); void add_thread(TCB *t);
/// Get a reference to the system scheduler /// Get a reference to the scheduler
/// \returns A reference to the global system scheduler /// \returns A reference to the global system scheduler
static scheduler & get() { return *s_instance; } static scheduler & get() { return *s_instance; }
@@ -82,30 +81,23 @@ private:
friend class process; friend class process;
static constexpr uint64_t promote_frequency = 10; static constexpr uint64_t promote_frequency = 10;
static constexpr uint64_t steal_frequency = 10;
/// Create a new process object. This process will have its pid void prune(run_queue &queue, uint64_t now);
/// set but nothing else. void check_promotions(run_queue &queue, uint64_t now);
/// \arg user True if this thread will enter userspace void steal_work(cpu_data &cpu);
/// \returns The new process' main thread
thread * create_process(bool user);
void prune(uint64_t now);
void check_promotions(uint64_t now);
lapic &m_apic;
uint32_t m_next_pid; uint32_t m_next_pid;
uint32_t m_tick_count; uint32_t m_tick_count;
process *m_kernel_process; process *m_kernel_process;
tcb_node *m_current;
tcb_list m_runlists[num_priorities]; kutil::vector<run_queue> m_run_queues;
tcb_list m_blocked;
// TODO: lol a real clock // TODO: lol a real clock
uint64_t m_clock = 0; uint64_t m_clock = 0;
uint64_t m_last_promotion;
kutil::spinlock m_steal_lock;
static scheduler *s_instance; static scheduler *s_instance;
}; };

View File

@@ -1,11 +1,10 @@
#include <stddef.h> #include <stddef.h>
#include "kutil/memory.h"
#include "console.h" #include "console.h"
#include "cpu.h"
#include "debug.h" #include "debug.h"
#include "log.h" #include "log.h"
#include "msr.h"
#include "scheduler.h"
#include "syscall.h" #include "syscall.h"
extern "C" { extern "C" {

View File

@@ -6,6 +6,7 @@
#include <stdint.h> #include <stdint.h>
#include "kutil/bip_buffer.h" #include "kutil/bip_buffer.h"
#include "kutil/spinlock.h"
namespace kutil { namespace kutil {
namespace log { namespace log {
@@ -111,6 +112,7 @@ private:
uint8_t m_sequence; uint8_t m_sequence;
kutil::bip_buffer m_buffer; kutil::bip_buffer m_buffer;
kutil::spinlock m_lock;
static logger *s_log; static logger *s_log;
static const char *s_level_names[static_cast<unsigned>(level::max)]; static const char *s_level_names[static_cast<unsigned>(level::max)];

View File

@@ -91,6 +91,8 @@ logger::output(level severity, area_t area, const char *fmt, va_list args)
header->bytes += header->bytes +=
vsnprintf(header->message, sizeof(buffer) - sizeof(entry), fmt, args); vsnprintf(header->message, sizeof(buffer) - sizeof(entry), fmt, args);
kutil::scoped_lock lock {m_lock};
if (m_immediate) { if (m_immediate) {
buffer[header->bytes] = 0; buffer[header->bytes] = 0;
m_immediate(area, severity, header->message); m_immediate(area, severity, header->message);
@@ -117,6 +119,8 @@ logger::output(level severity, area_t area, const char *fmt, va_list args)
size_t size_t
logger::get_entry(void *buffer, size_t size) logger::get_entry(void *buffer, size_t size)
{ {
kutil::scoped_lock lock {m_lock};
void *out; void *out;
size_t out_size = m_buffer.get_block(&out); size_t out_size = m_buffer.get_block(&out);
if (out_size == 0 || out == 0) if (out_size == 0 || out == 0)