[kernel] Start all other processors in the system

This very large commit is mainly focused on getting the APs started and
to a state where they're waiting to have work scheduled. (Actually
scheduling on them is for another commit.)

To do this, a bunch of major changes were needed:

- Moving a lot of the CPU initialization (including for the BSP) to
  init_cpu(). This includes setting up IST stacks, writing MSRs, and
  creating the cpu_data structure. For the APs, this also creates and
  installs the GDT and TSS, and installs the global IDT.

- Creating the AP startup code, which tries to be as position
  independent as possible. It's copied from its location to 0x8000 for
  AP startup, and some of it is fixed at that address. The AP startup
  code jumps from real mode to long mode with paging in one swell foop.

- Adding limited IPI capability to the lapic class. This will need to
  improve.

- Renaming cpu/cpu.* to cpu/cpu_id.* because it was just annoying in GDB
  and really isn't anything but cpu_id anymore.

- Moved all the GDT, TSS, and IDT code into their own files and made
  them classes instead of a mess of free functions.

- Got rid of bsp_cpu_data everywhere. Now always call the new
  current_cpu() to get the current CPU's cpu_data.

- Device manager keeps a list of APIC ids now. This should go somewhere
  else eventually, device_manager needs to be refactored away.

- Moved some more things (notably the g_kernel_stacks vma) to the
  pre-constructor setup in memory_bootstrap. That whole file is in bad
  need of a refactor.
This commit is contained in:
Justin C. Miller
2021-02-07 23:26:47 -08:00
parent a65ecb157d
commit c88170f6e0
31 changed files with 952 additions and 446 deletions

View File

@@ -12,6 +12,7 @@ modules:
- src/kernel
source:
- src/kernel/apic.cpp
- src/kernel/ap_startup.s
- src/kernel/assert.cpp
- src/kernel/boot.s
- src/kernel/clock.cpp
@@ -24,8 +25,9 @@ modules:
- src/kernel/frame_allocator.cpp
- src/kernel/fs/gpt.cpp
- src/kernel/gdt.cpp
- src/kernel/gdt.s
- src/kernel/gdtidt.s
- src/kernel/hpet.cpp
- src/kernel/idt.cpp
- src/kernel/interrupts.cpp
- src/kernel/interrupts.s
- src/kernel/io.cpp
@@ -56,6 +58,7 @@ modules:
- src/kernel/syscalls/thread.cpp
- src/kernel/syscalls/vm_area.cpp
- src/kernel/task.s
- src/kernel/tss.cpp
- src/kernel/vm_space.cpp
boot:
@@ -118,7 +121,7 @@ modules:
includes:
- src/libraries/cpu/include
source:
- src/libraries/cpu/cpu.cpp
- src/libraries/cpu/cpu_id.cpp
j6:
kind: lib

View File

@@ -8,7 +8,7 @@
#include <stdint.h>
#include "console.h"
#include "cpu/cpu.h"
#include "cpu/cpu_id.h"
#include "error.h"
#include "fs.h"
#include "hardware.h"

134
src/kernel/ap_startup.s Normal file
View File

@@ -0,0 +1,134 @@
%include "tasking.inc"
; AP startup trampoline.
;
; This code is copied to BASE and is where an application processor begins
; executing in real mode. It switches directly from real mode to long mode
; with paging enabled, loads the stack the kernel prepared, and jumps to the
; kernel-provided return address. All data references use BASE-relative
; absolute addresses because the code does not run from its link address.
section .ap_startup

BASE            equ 0x8000      ; Where the kernel will map this at runtime

CR0_PE          equ (1 << 0)
CR0_MP          equ (1 << 1)
CR0_ET          equ (1 << 4)
CR0_NE          equ (1 << 5)
CR0_WP          equ (1 << 16)
CR0_PG          equ (1 << 31)
CR0_VAL         equ CR0_PE|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_PG

CR4_DE          equ (1 << 3)
CR4_PAE         equ (1 << 5)
CR4_MCE         equ (1 << 6)
CR4_PGE         equ (1 << 7)
CR4_OSFXSR      equ (1 << 9)
CR4_OSCMMEXCPT  equ (1 << 10)
CR4_FSGSBASE    equ (1 << 16)
CR4_PCIDE       equ (1 << 17)
CR4_VAL         equ CR4_DE|CR4_PAE|CR4_MCE|CR4_PGE|CR4_OSFXSR|CR4_OSCMMEXCPT|CR4_FSGSBASE|CR4_PCIDE

; CR4.PCIDE may only be changed from 0 to 1 while IA32_EFER.LMA = 1; doing so
; earlier raises #GP. The value written from real mode therefore leaves PCIDE
; clear, and the full CR4_VAL is written once long mode is active.
CR4_VAL_EARLY   equ CR4_VAL & ~CR4_PCIDE

EFER_MSR        equ 0xC0000080
EFER_SCE        equ (1 << 0)
EFER_LME        equ (1 << 8)
EFER_NXE        equ (1 << 11)
EFER_VAL        equ EFER_SCE|EFER_LME|EFER_NXE

bits 16
default rel
align 8

global ap_startup
ap_startup:
    jmp .start_real             ; hop over the parameter block and tables

; Parameters filled in by init_ap_trampoline before each AP is started
align 8
.pml4:  dq 0                    ; value loaded into CR3
.stack: dq 0                    ; initial stack pointer for this AP
.ret:   dq 0                    ; address to jump to once in long mode

align 16
.gdt:
    dq 0x0                      ; Null GDT entry
    dq 0x00209A0000000000       ; Code
    dq 0x0000920000000000       ; Data

align 4
.gdtd:
    dw ($ - .gdt) - 1           ; limit is the offset of the last valid byte
    dd BASE + (.gdt - ap_startup)

align 4
.idtd:
    dw 0                        ; zero-length IDT descriptor
    dd 0

.start_real:
    cli
    cld

    xor ax, ax
    mov ds, ax

    ; set the temporary null IDT
    lidt [BASE + (.idtd - ap_startup)]

    ; Enter long mode: PAE and friends first (PCIDE is deferred, see above)
    mov eax, CR4_VAL_EARLY
    mov cr4, eax

    mov eax, [BASE + (.pml4 - ap_startup)]
    mov cr3, eax

    mov ecx, EFER_MSR
    rdmsr
    or eax, EFER_VAL
    wrmsr

    ; Turning on protection and paging together with EFER.LME set
    ; activates long mode
    mov eax, CR0_VAL
    mov cr0, eax

    ; Set the temporary minimal GDT
    lgdt [BASE + (.gdtd - ap_startup)]

    ; Far jump to load the 64-bit code segment
    jmp (1 << 3):(BASE + (.start_long - ap_startup))

bits 64
default abs
align 8

.start_long:
    ; set data segments
    mov ax, (2 << 3)
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax

    ; Long mode is active now, so PCIDE can be enabled. This relies on the
    ; low 12 bits of the CR3 value loaded above being zero.
    mov eax, CR4_VAL
    mov cr4, rax

    mov rax, [BASE + (.stack - ap_startup)]
    mov rsp, rax

    mov rax, [BASE + (.ret - ap_startup)]
    jmp rax

; Number of bytes from ap_startup up to this point, exported as a 64-bit value
global ap_startup_code_size
ap_startup_code_size:
    dq ($ - ap_startup)
section .text
; init_ap_trampoline
; Store the per-AP startup parameters into the parameter block at the start
; of the trampoline copy located at BASE. The three stores below write to
; BASE-relative absolute addresses, i.e. into the copy, not into the
; .ap_startup section at its link address.
;   rdi - the kernel pml4
;   rsi - the stack for this AP
;   rdx - the address the AP jumps to once it is in long mode
global init_ap_trampoline
init_ap_trampoline:
push rbp
mov rbp, rsp
; rdi is the kernel pml4
mov [BASE + (ap_startup.pml4 - ap_startup)], rdi
; rsi is the stack for this AP
mov [BASE + (ap_startup.stack - ap_startup)], rsi
; rdx is the address to jump to
mov [BASE + (ap_startup.ret - ap_startup)], rdx
; NOTE(review): this was commented "rcx is the processor id", but rcx is never
; read here; the instruction below copies rdx (the jump target) into rdi just
; before returning. Its purpose is unclear - confirm whether it is leftover code.
mov rdi, rdx
pop rbp
ret

View File

@@ -6,11 +6,16 @@
#include "kernel_memory.h"
#include "log.h"
// Byte offsets of local APIC registers from the LAPIC base address.
// apic_write() divides these by sizeof(uint32_t) to index 32-bit registers.
static constexpr uint16_t lapic_id = 0x0020; // ID register (get_id() reads bits 24-31)
static constexpr uint16_t lapic_spurious = 0x00f0; // spurious interrupt vector
static constexpr uint16_t lapic_icr_low = 0x0300; // interrupt command, low half (vector, mode, pending bit)
static constexpr uint16_t lapic_icr_high = 0x0310; // interrupt command, high half (destination)
static constexpr uint16_t lapic_lvt_timer = 0x0320; // LVT entry: timer
static constexpr uint16_t lapic_lvt_lint0 = 0x0350; // LVT entry: LINT0
static constexpr uint16_t lapic_lvt_lint1 = 0x0360; // LVT entry: LINT1
static constexpr uint16_t lapic_lvt_error = 0x0370; // LVT entry: APIC error
static constexpr uint16_t lapic_timer_init = 0x0380; // timer initial count
static constexpr uint16_t lapic_timer_cur = 0x0390; // timer current count
@@ -25,6 +30,7 @@ apic_read(uint32_t volatile *apic, uint16_t offset)
/// Write a 32-bit value to an APIC register, logging the access.
/// \arg apic    Base of the memory-mapped APIC registers
/// \arg offset  Byte offset of the register from the base
/// \arg value   Value to store in the register
static void
apic_write(uint32_t volatile *apic, uint16_t offset, uint32_t value)
{
    // value is 32 bits wide, so it must be formatted with %x, not %lx:
    // a long conversion would read 64 bits from the variadic arguments.
    log::debug(logs::apic, "LAPIC write: %x = %08x", offset, value);
    *(apic + offset/sizeof(uint32_t)) = value;
}
@@ -52,10 +58,38 @@ lapic::lapic(uintptr_t base, isr spurious) :
apic(base),
m_divisor(0)
{
apic_write(m_base, lapic_lvt_error, static_cast<uint32_t>(isr::isrAPICError));
apic_write(m_base, lapic_spurious, static_cast<uint32_t>(spurious));
log::info(logs::apic, "LAPIC created, base %lx", m_base);
}
/// Read this local APIC's ID, which is held in the top byte of the
/// ID register.
uint8_t
lapic::get_id()
{
    const auto id_register = apic_read(m_base, lapic_id);
    return static_cast<uint8_t>(id_register >> 24);
}
/// Send an inter-processor interrupt through the interrupt command
/// register. The destination is written to the high half first; the
/// write to the low half carries the vector and delivery mode.
void
lapic::send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest)
{
    // Wait until the APIC is ready to send
    ipi_wait();

    const uint32_t destination = static_cast<uint32_t>(dest) << 24;
    apic_write(m_base, lapic_icr_high, destination);

    uint32_t command = static_cast<uint32_t>(mode) << 8;
    command |= static_cast<uint32_t>(vector);
    apic_write(m_base, lapic_icr_low, command);
}
/// Spin until bit 12 of the low interrupt command register reads as
/// zero, issuing a pause instruction between polls.
void
lapic::ipi_wait()
{
    for (;;) {
        if (!(apic_read(m_base, lapic_icr_low) & (1<<12)))
            return;
        asm volatile ("pause" : : : "memory");
    }
}
void
lapic::calibrate_timer()
{

View File

@@ -29,6 +29,27 @@ public:
/// \arg spurious Vector of the spurious interrupt handler
lapic(uintptr_t base, isr spurious);
/// Get the local APIC's ID
uint8_t get_id();
/// Delivery mode of an inter-processor interrupt. send_ipi() shifts
/// this value left by 8 bits into the low interrupt command register.
enum class ipi_mode : uint8_t {
fixed = 0, ///< Deliver the given vector
smi = 2, ///< System management interrupt
nmi = 4, ///< Non-maskable interrupt
init = 5, ///< INIT request
startup = 6, ///< Start-up IPI
};
/// Send an inter-processor interrupt.
/// \arg mode The sending mode
/// \arg vector The interrupt vector
/// \arg dest The APIC ID of the destination
void send_ipi(ipi_mode mode, uint8_t vector, uint8_t dest);
/// Wait for an IPI to finish sending. This is done automatically
/// before sending another IPI with send_ipi().
void ipi_wait();
/// Enable interrupts for the LAPIC timer.
/// \arg vector Interrupt vector the timer should use
/// \arg repeat If false, this timer is one-off, otherwise repeating

View File

@@ -1,11 +1,19 @@
#include <stdint.h>
#include "kutil/assert.h"
#include "kutil/memory.h"
#include "apic.h"
#include "cpu.h"
#include "cpu/cpu.h"
#include "cpu/cpu_id.h"
#include "device_manager.h"
#include "gdt.h"
#include "idt.h"
#include "kernel_memory.h"
#include "log.h"
#include "msr.h"
#include "objects/vm_area.h"
#include "tss.h"
cpu_data bsp_cpu_data;
cpu_data g_bsp_cpu_data;
void
cpu_validate()
@@ -29,3 +37,70 @@ cpu_validate()
#undef CPU_FEATURE_OPT
#undef CPU_FEATURE_REQ
}
/// Initialize the currently executing CPU and build its cpu_data.
/// For the BSP the pre-existing global GDT, TSS and cpu_data are used and
/// the id is left as 0. For an AP the global IDT is installed, a new TSS,
/// GDT and cpu_data are allocated, the GDT is installed, and the id is
/// read from the local APIC. In both cases GS base is pointed at the
/// cpu_data, IST stacks are created, and PAT entry 7 is rewritten.
/// \arg bsp  True if the current CPU is the BSP
void
init_cpu(bool bsp)
{
extern TSS &g_bsp_tss;
extern GDT &g_bsp_gdt;
extern vm_area_guarded &g_kernel_stacks;
uint8_t id = 0;
TSS *tss = nullptr;
GDT *gdt = nullptr;
cpu_data *cpu = nullptr;
if (bsp) {
// The BSP's structures already exist as globals
gdt = &g_bsp_gdt;
tss = &g_bsp_tss;
cpu = &g_bsp_cpu_data;
} else {
// APs share the global IDT but get their own TSS, GDT and cpu_data
g_idt.install();
tss = new TSS;
gdt = new GDT {tss};
cpu = new cpu_data;
gdt->install();
lapic &apic = device_manager::get().get_lapic();
id = apic.get_id();
}
// Start from a zeroed cpu_data, then fill in the fields known so far
kutil::memset(cpu, 0, sizeof(cpu_data));
cpu->self = cpu;
cpu->id = id;
cpu->gdt = gdt;
cpu->tss = tss;
// Install the GS base pointing to the cpu_data
wrmsr(msr::ia32_gs_base, reinterpret_cast<uintptr_t>(cpu));
using memory::frame_size;
using memory::kernel_stack_pages;
constexpr size_t stack_size = kernel_stack_pages * frame_size;
// Bitmap of the IST indices referenced by the interrupt table
uint8_t ist_entries = g_idt.used_ist_entries();
// Set up the IST stacks
for (unsigned ist = 1; ist < 8; ++ist) {
if (!(ist_entries & (1 << ist)))
continue;
// Two zero entries at the top for the null frame
uintptr_t stack_bottom = g_kernel_stacks.get_section();
uintptr_t stack_top = stack_bottom + stack_size - 2 * sizeof(uintptr_t);
// Pre-realize these stacks, they're no good if they page fault
*reinterpret_cast<uint64_t*>(stack_top) = 0;
tss->ist_stack(ist) = stack_top;
}
// Set up the page attributes table
uint64_t pat = rdmsr(msr::ia32_pat);
// Replace only the top byte (entry 7) and keep the other seven entries
pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC
wrmsr(msr::ia32_pat, pat);
}

View File

@@ -2,9 +2,13 @@
#include <stdint.h>
#include "kutil/spinlock.h"
class GDT;
class process;
struct TCB;
class thread;
class process;
class TSS;
struct cpu_state
{
@@ -18,15 +22,34 @@ struct cpu_state
/// version in 'tasking.inc'
struct cpu_data
{
cpu_data *self;
uint64_t id;
uintptr_t rsp0;
uintptr_t rsp3;
TCB *tcb;
thread *t;
process *p;
thread *thread;
process *process;
TSS *tss;
GDT *gdt;
// Values from here on don't need to be in the asm version
kutil::spinlock::node spinner;
};
extern cpu_data bsp_cpu_data;
extern "C" cpu_data * _current_gsbase();
// We already validated the required options in the bootloader,
// but iterate the options and log about them.
/// Initialize a CPU and set up its cpu_data structure
/// \arg bsp True if the current CPU is the BSP
void init_cpu(bool bsp);
/// Get the cpu_data struct for the current executing CPU
inline cpu_data & current_cpu() { return *_current_gsbase(); }
/// Validate the required CPU features are present. Really, the bootloader already
/// validated the required features, but still iterate the options and log about them.
void cpu_validate();
/// Set up the running CPU. This sets GDT, IDT, and necessary MSRs as well as creating
/// the cpu_data structure for this processor.
/// \arg bsp True if this CPU is the BSP
void cpu_initialize(bool bsp);

View File

@@ -13,6 +13,7 @@ void
print_regs(const cpu_state &regs)
{
console *cons = console::get();
cpu_data &cpu = current_cpu();
uint64_t cr2 = 0;
__asm__ __volatile__ ("mov %%cr2, %0" : "=r"(cr2));
@@ -20,8 +21,8 @@ print_regs(const cpu_state &regs)
uintptr_t cr3 = 0;
__asm__ __volatile__ ( "mov %%cr3, %0" : "=r" (cr3) );
cons->printf(" process: %llx", bsp_cpu_data.p->koid());
cons->printf(" thread: %llx\n", bsp_cpu_data.t->koid());
cons->printf(" process: %llx", cpu.process->koid());
cons->printf(" thread: %llx\n", cpu.thread->koid());
print_regL("rax", regs.rax);
print_regM("rbx", regs.rbx);
@@ -43,7 +44,7 @@ print_regs(const cpu_state &regs)
cons->puts("\n\n");
print_regL("rbp", regs.rbp);
print_regM("rsp", regs.user_rsp);
print_regR("sp0", bsp_cpu_data.rsp0);
print_regR("sp0", cpu.rsp0);
print_regL("rip", regs.rip);
print_regM("cr3", cr3);

View File

@@ -204,7 +204,8 @@ device_manager::load_apic(const acpi_table_header *header)
case 0: { // Local APIC
uint8_t uid = kutil::read_from<uint8_t>(p+2);
uint8_t id = kutil::read_from<uint8_t>(p+3);
log::debug(logs::device, " Local APIC uid %x id %x", id);
m_apic_ids.append(id);
log::debug(logs::device, " Local APIC uid %x id %x", uid, id);
}
break;

View File

@@ -26,7 +26,7 @@ public:
/// Get the LAPIC
/// \returns An object representing the local APIC
lapic * get_lapic() { return m_lapic; }
lapic & get_lapic() { return *m_lapic; }
/// Get an IOAPIC
/// \arg i Index of the requested IOAPIC
@@ -94,6 +94,9 @@ public:
&m_hpets[i] : nullptr;
}
/// Get the list of local APIC ids recorded by load_apic().
/// NOTE(review): load_apic() appears to append the id of every Local APIC
/// entry it parses, which would include the BSP's own id rather than only
/// the other CPUs - confirm that callers account for this.
inline const kutil::vector<uint8_t> & get_apic_ids() const { return m_apic_ids; }
private:
/// Parse the ACPI XSDT and load relevant sub-tables.
/// \arg xsdt Pointer to the XSDT from the firmware
@@ -122,6 +125,7 @@ private:
lapic *m_lapic;
kutil::vector<ioapic> m_ioapics;
kutil::vector<hpet> m_hpets;
kutil::vector<uint8_t> m_apic_ids;
kutil::vector<pci_group> m_pci;
kutil::vector<pci_device> m_devices;

View File

@@ -1,36 +1,80 @@
#include <stdint.h>
#include "kutil/assert.h"
#include "kutil/enum_bitfields.h"
#include "kutil/memory.h"
#include "kutil/no_construct.h"
#include "console.h"
#include "kernel_memory.h"
#include "cpu.h"
#include "gdt.h"
#include "log.h"
#include "tss.h"
extern "C" void gdt_write(const void *gdt_ptr, uint16_t cs, uint16_t ds, uint16_t tr);
static constexpr uint8_t kern_cs_index = 1;
static constexpr uint8_t kern_ss_index = 2;
static constexpr uint8_t user_cs32_index = 3;
static constexpr uint8_t user_ss_index = 4;
static constexpr uint8_t user_cs64_index = 5;
static constexpr uint8_t tss_index = 6; // Note that this takes TWO GDT entries
// The BSP's GDT is initialized _before_ global constructors are called,
// so we don't want it to have a global constructor, lest it overwrite
// the previous initialization.
static kutil::no_construct<GDT> __g_bsp_gdt_storage;
GDT &g_bsp_gdt = __g_bsp_gdt_storage.value;
enum class gdt_type : uint8_t
GDT::GDT(TSS *tss) :
m_tss(tss)
{
accessed = 0x01,
read_write = 0x02,
conforming = 0x04,
execute = 0x08,
system = 0x10,
ring1 = 0x20,
ring2 = 0x40,
ring3 = 0x60,
present = 0x80
};
IS_BITFIELD(gdt_type);
kutil::memset(this, 0, sizeof(GDT));
struct gdt_descriptor
m_ptr.limit = sizeof(m_entries) - 1;
m_ptr.base = &m_entries[0];
// Kernel CS/SS - always 64bit
set(kern_cs_index, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute);
set(kern_ss_index, 0, 0xfffff, true, gdt_type::read_write);
// User CS32/SS/CS64 - layout expected by SYSRET
set(user_cs32_index, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute);
set(user_ss_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write);
set(user_cs64_index, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute);
set_tss(tss);
}
GDT &
GDT::current()
{
uint16_t limit_low;
uint16_t base_low;
uint8_t base_mid;
gdt_type type;
uint8_t size;
uint8_t base_high;
} __attribute__ ((packed));
cpu_data &cpu = current_cpu();
return *cpu.gdt;
}
/// Load this GDT on the current CPU, switching to the kernel code and
/// data selectors and loading the task register with the TSS selector.
void
GDT::install() const
{
    const void *table_pointer = static_cast<const void*>(&m_ptr);

    // A selector is the table index shifted past the three low bits
    const uint16_t code_selector = kern_cs_index << 3;
    const uint16_t data_selector = kern_ss_index << 3;
    const uint16_t task_selector = tss_index << 3;

    gdt_write(table_pointer, code_selector, data_selector, task_selector);
}
void
GDT::set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type)
{
m_entries[i].limit_low = limit & 0xffff;
m_entries[i].size = (limit >> 16) & 0xf;
m_entries[i].size |= (is64 ? 0xa0 : 0xc0);
m_entries[i].base_low = base & 0xffff;
m_entries[i].base_mid = (base >> 16) & 0xff;
m_entries[i].base_high = (base >> 24) & 0xff;
m_entries[i].type = type | gdt_type::system | gdt_type::present;
}
struct tss_descriptor
{
@@ -44,72 +88,16 @@ struct tss_descriptor
uint32_t reserved;
} __attribute__ ((packed));
struct tss_entry
{
uint32_t reserved0;
uint64_t rsp[3]; // stack pointers for CPL 0-2
uint64_t ist[8]; // ist[0] is reserved
uint64_t reserved1;
uint16_t reserved2;
uint16_t iomap_offset;
} __attribute__ ((packed));
struct idt_descriptor
{
uint16_t base_low;
uint16_t selector;
uint8_t ist;
uint8_t flags;
uint16_t base_mid;
uint32_t base_high;
uint32_t reserved; // must be zero
} __attribute__ ((packed));
struct table_ptr
{
uint16_t limit;
uint64_t base;
} __attribute__ ((packed));
gdt_descriptor g_gdt_table[10];
idt_descriptor g_idt_table[256];
table_ptr g_gdtr;
table_ptr g_idtr;
tss_entry g_tss;
extern "C" {
void idt_write();
void idt_load();
void gdt_write(uint16_t cs, uint16_t ds, uint16_t tr);
void gdt_load();
}
void
gdt_set_entry(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type)
{
g_gdt_table[i].limit_low = limit & 0xffff;
g_gdt_table[i].size = (limit >> 16) & 0xf;
g_gdt_table[i].size |= (is64 ? 0xa0 : 0xc0);
g_gdt_table[i].base_low = base & 0xffff;
g_gdt_table[i].base_mid = (base >> 16) & 0xff;
g_gdt_table[i].base_high = (base >> 24) & 0xff;
g_gdt_table[i].type = type | gdt_type::system | gdt_type::present;
}
void
tss_set_entry(uint8_t i, uint64_t base, uint64_t limit)
GDT::set_tss(TSS *tss)
{
tss_descriptor tssd;
size_t limit = sizeof(TSS);
tssd.limit_low = limit & 0xffff;
tssd.size = (limit >> 16) & 0xf;
uintptr_t base = reinterpret_cast<uintptr_t>(tss);
tssd.base_00 = base & 0xffff;
tssd.base_16 = (base >> 16) & 0xff;
tssd.base_24 = (base >> 24) & 0xff;
@@ -121,123 +109,26 @@ tss_set_entry(uint8_t i, uint64_t base, uint64_t limit)
gdt_type::execute |
gdt_type::ring3 |
gdt_type::present;
kutil::memcpy(&g_gdt_table[i], &tssd, sizeof(tss_descriptor));
kutil::memcpy(&m_entries[tss_index], &tssd, sizeof(tss_descriptor));
}
void
idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags)
GDT::dump(unsigned index) const
{
g_idt_table[i].base_low = addr & 0xffff;
g_idt_table[i].base_mid = (addr >> 16) & 0xffff;
g_idt_table[i].base_high = (addr >> 32) & 0xffffffff;
g_idt_table[i].selector = selector;
g_idt_table[i].flags = flags;
g_idt_table[i].ist = 0;
g_idt_table[i].reserved = 0;
}
void
tss_set_stack(unsigned ring, uintptr_t rsp)
{
kassert(ring < 3, "Bad ring passed to tss_set_stack.");
g_tss.rsp[ring] = rsp;
}
uintptr_t
tss_get_stack(unsigned ring)
{
kassert(ring < 3, "Bad ring passed to tss_get_stack.");
return g_tss.rsp[ring];
}
void
idt_set_ist(unsigned i, unsigned ist)
{
g_idt_table[i].ist = ist;
}
void
tss_set_ist(unsigned ist, uintptr_t rsp)
{
kassert(ist > 0 && ist < 7, "Bad ist passed to tss_set_ist.");
g_tss.ist[ist] = rsp;
}
void
ist_increment(unsigned i)
{
uint8_t ist = g_idt_table[i].ist;
if (ist)
g_tss.ist[ist] += memory::frame_size;
}
void
ist_decrement(unsigned i)
{
uint8_t ist = g_idt_table[i].ist;
if (ist)
g_tss.ist[ist] -= memory::frame_size;
}
uintptr_t
tss_get_ist(unsigned ist)
{
kassert(ist > 0 && ist < 7, "Bad ist passed to tss_get_ist.");
return g_tss.ist[ist];
}
void
gdt_init()
{
kutil::memset(&g_gdt_table, 0, sizeof(g_gdt_table));
kutil::memset(&g_idt_table, 0, sizeof(g_idt_table));
g_gdtr.limit = sizeof(g_gdt_table) - 1;
g_gdtr.base = reinterpret_cast<uint64_t>(&g_gdt_table);
// Kernel CS/SS - always 64bit
gdt_set_entry(1, 0, 0xfffff, true, gdt_type::read_write | gdt_type::execute);
gdt_set_entry(2, 0, 0xfffff, true, gdt_type::read_write);
// User CS32/SS/CS64 - layout expected by SYSRET
gdt_set_entry(3, 0, 0xfffff, false, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute);
gdt_set_entry(4, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write);
gdt_set_entry(5, 0, 0xfffff, true, gdt_type::ring3 | gdt_type::read_write | gdt_type::execute);
kutil::memset(&g_tss, 0, sizeof(tss_entry));
g_tss.iomap_offset = sizeof(tss_entry);
uintptr_t tss_base = reinterpret_cast<uintptr_t>(&g_tss);
// Note that this takes TWO GDT entries
tss_set_entry(6, tss_base, sizeof(tss_entry));
gdt_write(1 << 3, 2 << 3, 6 << 3);
g_idtr.limit = sizeof(g_idt_table) - 1;
g_idtr.base = reinterpret_cast<uint64_t>(&g_idt_table);
idt_write();
}
void
gdt_dump(unsigned index)
{
const table_ptr &table = g_gdtr;
console *cons = console::get();
unsigned start = 0;
unsigned count = (table.limit + 1) / sizeof(gdt_descriptor);
unsigned count = (m_ptr.limit + 1) / sizeof(descriptor);
if (index != -1) {
start = index;
count = 1;
} else {
cons->printf(" GDT: loc:%lx size:%d\n", table.base, table.limit+1);
cons->printf(" GDT: loc:%lx size:%d\n", m_ptr.base, m_ptr.limit+1);
}
const gdt_descriptor *gdt =
reinterpret_cast<const gdt_descriptor *>(table.base);
const descriptor *gdt =
reinterpret_cast<const descriptor *>(m_ptr.base);
for (int i = start; i < start+count; ++i) {
uint32_t base =
@@ -275,51 +166,3 @@ gdt_dump(unsigned index)
(gdt[i].size & 0x60) == 0x40 ? "32" : "16");
}
}
void
idt_dump(unsigned index)
{
const table_ptr &table = g_idtr;
unsigned start = 0;
unsigned count = (table.limit + 1) / sizeof(idt_descriptor);
if (index != -1) {
start = index;
count = 1;
log::info(logs::boot, "IDT FOR INDEX %02x", index);
} else {
log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", table.base, table.limit+1);
}
const idt_descriptor *idt =
reinterpret_cast<const idt_descriptor *>(table.base);
for (int i = start; i < start+count; ++i) {
uint64_t base =
(static_cast<uint64_t>(idt[i].base_high) << 32) |
(static_cast<uint64_t>(idt[i].base_mid) << 16) |
idt[i].base_low;
char const *type;
switch (idt[i].flags & 0xf) {
case 0x5: type = " 32tsk "; break;
case 0x6: type = " 16int "; break;
case 0x7: type = " 16trp "; break;
case 0xe: type = " 32int "; break;
case 0xf: type = " 32trp "; break;
default: type = " ????? "; break;
}
if (idt[i].flags & 0x80) {
log::debug(logs::boot,
" Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d", i, base,
(idt[i].selector & 0x3),
((idt[i].selector & 0x4) >> 2),
(idt[i].selector >> 3),
idt[i].ist,
type,
((idt[i].flags >> 5) & 0x3));
}
}
}

View File

@@ -1,58 +1,66 @@
#pragma once
/// \file gdt.h
/// Definitions relating to system descriptor tables: GDT, IDT, TSS
/// Definitions relating to a CPU's GDT table
#include <stdint.h>
/// Set up the GDT and TSS, and switch segment registers to point
/// to them.
void gdt_init();
#include "kutil/enum_bitfields.h"
/// Set an entry in the IDT
/// \arg i Index in the IDT (vector of the interrupt this handles)
/// \arg addr Address of the handler
/// \arg selector GDT selector to set when invoking this handler
/// \arg flags Descriptor flags to set
void idt_set_entry(uint8_t i, uint64_t addr, uint16_t selector, uint8_t flags);
class TSS;
/// Set the stack pointer for a given ring in the TSS
/// \arg ring Ring to set for (0-2)
/// \arg rsp Stack pointer to set
void tss_set_stack(unsigned ring, uintptr_t rsp);
/// Bits of the type byte of a GDT descriptor. Values can be combined
/// with the bitwise operators enabled by IS_BITFIELD below.
enum class gdt_type : uint8_t
{
accessed = 0x01,
read_write = 0x02,
conforming = 0x04,
execute = 0x08,
// NOTE(review): GDT::set() adds this bit to every code and data segment,
// so despite its name it appears to mark non-system descriptors - confirm.
system = 0x10,
// Privilege level, stored in bits 5-6
ring1 = 0x20,
ring2 = 0x40,
ring3 = 0x60,
present = 0x80
};
IS_BITFIELD(gdt_type);
/// Get the stack pointer for a given ring in the TSS
/// \arg ring Ring to get (0-2)
/// \returns Stack pointers for that ring
uintptr_t tss_get_stack(unsigned ring);
class GDT
{
public:
GDT(TSS *tss);
/// Set the given IDT entry to use the given IST entry
/// \arg i Which IDT entry to set
/// \arg ist Which IST entry to set (1-7)
void idt_set_ist(unsigned i, unsigned ist);
/// Get the currently running CPU's GDT
static GDT & current();
/// Set the stack pointer for a given IST in the TSS
/// \arg ist Which IST entry to set (1-7)
/// \arg rsp Stack pointer to set
void tss_set_ist(unsigned ist, uintptr_t rsp);
/// Install this GDT to the current CPU
void install() const;
/// Increment the stack pointer for the given vector,
/// if it's using an IST entry
/// \arg i Which IDT entry to use
void ist_increment(unsigned i);
/// Get the address of the GDT pointer structure (m_ptr)
inline const void * pointer() const { return static_cast<const void*>(&m_ptr); }
/// Decrement the stack pointer for the given vector,
/// if it's using an IST entry
/// \arg i Which IDT entry to use
void ist_decrement(unsigned i);
/// Dump debug information about the GDT to the console.
/// \arg index Which entry to print, or -1 for all entries
void dump(unsigned index = -1) const;
/// Get the stack pointer for a given IST in the TSS
/// \arg ring Which IST entry to get (1-7)
/// \returns Stack pointers for that IST entry
uintptr_t tss_get_ist(unsigned ist);
private:
void set(uint8_t i, uint32_t base, uint64_t limit, bool is64, gdt_type type);
void set_tss(TSS *tss);
/// Dump information about the current GDT to the screen
/// \arg index Which entry to print, or -1 for all entries
void gdt_dump(unsigned index = -1);
/// One 8-byte entry of the GDT. The base and limit are split across
/// several fields; see GDT::set() for how they are filled in.
struct descriptor
{
    uint16_t limit_low;
    uint16_t base_low;
    uint8_t base_mid;
    gdt_type type;
    uint8_t size;       ///< Upper limit bits in the low nibble, flags above
    uint8_t base_high;
} __attribute__ ((packed, aligned(8))); // 'aligned' is the attribute name; 'align' is ignored
/// Dump information about the current IDT to the screen
/// \arg index Which entry to print, or -1 for all entries
void idt_dump(unsigned index = -1);
/// The limit/base pair handed to gdt_write() to load this table.
struct ptr
{
    uint16_t limit;     ///< Size of the table in bytes, minus one
    descriptor *base;   ///< Address of the first entry
} __attribute__ ((packed, aligned(4))); // 'aligned' is the attribute name; 'align' is ignored
descriptor m_entries[8];
TSS *m_tss;
ptr m_ptr;
};

View File

@@ -1,35 +0,0 @@
extern g_idtr
extern g_gdtr
global idt_write
idt_write:
lidt [rel g_idtr]
ret
global idt_load
idt_load:
sidt [rel g_idtr]
ret
global gdt_write
gdt_write:
lgdt [rel g_gdtr]
mov ax, si ; second arg is data segment
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
push qword rdi ; first arg is code segment
lea rax, [rel .next]
push rax
o64 retf
.next:
ltr dx ; third arg is the TSS
ret
global gdt_load
gdt_load:
sgdt [rel g_gdtr]
ret

35
src/kernel/gdtidt.s Normal file
View File

@@ -0,0 +1,35 @@
; idt_write: load the IDT register from the table pointer whose address
; is passed in rdi.
global idt_write
idt_write:
lidt [rdi] ; first arg is the IDT pointer location
ret
; idt_load: store the current IDT register value to the memory whose
; address is passed in rdi.
global idt_load
idt_load:
sidt [rdi] ; first arg is where to write the idtr value
ret
; gdt_write: load a new GDT and switch every segment register and the
; task register over to it.
;   rdi - address of the GDT pointer structure
;   rsi - code segment selector
;   rdx - data segment selector
;   rcx - TSS selector
global gdt_write
gdt_write:
lgdt [rdi] ; first arg is the GDT pointer location
mov ax, dx ; third arg is data segment
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
; CS cannot be written with mov: push the new selector and the address of
; .next, then far-return to reload CS and continue at .next
push qword rsi ; second arg is code segment
lea rax, [rel .next]
push rax
o64 retf
.next:
ltr cx ; fourth arg is the TSS
ret
; gdt_load: store the current GDT register value to the memory whose
; address is passed in rdi.
global gdt_load
gdt_load:
sgdt [rdi] ; first arg is where to write the gdtr value
ret

137
src/kernel/idt.cpp Normal file
View File

@@ -0,0 +1,137 @@
#include "kutil/memory.h"
#include "kutil/no_construct.h"
#include "idt.h"
#include "log.h"
extern "C" {
// Loads the IDT register from the table pointer at idt_ptr
void idt_write(const void *idt_ptr);
// Declare one handler function for every ISR, EISR and IRQ line
// listed in interrupt_isrs.inc
#define ISR(i, s, name) extern void name ();
#define EISR(i, s, name) extern void name ();
#define IRQ(i, q, name) extern void name ();
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
}
// The IDT is initialized _before_ global constructors are called,
// so we don't want it to have a global constructor, lest it overwrite
// the previous initialization.
static kutil::no_construct<IDT> __g_idt_storage;
IDT &g_idt = __g_idt_storage.value;
/// Build the table: zero the whole object, point m_ptr at the entries,
/// then add one entry for every handler listed in interrupt_isrs.inc.
IDT::IDT()
{
kutil::memset(this, 0, sizeof(IDT));
m_ptr.limit = sizeof(m_entries) - 1;
m_ptr.base = &m_entries[0];
// Every entry uses selector 0x08 and flags 0x8e; set() leaves the
// IST index at zero, see add_ist_entries()
#define ISR(i, s, name) set(i, & name, 0x08, 0x8e);
#define EISR(i, s, name) set(i, & name, 0x08, 0x8e);
#define IRQ(i, q, name) set(i, & name, 0x08, 0x8e);
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
}
/// Load this table into the current CPU's IDT register.
void
IDT::install() const
{
    const void *table_pointer = static_cast<const void*>(&m_ptr);
    idt_write(table_pointer);
}
/// Apply the IST index given in interrupt_isrs.inc to each ISR and EISR
/// entry that has a non-zero one. IRQ lines are skipped.
void
IDT::add_ist_entries()
{
// The second macro argument is the IST index for ISR and EISR lines
#define ISR(i, s, name) if (s) { set_ist(i, s); }
#define EISR(i, s, name) if (s) { set_ist(i, s); }
#define IRQ(i, q, name)
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
}
/// Collect the IST indices named in interrupt_isrs.inc into a bitmap.
/// \returns A byte with bit N set when some ISR or EISR line uses IST
///          index N. This is computed from the include file, not from
///          the ist fields currently stored in the table.
uint8_t
IDT::used_ist_entries() const
{
uint8_t entries = 0;
#define ISR(i, s, name) if (s) { entries |= (1 << s); }
#define EISR(i, s, name) if (s) { entries |= (1 << s); }
#define IRQ(i, q, name)
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
return entries;
}
void
IDT::set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags)
{
uintptr_t addr = reinterpret_cast<uintptr_t>(handler);
m_entries[i].base_low = addr & 0xffff;
m_entries[i].base_mid = (addr >> 16) & 0xffff;
m_entries[i].base_high = (addr >> 32) & 0xffffffff;
m_entries[i].selector = selector;
m_entries[i].flags = flags;
m_entries[i].ist = 0;
m_entries[i].reserved = 0;
}
void
IDT::set_ist(uint8_t i, uint8_t ist)
{
m_entries[i].ist = ist;
}
/// Log the contents of the table.
/// \arg index  Which entry to print, or -1 for all entries. An index
///             past the end of the table is reported and nothing is
///             read.
void
IDT::dump(unsigned index) const
{
    constexpr unsigned all_entries = static_cast<unsigned>(-1);
    const unsigned total = (m_ptr.limit + 1) / sizeof(descriptor);

    unsigned start = 0;
    unsigned count = total;
    if (index != all_entries) {
        // The index can come from a fault's error code; reading past the
        // end of the table would report unrelated memory as an entry.
        if (index >= total) {
            log::info(logs::boot, "IDT index %02x is out of range", index);
            return;
        }

        start = index;
        count = 1;
        log::info(logs::boot, "IDT FOR INDEX %02x", index);
    } else {
        log::info(logs::boot, "Loaded IDT at: %lx size: %d bytes", m_ptr.base, m_ptr.limit+1);
    }

    const descriptor *idt =
        reinterpret_cast<const descriptor *>(m_ptr.base);

    const unsigned end = start + count;
    for (unsigned i = start; i < end; ++i) {
        // Reassemble the handler address from its three fields
        uint64_t base =
            (static_cast<uint64_t>(idt[i].base_high) << 32) |
            (static_cast<uint64_t>(idt[i].base_mid) << 16) |
            idt[i].base_low;

        // The low nibble of the flags byte is the gate type
        char const *type;
        switch (idt[i].flags & 0xf) {
            case 0x5: type = " 32tsk "; break;
            case 0x6: type = " 16int "; break;
            case 0x7: type = " 16trp "; break;
            case 0xe: type = " 32int "; break;
            case 0xf: type = " 32trp "; break;
            default: type = " ????? "; break;
        }

        // Only entries with the top flag bit set are printed
        if (idt[i].flags & 0x80) {
            log::debug(logs::boot,
                " Entry %3d: Base:%lx Sel(rpl %d, ti %d, %3d) IST:%d %s DPL:%d",
                static_cast<int>(i), base,
                (idt[i].selector & 0x3),
                ((idt[i].selector & 0x4) >> 2),
                (idt[i].selector >> 3),
                idt[i].ist,
                type,
                ((idt[i].flags >> 5) & 0x3));
        }
    }
}

61
src/kernel/idt.h Normal file
View File

@@ -0,0 +1,61 @@
#pragma once
/// \file idt.h
/// Definitions relating to a CPU's IDT table
#include <stdint.h>
/// An interrupt descriptor table: 256 entries plus the limit/base
/// pointer structure used to load it.
class IDT
{
public:
/// Number of entries in the table
static constexpr unsigned count = 256;
/// Zero the table and add an entry for every handler listed in
/// interrupt_isrs.inc. No IST indices are set; see add_ist_entries().
IDT();
/// Install this IDT to the current CPU
void install() const;
/// Add the IST entries listed in the ISR table into the IDT.
/// This can't be done until after memory is set up so the
/// stacks can be created.
void add_ist_entries();
/// Get the IST entry used by an entry.
/// \arg i Which IDT entry to look in
/// \returns The IST index used by entry i, or 0 for none
inline uint8_t get_ist(unsigned i) const {
if (i >= count) return 0;
return m_entries[i].ist;
}
/// Get the IST entries that are used by this table, as a bitmap
uint8_t used_ist_entries() const;
/// Dump debug information about the IDT to the console.
/// \arg index Which entry to print, or -1 for all entries
void dump(unsigned index = -1) const;
private:
/// Fill in entry i with a handler, selector and flags; resets its IST index
void set(uint8_t i, void (*handler)(), uint16_t selector, uint8_t flags);
/// Store an IST index in entry i
void set_ist(uint8_t i, uint8_t ist);
/// One 16-byte entry of the table; the handler address is split
/// across base_low, base_mid and base_high
struct descriptor
{
uint16_t base_low;
uint16_t selector;
uint8_t ist;
uint8_t flags;
uint16_t base_mid;
uint32_t base_high;
uint32_t reserved; // must be zero
} __attribute__ ((packed, aligned(16)));
/// The limit/base pair handed to idt_write() to load this table
struct ptr
{
uint16_t limit;
descriptor *base;
} __attribute__ ((packed, aligned(4)));
descriptor m_entries[256];
ptr m_ptr;
};
extern IDT &g_idt;

View File

@@ -240,6 +240,7 @@ IRQ (0xdf, 0xbf, irqBF)
ISR (0xe0, 0, isrTimer)
ISR (0xe1, 0, isrLINT0)
ISR (0xe2, 0, isrLINT1)
ISR (0xe3, 0, isrAPICError)
ISR (0xe4, 0, isrAssert)
ISR (0xef, 0, isrSpurious)

View File

@@ -8,6 +8,7 @@
#include "debug.h"
#include "device_manager.h"
#include "gdt.h"
#include "idt.h"
#include "interrupts.h"
#include "io.h"
#include "kernel_memory.h"
@@ -15,6 +16,7 @@
#include "objects/process.h"
#include "scheduler.h"
#include "syscall.h"
#include "tss.h"
#include "vm_space.h"
static const uint16_t PIC1 = 0x20;
@@ -22,19 +24,14 @@ static const uint16_t PIC2 = 0xa0;
constexpr uintptr_t apic_eoi_addr = 0xfee000b0 + ::memory::page_offset;
constexpr size_t increment_offset = 0x1000;
extern "C" {
void _halt();
void isr_handler(cpu_state*);
void irq_handler(cpu_state*);
#define ISR(i, s, name) extern void name ();
#define EISR(i, s, name) extern void name ();
#define IRQ(i, q, name) extern void name ();
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
}
isr
@@ -60,7 +57,7 @@ get_irq(unsigned vector)
}
}
static void
void
disable_legacy_pic()
{
// Mask all interrupts
@@ -80,28 +77,16 @@ disable_legacy_pic()
outb(PIC2+1, 0x02); io_wait();
}
void
interrupts_init()
{
#define ISR(i, s, name) idt_set_entry(i, reinterpret_cast<uint64_t>(& name), 0x08, 0x8e);
#define EISR(i, s, name) idt_set_entry(i, reinterpret_cast<uint64_t>(& name), 0x08, 0x8e);
#define IRQ(i, q, name) idt_set_entry(i, reinterpret_cast<uint64_t>(& name), 0x08, 0x8e);
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
disable_legacy_pic();
log::info(logs::boot, "Interrupts enabled.");
}
void
isr_handler(cpu_state *regs)
{
console *cons = console::get();
uint8_t vector = regs->interrupt & 0xff;
ist_decrement(vector);
TSS &tss = TSS::current();
uint8_t ist = g_idt.get_ist(vector);
if (ist)
tss.ist_stack(ist) -= increment_offset;
switch (static_cast<isr>(vector)) {
@@ -150,13 +135,13 @@ isr_handler(cpu_state *regs)
switch ((regs->errorcode & 0x07) >> 1) {
case 0:
cons->printf(" GDT[%x]\n", index);
gdt_dump(index);
GDT::current().dump(index);
break;
case 1:
case 3:
cons->printf(" IDT[%x]\n", index);
idt_dump(index);
g_idt.dump(index);
break;
default:
@@ -275,7 +260,9 @@ isr_handler(cpu_state *regs)
print_stacktrace(2);
_halt();
}
ist_increment(vector);
if (ist)
tss.ist_stack(ist) += increment_offset;
*reinterpret_cast<uint32_t *>(apic_eoi_addr) = 0;
}

View File

@@ -29,6 +29,5 @@ extern "C" {
void interrupts_disable();
}
/// Fill the IDT with our ISRs, and disable the legacy
/// PIC interrupts.
void interrupts_init();
/// Disable the legacy PIC
void disable_legacy_pic();

View File

@@ -6,22 +6,28 @@
#include "kutil/assert.h"
#include "apic.h"
#include "block_device.h"
#include "clock.h"
#include "console.h"
#include "cpu.h"
#include "device_manager.h"
#include "gdt.h"
#include "idt.h"
#include "interrupts.h"
#include "io.h"
#include "kernel_args.h"
#include "kernel_memory.h"
#include "log.h"
#include "msr.h"
#include "objects/channel.h"
#include "objects/event.h"
#include "objects/thread.h"
#include "objects/vm_area.h"
#include "scheduler.h"
#include "serial.h"
#include "symbol_table.h"
#include "syscall.h"
#include "tss.h"
#include "vm_space.h"
#ifndef GIT_VERSION
#define GIT_VERSION
@@ -31,18 +37,24 @@ extern "C" {
void kernel_main(kernel::args::header *header);
void (*__ctors)(void);
void (*__ctors_end)(void);
void long_ap_startup();
void ap_startup();
void init_ap_trampoline(void*, uintptr_t, void (*)());
}
extern void __kernel_assert(const char *, unsigned, const char *);
using namespace kernel;
volatile size_t ap_startup_count;
/// Bootstrap the memory managers.
void setup_pat();
void memory_initialize_pre_ctors(args::header &kargs);
void memory_initialize_post_ctors(args::header &kargs);
process * load_simple_process(args::program &program);
void start_aps(void *kpml4);
/// TODO: not this. this is awful.
args::framebuffer *fb = nullptr;
@@ -77,7 +89,18 @@ kernel_main(args::header *header)
logger_init();
cpu_validate();
setup_pat();
log::debug(logs::boot, " jsix header is at: %016lx", header);
log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map);
log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table);
log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services);
log::debug(logs::boot, " Kernel PML4 is at: %016lx", header->pml4);
uint64_t cr0, cr4;
asm ("mov %%cr0, %0" : "=r"(cr0));
asm ("mov %%cr4, %0" : "=r"(cr4));
uint64_t efer = rdmsr(msr::ia32_efer);
log::debug(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0, cr4, efer);
bool has_video = false;
if (header->video.size > 0) {
@@ -95,10 +118,20 @@ kernel_main(args::header *header)
logger_clear_immediate();
}
gdt_init();
interrupts_init();
extern TSS &g_bsp_tss;
extern GDT &g_bsp_gdt;
TSS *tss = new (&g_bsp_tss) TSS;
GDT *gdt = new (&g_bsp_gdt) GDT {tss};
gdt->install();
IDT *idt = new (&g_idt) IDT;
idt->install();
disable_legacy_pic();
memory_initialize_pre_ctors(*header);
init_cpu(true);
run_constructors();
memory_initialize_post_ctors(*header);
@@ -116,16 +149,15 @@ kernel_main(args::header *header)
}
}
log::debug(logs::boot, " jsix header is at: %016lx", header);
log::debug(logs::boot, " Memory map is at: %016lx", header->mem_map);
log::debug(logs::boot, "ACPI root table is at: %016lx", header->acpi_table);
log::debug(logs::boot, "Runtime service is at: %016lx", header->runtime_services);
device_manager &devices = device_manager::get();
devices.parse_acpi(header->acpi_table);
devices.init_drivers();
devices.get_lapic()->calibrate_timer();
devices.get_lapic().calibrate_timer();
start_aps(header->pml4);
interrupts_enable();
/*
@@ -164,3 +196,80 @@ kernel_main(args::header *header)
sched->start();
}
/// Start every other processor (AP) known to the device manager.
/// The AP startup code is copied to a fixed low address, then each APIC id
/// other than this CPU's is sent an INIT IPI followed by up to two STARTUP
/// IPIs. Progress is detected by polling ap_startup_count, which
/// long_ap_startup() increments.
/// \arg kpml4  The kernel PML4, passed through to init_ap_trampoline
void
start_aps(void *kpml4)
{
using memory::frame_size;
using memory::kernel_stack_pages;
extern size_t ap_startup_code_size;
extern process &g_kernel_process;
extern vm_area_guarded &g_kernel_stacks;
clock &clk = clock::get();
lapic &apic = device_manager::get().get_lapic();
ap_startup_count = 1; // BSP processor
auto &ids = device_manager::get().get_apic_ids();
log::info(logs::boot, "Starting %d other CPUs", ids.count() - 1);
// Since we're using address space outside kernel space, make sure
// the kernel's vm_space is used
cpu_data &cpu = current_cpu();
cpu.process = &g_kernel_process;
// Copy the startup code somewhere the real mode trampoline can run
uintptr_t addr = 0x8000; // TODO: find a valid address, rewrite addresses
// The STARTUP IPI vector is the page number of the startup code
uint8_t vector = addr >> 12;
// Map one writable page at addr in the kernel space so the copy below can
// land there.
// NOTE(review): vma is allocated with new and only remove()d at the end of
// this function — confirm vm_space::remove() takes care of freeing it.
vm_area *vma = new vm_area_fixed(addr, 0x1000, vm_flags::write);
vm_space::kernel_space().add(addr, vma);
kutil::memcpy(
reinterpret_cast<void*>(addr),
reinterpret_cast<void*>(&ap_startup),
ap_startup_code_size);
static constexpr size_t stack_bytes = kernel_stack_pages * frame_size;
for (uint8_t id : ids) {
// Skip the CPU we are running on
if (id == apic.get_id()) continue;
log::info(logs::boot, "Starting AP %d", id);
// Snapshot the counter; the AP counts as started once it goes above this
size_t current_count = ap_startup_count;
// Give the AP its own kernel stack. The initial stack pointer sits two
// pointer-sized slots below the top of the section (presumably to leave
// room for a null stack frame — TODO confirm).
uintptr_t stack_start = g_kernel_stacks.get_section();
uintptr_t stack_end = stack_start + stack_bytes - 2 * sizeof(void*);
*reinterpret_cast<uint64_t*>(stack_end) = 0; // pre-fault the page
init_ap_trampoline(kpml4, stack_end, long_ap_startup);
// INIT, wait, then the first STARTUP IPI.
// NOTE(review): spinwait units are not visible here (presumably
// microseconds) — confirm against clock::spinwait.
apic.send_ipi(lapic::ipi_mode::init, 0, id);
clk.spinwait(1000);
apic.send_ipi(lapic::ipi_mode::startup, vector, id);
// Poll up to 20 times for the AP to check in
for (unsigned i = 0; i < 20; ++i) {
if (ap_startup_count > current_count) break;
clk.spinwait(10);
}
if (ap_startup_count > current_count)
continue;
// No response yet: send a second STARTUP IPI and poll for longer.
// If the AP still has not checked in after this, it is silently
// skipped and the loop moves on to the next id.
apic.send_ipi(lapic::ipi_mode::startup, vector, id);
for (unsigned i = 0; i < 100; ++i) {
if (ap_startup_count > current_count) break;
clk.spinwait(10);
}
}
log::info(logs::boot, "%d CPUs running", ap_startup_count);
// The low-memory startup page is no longer needed
vm_space::kernel_space().remove(vma);
}
/// Entry point handed to init_ap_trampoline() for each application
/// processor. Initializes this CPU, reports it as started by bumping
/// ap_startup_count, then halts forever. Never returns.
void
long_ap_startup()
{
	// NOTE(review): the false argument presumably means "not the BSP"
	// (kernel_main calls init_cpu(true)) — confirm against init_cpu.
	init_cpu(false);

	// ap_startup_count is shared with the BSP and with any other AP that is
	// still coming up (start_aps() moves on to the next AP after a timeout),
	// so the increment must be one atomic read-modify-write. A plain ++ on a
	// volatile is a separate load and store and can lose an update.
	__atomic_add_fetch(&ap_startup_count, 1, __ATOMIC_SEQ_CST);

	while (true) asm ("hlt");
}

View File

@@ -39,11 +39,8 @@ frame_allocator &g_frame_allocator = __g_frame_allocator_storage.value;
static kutil::no_construct<vm_area_untracked> __g_kernel_heap_area_storage;
vm_area_untracked &g_kernel_heap_area = __g_kernel_heap_area_storage.value;
vm_area_guarded g_kernel_stacks {
memory::stacks_start,
memory::kernel_stack_pages,
memory::kernel_max_stacks,
vm_flags::write};
static kutil::no_construct<vm_area_guarded> __g_kernel_stacks_storage;
vm_area_guarded &g_kernel_stacks = __g_kernel_stacks_storage.value;
vm_area_guarded g_kernel_buffers {
memory::buffers_start,
@@ -66,6 +63,11 @@ memory_initialize_pre_ctors(args::header &kargs)
{
using kernel::args::frame_block;
// Clean out any remaining bootloader page table entries
page_table *kpml4 = static_cast<page_table*>(kargs.pml4);
for (unsigned i = 0; i < memory::pml4e_kernel; ++i)
kpml4->entries[i] = 0;
new (&g_kernel_heap) kutil::heap_allocator {heap_start, kernel_max_heap};
frame_block *blocks = reinterpret_cast<frame_block*>(memory::bitmap_start);
@@ -97,7 +99,6 @@ memory_initialize_pre_ctors(args::header &kargs)
}
}
page_table *kpml4 = reinterpret_cast<page_table*>(kargs.pml4);
process *kp = process::create_kernel_process(kpml4);
vm_space &vm = kp->space();
@@ -105,42 +106,24 @@ memory_initialize_pre_ctors(args::header &kargs)
vm_area_untracked(kernel_max_heap, vm_flags::write);
vm.add(heap_start, heap);
vm_area *stacks = new (&g_kernel_stacks) vm_area_guarded {
memory::stacks_start,
memory::kernel_stack_pages,
memory::kernel_max_stacks,
vm_flags::write};
vm.add(memory::stacks_start, &g_kernel_stacks);
}
void
memory_initialize_post_ctors(args::header &kargs)
{
vm_space &vm = vm_space::kernel_space();
vm.add(memory::stacks_start, &g_kernel_stacks);
vm.add(memory::buffers_start, &g_kernel_buffers);
g_frame_allocator.free(
reinterpret_cast<uintptr_t>(kargs.page_tables),
kargs.table_count);
using memory::frame_size;
using memory::kernel_stack_pages;
constexpr size_t stack_size = kernel_stack_pages * frame_size;
for (int ist = 1; ist <= 3; ++ist) {
uintptr_t bottom = g_kernel_stacks.get_section();
log::debug(logs::boot, "Installing IST%d stack at %llx", ist, bottom);
// Pre-realize and xerothese stacks, they're no good
// if they page fault
kutil::memset(reinterpret_cast<void*>(bottom), 0, stack_size);
// Skip two entries to be the null frame
tss_set_ist(ist, bottom + stack_size - 2 * sizeof(uintptr_t));
}
#define ISR(i, s, name) if (s) { idt_set_ist(i, s); }
#define EISR(i, s, name) if (s) { idt_set_ist(i, s); }
#define IRQ(i, q, name)
#include "interrupt_isrs.inc"
#undef IRQ
#undef EISR
#undef ISR
}
static void
@@ -198,15 +181,6 @@ log_mtrrs()
pat_names[(pat >> (6*8)) & 7], pat_names[(pat >> (7*8)) & 7]);
}
void
setup_pat()
{
uint64_t pat = rdmsr(msr::ia32_pat);
pat = (pat & 0x00ffffffffffffffull) | (0x01ull << 56); // set PAT 7 to WC
wrmsr(msr::ia32_pat, pat);
log_mtrrs();
}
process *
load_simple_process(args::program &program)

View File

@@ -42,7 +42,7 @@ process::~process()
s_processes.remove_swap(this);
}
process & process::current() { return *bsp_cpu_data.p; }
process & process::current() { return *current_cpu().process; }
process & process::kernel_process() { return g_kernel_process; }
process *
@@ -63,7 +63,7 @@ process::exit(int32_t code)
thread->exit(code);
}
if (this == bsp_cpu_data.p)
if (this == current_cpu().process)
scheduler::get().schedule();
}

View File

@@ -9,7 +9,7 @@
extern "C" void kernel_to_user_trampoline();
static constexpr j6_signal_t thread_default_signals = 0;
extern vm_area_guarded g_kernel_stacks;
extern vm_area_guarded &g_kernel_stacks;
thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) :
kobject(kobject::type::thread, thread_default_signals),
@@ -43,13 +43,9 @@ thread::from_tcb(TCB *tcb)
return reinterpret_cast<thread*>(kutil::offset_pointer(tcb, offset));
}
thread &
thread::current()
{
return *bsp_cpu_data.t;
}
thread & thread::current() { return *current_cpu().thread; }
inline void schedule_if_current(thread *t) { if (t == bsp_cpu_data.t) scheduler::get().schedule(); }
inline void schedule_if_current(thread *t) { if (t == current_cpu().thread) scheduler::get().schedule(); }
void
thread::wait_on_signals(kobject *obj, j6_signal_t signals)

View File

@@ -33,7 +33,9 @@ const uint64_t rflags_int = 0x202;
extern uint64_t idle_stack_end;
scheduler::scheduler(lapic *apic) :
extern "C" void task_switch(TCB *tcb);
scheduler::scheduler(lapic &apic) :
m_apic(apic),
m_next_pid(1),
m_clock(0),
@@ -55,10 +57,11 @@ scheduler::scheduler(lapic *apic) :
m_runlists[max_priority].push_back(tcb);
m_current = tcb;
bsp_cpu_data.rsp0 = tcb->rsp0;
bsp_cpu_data.tcb = tcb;
bsp_cpu_data.p = kp;
bsp_cpu_data.t = idle;
cpu_data &cpu = current_cpu();
cpu.rsp0 = tcb->rsp0;
cpu.tcb = tcb;
cpu.process = kp;
cpu.thread = idle;
}
template <typename T>
@@ -113,9 +116,8 @@ void
scheduler::start()
{
log::info(logs::sched, "Starting scheduler.");
wrmsr(msr::ia32_gs_base, reinterpret_cast<uintptr_t>(&bsp_cpu_data));
m_apic->enable_timer(isr::isrTimer, false);
m_apic->reset_timer(10);
m_apic.enable_timer(isr::isrTimer, false);
m_apic.reset_timer(10);
}
void
@@ -205,7 +207,7 @@ void
scheduler::schedule()
{
uint8_t priority = m_current->priority;
uint32_t remaining = m_apic->stop_timer();
uint32_t remaining = m_apic.stop_timer();
m_current->time_left = remaining;
thread *th = thread::from_tcb(m_current);
const bool constant = th->has_state(thread::state::constant);
@@ -214,7 +216,7 @@ scheduler::schedule()
if (priority < max_priority && !constant) {
// Process used its whole timeslice, demote it
++m_current->priority;
log::info(logs::sched, "Scheduler demoting thread %llx, priority %d",
log::debug(logs::sched, "Scheduler demoting thread %llx, priority %d",
th->koid(), m_current->priority);
}
m_current->time_left = quantum(m_current->priority);
@@ -247,13 +249,14 @@ scheduler::schedule()
auto *next = m_runlists[priority].pop_front();
next->last_ran = m_clock;
m_apic->reset_timer(next->time_left);
m_apic.reset_timer(next->time_left);
if (next != m_current) {
thread *next_thread = thread::from_tcb(next);
bsp_cpu_data.t = next_thread;
bsp_cpu_data.p = &next_thread->parent();
cpu_data &cpu = current_cpu();
cpu.thread = next_thread;
cpu.process = &next_thread->parent();
m_current = next;
log::debug(logs::sched, "Scheduler switching threads %llx->%llx",

View File

@@ -10,13 +10,10 @@ namespace args {
struct program;
}}
struct cpu_data;
class lapic;
class process;
struct page_table;
struct cpu_state;
extern "C" void isr_handler(cpu_state*);
extern "C" void task_switch(TCB *next);
/// The task scheduler
@@ -42,8 +39,8 @@ public:
static const uint16_t process_quanta = 10;
/// Constructor.
/// \arg apic Pointer to the local APIC object
scheduler(lapic *apic);
/// \arg apic The local APIC object for this CPU
scheduler(lapic &apic);
/// Create a new process from a program image in memory.
/// \arg program The descriptor of the program in memory
@@ -82,7 +79,6 @@ public:
static scheduler & get() { return *s_instance; }
private:
friend uintptr_t syscall_dispatch(uintptr_t, cpu_state &);
friend class process;
static constexpr uint64_t promote_frequency = 10;
@@ -96,7 +92,7 @@ private:
void prune(uint64_t now);
void check_promotions(uint64_t now);
lapic *m_apic;
lapic &m_apic;
uint32_t m_next_pid;
uint32_t m_tick_count;

View File

@@ -1,6 +1,5 @@
%include "tasking.inc"
extern g_tss
global task_switch
task_switch:
push rbp
@@ -18,7 +17,7 @@ task_switch:
mov [rax + TCB.rsp], rsp
; Copy off saved user rsp
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: curretn task's saved user rsp
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp
mov [rax + TCB.rsp3], rcx
; Install next task's TCB
@@ -31,7 +30,7 @@ task_switch:
mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack
mov [gs:CPU_DATA.rsp0], rcx
lea rdx, [rel g_tss] ; rdx: address of TSS
mov rdx, [gs:CPU_DATA.tss] ; rdx: address of TSS
mov [rdx + TSS.rsp0], rcx
; Update saved user rsp
@@ -67,3 +66,8 @@ initialize_main_thread:
; the entrypoint should already be on the stack
jmp kernel_to_user_trampoline
; _current_gsbase: load the CPU_DATA.self field through the gs segment
; and return it in rax.
; NOTE(review): .self presumably holds the address of this CPU's own
; CPU_DATA block — confirm where it is initialized.
global _current_gsbase
_current_gsbase:
mov rax, [gs:CPU_DATA.self]
ret

View File

@@ -6,9 +6,15 @@ struc TCB
endstruc
; Field offsets of the per-CPU data block. Assembly code addresses these
; relative to the gs segment, e.g. [gs:CPU_DATA.rsp0]. Every field is
; one quadword.
; NOTE(review): presumably this must stay in sync, field for field, with
; the C++ cpu_data structure — confirm when adding or reordering fields.
struc CPU_DATA
.self: resq 1
.id: resq 1
.rsp0: resq 1
.rsp3: resq 1
.tcb: resq 1
.thread: resq 1
.process: resq 1
.tss: resq 1
.gdt: resq 1
endstruc
struc TSS

39
src/kernel/tss.cpp Normal file
View File

@@ -0,0 +1,39 @@
#include "kutil/assert.h"
#include "kutil/memory.h"
#include "kutil/no_construct.h"
#include "cpu.h"
#include "tss.h"
// The BSP's TSS is initialized _before_ global constructors are called,
// so we don't want it to have a global constructor, lest it overwrite
// the previous initialization.
static kutil::no_construct<TSS> __g_bsp_tss_storage;
TSS &g_bsp_tss = __g_bsp_tss_storage.value;
/// Build an empty TSS: every field is zeroed, and the IO map offset is
/// set to the size of the structure itself.
TSS::TSS()
{
	constexpr size_t tss_bytes = sizeof(TSS);

	// Clear the whole structure first, reserved fields included
	kutil::memset(this, 0, tss_bytes);

	// NOTE(review): an offset at or past the end of the TSS presumably
	// means "no IO permission bitmap" — confirm against the TSS limit
	// the GDT entry uses.
	m_iomap_offset = tss_bytes;
}
/// Look up the TSS recorded in the current CPU's per-CPU data.
/// \returns A reference to that TSS
TSS &
TSS::current()
{
	auto &cpu = current_cpu();
	return *cpu.tss;
}
/// Access the stack pointer slot for a privilege level.
/// \arg ring  Which ring to get the stack for; must be 0, 1, or 2
/// \returns   A mutable reference to that ring's stack pointer
uintptr_t &
TSS::ring_stack(unsigned ring)
{
	// m_rsp only holds slots for CPL 0-2
	constexpr unsigned ring_slots = 3;
	kassert(ring < ring_slots, "Bad ring passed to TSS::ring_stack.");

	uintptr_t &slot = m_rsp[ring];
	return slot;
}
/// Access the stack pointer slot for an interrupt stack table entry.
/// \arg ist  Which IST entry to get the stack for; must be 1 through 7
/// \returns  A mutable reference to that IST entry's stack pointer
uintptr_t &
TSS::ist_stack(unsigned ist)
{
	// IST indices are 1-based: m_ist[0] is the reserved slot and m_ist has
	// eight entries, so 1..7 are all valid. The previous bound (ist < 7)
	// wrongly rejected IST7.
	kassert(ist > 0 && ist < 8, "Bad ist passed to TSS::ist_stack.");
	return m_ist[ist];
}

35
src/kernel/tss.h Normal file
View File

@@ -0,0 +1,35 @@
#pragma once
/// \file tss.h
/// Definitions relating to the TSS
#include <stdint.h>
/// The 64-bit task state segment (TSS). The member layout is dictated by
/// the hardware, so the class is packed and its size is checked below.
class TSS
{
public:
	/// Constructor. Zeroes the structure and sets the IO map offset.
	TSS();

	/// Get the currently running CPU's TSS.
	static TSS & current();

	/// Ring stack accessor. Returns a mutable reference.
	/// \arg ring  Which ring (0-2) to get the stack for
	/// \returns   A mutable reference to the stack pointer
	uintptr_t & ring_stack(unsigned ring);

	/// IST stack accessor. Returns a mutable reference.
	/// \arg ist   Which IST entry (1-7) to get the stack for
	/// \returns   A mutable reference to the stack pointer
	uintptr_t & ist_stack(unsigned ist);

private:
	uint32_t m_reserved0;

	uintptr_t m_rsp[3]; // stack pointers for CPL 0-2
	uintptr_t m_ist[8]; // ist[0] is reserved

	uint64_t m_reserved1;
	uint16_t m_reserved2;
	uint16_t m_iomap_offset;
} __attribute__ ((packed));

// The CPU reads this structure directly, so catch any accidental layout
// change (a new member, a lost packed attribute) at compile time.
static_assert(sizeof(TSS) == 104, "TSS must match the 104-byte x86-64 hardware layout");

View File

@@ -1,5 +1,5 @@
#include <stdint.h>
#include "cpu/cpu.h"
#include "cpu/cpu_id.h"
namespace cpu {
@@ -94,4 +94,13 @@ cpu_id::has_feature(feature feat)
return (m_features & (1 << static_cast<uint64_t>(feat))) != 0;
}
/// Query CPUID leaf 1 and return the top byte of EBX, which is where
/// that leaf reports the local APIC ID of the CPU executing the call.
uint8_t
cpu_id::local_apic_id() const
{
	constexpr unsigned apic_id_shift = 24;

	uint32_t ignored_eax;
	uint32_t ebx_value;
	__cpuid(1, 0, &ignored_eax, &ebx_value);

	const uint32_t apic_id = ebx_value >> apic_id_shift;
	return static_cast<uint8_t>(apic_id);
}
}

View File

@@ -1,5 +1,5 @@
#pragma once
/// \file cpu.h Definition of required cpu features for jsix
/// \file cpu_id.h Definition of required cpu features for jsix
#include <stdint.h>
@@ -48,6 +48,9 @@ public:
/// \returns A |regs| struct of the values returned
regs get(uint32_t leaf, uint32_t sub = 0) const;
/// Get the local APIC ID of the current CPU
uint8_t local_apic_id() const;
/// Get the name of the cpu vendor (eg, "GenuineIntel")
inline const char * vendor_id() const { return m_vendor_id; }