[kernel] Initial XSAVE support implementation

Initial support for XSAVE, but not XSAVEOPT or XSAVEC:

- Enable XSAVE and set up xcr0 for all CPUs
- Allocate XSAVE area for all non-kernel threads
- Call XSAVE and XRSTOR on task switch
This commit is contained in:
Justin C. Miller
2023-05-05 12:04:37 -06:00
parent 3b3857548c
commit b5662bfd25
12 changed files with 135 additions and 9 deletions

View File

@@ -22,7 +22,7 @@ variables:
'-DGIT_VERSION=\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
'-DGIT_VERSION_WIDE=L\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
"-Wformat=2", "-Winit-self", "-Wfloat-equal", "-Winline", "-Wmissing-format-attribute",
"-Wformat=2", "-Winit-self", "-Winline", "-Wmissing-format-attribute",
"-Wmissing-include-dirs", "-Wswitch", "-Wundef", "-Wdisabled-optimization",
"-Wpointer-arith", "-Wno-attributes", "-Wno-sign-compare", "-Wno-multichar",
"-Wno-div-by-zero", "-Wno-endif-labels", "-Wno-pragmas", "-Wno-format-extra-args",

View File

@@ -14,6 +14,7 @@
#include "objects/thread.h"
#include "syscall.h"
#include "tss.h"
#include "xsave.h"
unsigned g_num_cpus = 1;
@@ -140,6 +141,7 @@ bsp_early_init()
cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss};
cpu->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
cpu_early_init(cpu);
xsave_init();
return cpu;
}
@@ -229,4 +231,6 @@ cpu_init(cpu_data *cpu, bool bsp)
cpu->id = apic->get_id();
apic->calibrate_timer();
}
xsave_enable();
}

View File

@@ -51,6 +51,8 @@ enum class xcr0
ZMM_Hi256,
ZMM_Hi16,
PKRU = 9,
J6_SUPPORTED = X87 | SSE | AVX | BINDREG | BINDCSR | OPMASK | ZMM_Hi16 | ZMM_Hi256,
};
enum class efer

View File

@@ -65,6 +65,7 @@ kernel = module("kernel",
"tss.cpp",
"vm_space.cpp",
"wait_queue.cpp",
"xsave.cpp",
])
if config == "debug":

View File

@@ -88,6 +88,9 @@ process::create_thread(uintptr_t rsp3, uint8_t priority)
if (rsp3)
th->tcb()->rsp3 = rsp3;
if (this != &g_kernel_process)
th->init_xsave_area();
m_threads.append(th);
scheduler::get().add_thread(th->tcb());
return th;

View File

@@ -8,6 +8,7 @@
#include "objects/process.h"
#include "objects/vm_area.h"
#include "scheduler.h"
#include "xsave.h"
extern "C" void initialize_user_cpu();
extern obj::vm_area_guarded &g_kernel_stacks;
@@ -37,6 +38,9 @@ thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) :
// Tear down a thread: free its XSAVE area if one was allocated, give the
// kernel stack section back to g_kernel_stacks, and call handle_release()
// on the parent.
thread::~thread()
{
    // m_tcb.xsave stays 0 for threads that never had init_xsave_area() called
    uint8_t *xsave_area = reinterpret_cast<uint8_t*>(m_tcb.xsave);
    if (xsave_area != nullptr)
        delete [] xsave_area;

    g_kernel_stacks.return_section(m_tcb.kernel_stack);
    m_parent.handle_release();
}
@@ -158,6 +162,14 @@ thread::add_thunk_user(uintptr_t rip3, uint64_t arg0, uint64_t arg1, uintptr_t r
add_thunk_kernel(rip0 ? rip0 : trampoline);
}
// Allocate and initialize this thread's XSAVE area, and record its address in
// the TCB so that task_switch saves/restores extended processor state for it.
//
// The area is zero-filled, which leaves XSTATE_BV == 0 in the XSAVE header so
// the first XRSTOR puts every state component into its initial configuration.
// The one exception is MXCSR: a standard-form XRSTOR loads it from the legacy
// region whenever SSE or AVX is in the requested mask, regardless of
// XSTATE_BV, so it has to be seeded with its architectural default or the
// thread would start with every SIMD floating point exception unmasked.
//
// NOTE(review): XSAVE/XRSTOR fault unless their operand is 64-byte aligned;
// this presumes the kernel heap returns suitably aligned blocks - confirm.
void
thread::init_xsave_area()
{
    // Location and reset value of MXCSR inside the legacy (FXSAVE) region
    constexpr size_t mxcsr_offset = 24;
    constexpr uint32_t mxcsr_default = 0x1f80;

    const size_t size = xsave_size;
    uint8_t *area = new uint8_t [size];
    memset(area, 0, size);

    // Written byte-wise (little endian); skipped if the area is too small to
    // contain the field at all.
    if (size >= mxcsr_offset + sizeof(mxcsr_default)) {
        for (size_t i = 0; i < sizeof(mxcsr_default); ++i)
            area[mxcsr_offset + i] =
                static_cast<uint8_t>((mxcsr_default >> (8 * i)) & 0xff);
    }

    m_tcb.xsave = reinterpret_cast<uintptr_t>(area);
}
void
thread::setup_kernel_stack()
{

View File

@@ -26,6 +26,7 @@ struct TCB
uintptr_t rsp3;
uintptr_t rflags3;
uintptr_t pml4;
uintptr_t xsave;
// End of area used by assembly
obj::thread* thread;
@@ -182,6 +183,9 @@ private:
/// \arg rsp0 The existing kernel stack rsp, 0 for none
thread(process &parent, uint8_t pri, uintptr_t rsp0 = 0);
/// Set up the XSAVE saved processor state area for this thread
void init_xsave_area();
/// Set up a new empty kernel stack for this thread.
void setup_kernel_stack();

View File

@@ -1,5 +1,7 @@
%include "tasking.inc"
extern xcr0_val
global task_switch: function hidden (task_switch.end - task_switch)
task_switch:
push rbp
@@ -13,22 +15,33 @@ task_switch:
push r15
; Update previous task's TCB
mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB
mov [rax + TCB.rsp], rsp
mov r15, [gs:CPU_DATA.tcb] ; r15: current task TCB
mov [r15 + TCB.rsp], rsp
; Copy off saved user rsp
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp
mov [rax + TCB.rsp3], rcx
mov [r15 + TCB.rsp3], rcx
; Copy off saved user rflags
mov rcx, [gs:CPU_DATA.rflags3] ; rcx: current task's saved user rflags
mov [rax + TCB.rflags3], rcx
mov [r15 + TCB.rflags3], rcx
; Save processor extended state
; XSAVE takes its component mask in edx:eax, so rdx must end up holding the
; HIGH 32 bits of xcr0_val (shift right, not left).
mov rcx, [r15 + TCB.xsave] ; rcx: current task's XSAVE area
cmp rcx, 0
jz .xsave_done ; pointer is 0: this task has no XSAVE area
mov rax, [rel xcr0_val] ; eax: low 32 bits of the component mask
mov rdx, rax
shr rdx, 32 ; edx: high 32 bits of the component mask
xsave [rcx]
.xsave_done:
; Install next task's TCB
mov [gs:CPU_DATA.tcb], rdi ; rdi: next TCB (function param)
mov rsp, [rdi + TCB.rsp] ; next task's stack pointer
mov rax, 0x00003fffffffffff
and rax, [rdi + TCB.pml4] ; rax: next task's pml4 (phys portion of address)
mov r14, 0x00003fffffffffff
and r14, [rdi + TCB.pml4] ; r14: next task's pml4 (phys portion of address)
; Update syscall/interrupt rsp
mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack
@@ -41,15 +54,26 @@ task_switch:
mov rcx, [rdi + TCB.rsp3] ; rcx: new task's saved user rsp
mov [gs:CPU_DATA.rsp3], rcx
; Load processor extended state
; XRSTOR takes its component mask in edx:eax, so rdx must end up holding the
; HIGH 32 bits of xcr0_val (shift right, not left).
mov rcx, [rdi + TCB.xsave] ; rcx: new task's XSAVE area
cmp rcx, 0
jz .xrstor_done ; pointer is 0: this task has no XSAVE area
mov rax, [rel xcr0_val] ; eax: low 32 bits of the component mask
mov rdx, rax
shr rdx, 32 ; edx: high 32 bits of the component mask
xrstor [rcx]
.xrstor_done:
; Update saved user rflags
mov rcx, [rdi + TCB.rflags3] ; rcx: new task's saved user rflags
mov [gs:CPU_DATA.rflags3], rcx
; check if we need to update CR3
mov rdx, cr3 ; rdx: old CR3
cmp rax, rdx
cmp r14, rdx
je .no_cr3
mov cr3, rax
mov cr3, r14
.no_cr3:
pop r15

View File

@@ -4,6 +4,7 @@ struc TCB
.rsp3: resq 1
.rflags3: resq 1
.pml4: resq 1
.xsave: resq 1
endstruc
struc CPU_DATA

30
src/kernel/xsave.cpp Normal file
View File

@@ -0,0 +1,30 @@
#include <stdint.h>
#include <cpu/cpu_id.h>
#include "cpu.h"
#include "xsave.h"
// Mask of extended state components in use. Computed by xsave_init(), written
// to XCR0 by xsave_enable(), and read by the task_switch assembly (which
// declares it extern) as the XSAVE/XRSTOR component mask.
uint64_t xcr0_val = 0;
// Size in bytes of an XSAVE area, filled in by xsave_init(). Kept private to
// this file; other code sees it only through the read-only reference below.
static size_t xsave_size_val = 0;
// Read-only view of xsave_size_val exported via xsave.h.
const size_t &xsave_size = xsave_size_val;
// Query CPUID leaf 0x0d to work out which extended state components to
// enable and how large each thread's XSAVE area must be. This only computes
// xcr0_val and the area size; XCR0 itself is programmed by xsave_enable().
void
xsave_init()
{
    cpu::cpu_id cpuid;
    const auto regs = cpuid.get(0x0d);

    // edx:eax of this leaf is the set of XCR0 bits the processor supports
    const uint64_t cpu_supported =
        (static_cast<uint64_t>(regs.edx) << 32) |
         static_cast<uint64_t>(regs.eax);

    xcr0_val = static_cast<uint64_t>(xcr0::J6_SUPPORTED) & cpu_supported;

    // EBX reports the size needed for the components enabled in XCR0 at the
    // moment CPUID runs. This function runs before xsave_enable() has
    // written xcr0_val to XCR0, so EBX could describe a smaller area than
    // XSAVE will later write. ECX is the size needed for every component the
    // processor supports, which is always large enough for our subset.
    xsave_size_val = regs.ecx;
}
// Program XCR0 on the calling CPU with the component mask chosen by
// xsave_init(). XSETBV takes the register index in ecx (0 selects XCR0) and
// the 64-bit value split across edx:eax.
//
// NOTE(review): XSETBV requires CR4.OSXSAVE to already be set; presumably
// that happens elsewhere in CPU initialization - confirm.
void
xsave_enable()
{
    const uint32_t lo = static_cast<uint32_t>(xcr0_val & 0xFFFFFFFF);
    const uint32_t hi = static_cast<uint32_t>(xcr0_val >> 32);
    asm volatile ( "xsetbv" :: "c"(0), "d"(hi), "a"(lo) );
}

10
src/kernel/xsave.h Normal file
View File

@@ -0,0 +1,10 @@
#pragma once
/// \file xsave.h
/// XSAVE operations
#include <stddef.h>
/// Size in bytes of an XSAVE area, as determined by xsave_init().
/// Reads as 0 until xsave_init() has run.
extern const size_t &xsave_size;
/// Query CPUID leaf 0x0d to compute the XCR0 component mask and the
/// XSAVE area size. Does not modify XCR0.
void xsave_init();
/// Write the component mask computed by xsave_init() to XCR0 on the
/// calling CPU.
void xsave_enable();

View File

@@ -5,6 +5,7 @@
#include <j6/errors.h>
#include <j6/flags.h>
#include <j6/syscalls.h>
#include <j6/syslog.hh>
#include <j6/thread.hh>
#include <j6/types.h>
@@ -17,11 +18,42 @@ extern j6_handle_t __handle_self;
constexpr uintptr_t stack_top = 0xf80000000;
uint32_t flipflop = 0;
// Floating point self-check: repeatedly fill one array with multiples of a
// constant, scale it into a second array, then verify every element against
// the same computation done directly. The comparison is deliberately exact;
// the arithmetic on both sides is identical, so any difference means the
// values were disturbed between the stores and the check.
// Returns true when every element matches, false (after logging) otherwise.
bool
test_floats()
{
    static constexpr int len = 30;
    static constexpr int passes = 100;

    const double orig = 345.72;
    const double mult = 3.21;

    double as[len];
    double bs[len];

    // Each array is rewritten `passes` times with the same values
    for (int pass = 0; pass < passes; ++pass)
        for (int idx = 0; idx < len; ++idx)
            as[idx] = orig * idx;

    for (int pass = 0; pass < passes; ++pass)
        for (int idx = 0; idx < len; ++idx)
            bs[idx] = as[idx] * mult;

    int i = 0;
    while (i < len) {
        const double expected = orig * i * mult;
        if (bs[i] != expected) {
            j6::syslog("ERROR: floating point discrepency");
            return false;
        }
        ++i;
    }

    return true;
}
void
thread_proc(void* channelp)
{
j6_log("sub thread starting");
for (int i = 0; i < 100; ++i)
if (!test_floats()) break;
j6::channel *chan = reinterpret_cast<j6::channel*>(channelp);
char buffer[512];
@@ -86,6 +118,9 @@ main(int argc, const char **argv)
j6_log("main thread created sub thread");
for (int i = 0; i < 100; ++i)
if (!test_floats()) break;
char message[] = "MAIN THREAD SUCCESSFULLY CALLED SEND AND RECEIVE IF THIS IS LOWERCASE";
size_t size = sizeof(message);