mirror of
https://github.com/justinian/jsix.git
synced 2025-12-10 08:24:32 -08:00
[kernel] Initial XSAVE support implementation
Initial support for XSAVE, but not XSAVEOPT or XSAVEC: - Enable XSAVE and set up xcr0 for all CPUs - Allocate XSAVE area for all non-kernel threads - Call XSAVE and XRSTOR on task switch
This commit is contained in:
@@ -22,7 +22,7 @@ variables:
|
|||||||
'-DGIT_VERSION=\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
|
'-DGIT_VERSION=\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
|
||||||
'-DGIT_VERSION_WIDE=L\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
|
'-DGIT_VERSION_WIDE=L\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"',
|
||||||
|
|
||||||
"-Wformat=2", "-Winit-self", "-Wfloat-equal", "-Winline", "-Wmissing-format-attribute",
|
"-Wformat=2", "-Winit-self", "-Winline", "-Wmissing-format-attribute",
|
||||||
"-Wmissing-include-dirs", "-Wswitch", "-Wundef", "-Wdisabled-optimization",
|
"-Wmissing-include-dirs", "-Wswitch", "-Wundef", "-Wdisabled-optimization",
|
||||||
"-Wpointer-arith", "-Wno-attributes", "-Wno-sign-compare", "-Wno-multichar",
|
"-Wpointer-arith", "-Wno-attributes", "-Wno-sign-compare", "-Wno-multichar",
|
||||||
"-Wno-div-by-zero", "-Wno-endif-labels", "-Wno-pragmas", "-Wno-format-extra-args",
|
"-Wno-div-by-zero", "-Wno-endif-labels", "-Wno-pragmas", "-Wno-format-extra-args",
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
#include "objects/thread.h"
|
#include "objects/thread.h"
|
||||||
#include "syscall.h"
|
#include "syscall.h"
|
||||||
#include "tss.h"
|
#include "tss.h"
|
||||||
|
#include "xsave.h"
|
||||||
|
|
||||||
unsigned g_num_cpus = 1;
|
unsigned g_num_cpus = 1;
|
||||||
|
|
||||||
@@ -140,6 +141,7 @@ bsp_early_init()
|
|||||||
cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss};
|
cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss};
|
||||||
cpu->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
|
cpu->rsp0 = reinterpret_cast<uintptr_t>(&idle_stack_end);
|
||||||
cpu_early_init(cpu);
|
cpu_early_init(cpu);
|
||||||
|
xsave_init();
|
||||||
|
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
@@ -229,4 +231,6 @@ cpu_init(cpu_data *cpu, bool bsp)
|
|||||||
cpu->id = apic->get_id();
|
cpu->id = apic->get_id();
|
||||||
apic->calibrate_timer();
|
apic->calibrate_timer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xsave_enable();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ enum class xcr0
|
|||||||
ZMM_Hi256,
|
ZMM_Hi256,
|
||||||
ZMM_Hi16,
|
ZMM_Hi16,
|
||||||
PKRU = 9,
|
PKRU = 9,
|
||||||
|
|
||||||
|
J6_SUPPORTED = X87 | SSE | AVX | BINDREG | BINDCSR | OPMASK | ZMM_Hi16 | ZMM_Hi256,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class efer
|
enum class efer
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ kernel = module("kernel",
|
|||||||
"tss.cpp",
|
"tss.cpp",
|
||||||
"vm_space.cpp",
|
"vm_space.cpp",
|
||||||
"wait_queue.cpp",
|
"wait_queue.cpp",
|
||||||
|
"xsave.cpp",
|
||||||
])
|
])
|
||||||
|
|
||||||
if config == "debug":
|
if config == "debug":
|
||||||
|
|||||||
@@ -88,6 +88,9 @@ process::create_thread(uintptr_t rsp3, uint8_t priority)
|
|||||||
if (rsp3)
|
if (rsp3)
|
||||||
th->tcb()->rsp3 = rsp3;
|
th->tcb()->rsp3 = rsp3;
|
||||||
|
|
||||||
|
if (this != &g_kernel_process)
|
||||||
|
th->init_xsave_area();
|
||||||
|
|
||||||
m_threads.append(th);
|
m_threads.append(th);
|
||||||
scheduler::get().add_thread(th->tcb());
|
scheduler::get().add_thread(th->tcb());
|
||||||
return th;
|
return th;
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include "objects/process.h"
|
#include "objects/process.h"
|
||||||
#include "objects/vm_area.h"
|
#include "objects/vm_area.h"
|
||||||
#include "scheduler.h"
|
#include "scheduler.h"
|
||||||
|
#include "xsave.h"
|
||||||
|
|
||||||
extern "C" void initialize_user_cpu();
|
extern "C" void initialize_user_cpu();
|
||||||
extern obj::vm_area_guarded &g_kernel_stacks;
|
extern obj::vm_area_guarded &g_kernel_stacks;
|
||||||
@@ -37,6 +38,9 @@ thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) :
|
|||||||
|
|
||||||
thread::~thread()
|
thread::~thread()
|
||||||
{
|
{
|
||||||
|
if (m_tcb.xsave)
|
||||||
|
delete [] reinterpret_cast<uint8_t*>(m_tcb.xsave);
|
||||||
|
|
||||||
g_kernel_stacks.return_section(m_tcb.kernel_stack);
|
g_kernel_stacks.return_section(m_tcb.kernel_stack);
|
||||||
m_parent.handle_release();
|
m_parent.handle_release();
|
||||||
}
|
}
|
||||||
@@ -158,6 +162,14 @@ thread::add_thunk_user(uintptr_t rip3, uint64_t arg0, uint64_t arg1, uintptr_t r
|
|||||||
add_thunk_kernel(rip0 ? rip0 : trampoline);
|
add_thunk_kernel(rip0 ? rip0 : trampoline);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
thread::init_xsave_area()
|
||||||
|
{
|
||||||
|
void *xsave_area = new uint8_t [xsave_size];
|
||||||
|
memset(xsave_area, 0, xsave_size);
|
||||||
|
m_tcb.xsave = reinterpret_cast<uintptr_t>(xsave_area);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
thread::setup_kernel_stack()
|
thread::setup_kernel_stack()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ struct TCB
|
|||||||
uintptr_t rsp3;
|
uintptr_t rsp3;
|
||||||
uintptr_t rflags3;
|
uintptr_t rflags3;
|
||||||
uintptr_t pml4;
|
uintptr_t pml4;
|
||||||
|
uintptr_t xsave;
|
||||||
// End of area used by assembly
|
// End of area used by assembly
|
||||||
|
|
||||||
obj::thread* thread;
|
obj::thread* thread;
|
||||||
@@ -182,6 +183,9 @@ private:
|
|||||||
/// \arg rsp0 The existing kernel stack rsp, 0 for none
|
/// \arg rsp0 The existing kernel stack rsp, 0 for none
|
||||||
thread(process &parent, uint8_t pri, uintptr_t rsp0 = 0);
|
thread(process &parent, uint8_t pri, uintptr_t rsp0 = 0);
|
||||||
|
|
||||||
|
/// Set up the XSAVE saved processor state area for this thread
|
||||||
|
void init_xsave_area();
|
||||||
|
|
||||||
/// Set up a new empty kernel stack for this thread.
|
/// Set up a new empty kernel stack for this thread.
|
||||||
void setup_kernel_stack();
|
void setup_kernel_stack();
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
%include "tasking.inc"
|
%include "tasking.inc"
|
||||||
|
|
||||||
|
extern xcr0_val
|
||||||
|
|
||||||
global task_switch: function hidden (task_switch.end - task_switch)
|
global task_switch: function hidden (task_switch.end - task_switch)
|
||||||
task_switch:
|
task_switch:
|
||||||
push rbp
|
push rbp
|
||||||
@@ -13,22 +15,33 @@ task_switch:
|
|||||||
push r15
|
push r15
|
||||||
|
|
||||||
; Update previous task's TCB
|
; Update previous task's TCB
|
||||||
mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB
|
mov r15, [gs:CPU_DATA.tcb] ; r15: current task TCB
|
||||||
mov [rax + TCB.rsp], rsp
|
mov [r15 + TCB.rsp], rsp
|
||||||
|
|
||||||
; Copy off saved user rsp
|
; Copy off saved user rsp
|
||||||
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp
|
mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp
|
||||||
mov [rax + TCB.rsp3], rcx
|
mov [r15 + TCB.rsp3], rcx
|
||||||
|
|
||||||
; Copy off saved user rflags
|
; Copy off saved user rflags
|
||||||
mov rcx, [gs:CPU_DATA.rflags3] ; rcx: current task's saved user rflags
|
mov rcx, [gs:CPU_DATA.rflags3] ; rcx: current task's saved user rflags
|
||||||
mov [rax + TCB.rflags3], rcx
|
mov [r15 + TCB.rflags3], rcx
|
||||||
|
|
||||||
|
; Save processor extended state
|
||||||
|
mov rcx, [r15 + TCB.xsave] ; rcx: current task's XSAVE area
|
||||||
|
cmp rcx, 0
|
||||||
|
jz .xsave_done
|
||||||
|
|
||||||
|
mov rax, [rel xcr0_val]
|
||||||
|
mov rdx, rax
|
||||||
|
shl rdx, 32
|
||||||
|
xsave [rcx]
|
||||||
|
.xsave_done:
|
||||||
|
|
||||||
; Install next task's TCB
|
; Install next task's TCB
|
||||||
mov [gs:CPU_DATA.tcb], rdi ; rdi: next TCB (function param)
|
mov [gs:CPU_DATA.tcb], rdi ; rdi: next TCB (function param)
|
||||||
mov rsp, [rdi + TCB.rsp] ; next task's stack pointer
|
mov rsp, [rdi + TCB.rsp] ; next task's stack pointer
|
||||||
mov rax, 0x00003fffffffffff
|
mov r14, 0x00003fffffffffff
|
||||||
and rax, [rdi + TCB.pml4] ; rax: next task's pml4 (phys portion of address)
|
and r14, [rdi + TCB.pml4] ; r14: next task's pml4 (phys portion of address)
|
||||||
|
|
||||||
; Update syscall/interrupt rsp
|
; Update syscall/interrupt rsp
|
||||||
mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack
|
mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack
|
||||||
@@ -41,15 +54,26 @@ task_switch:
|
|||||||
mov rcx, [rdi + TCB.rsp3] ; rcx: new task's saved user rsp
|
mov rcx, [rdi + TCB.rsp3] ; rcx: new task's saved user rsp
|
||||||
mov [gs:CPU_DATA.rsp3], rcx
|
mov [gs:CPU_DATA.rsp3], rcx
|
||||||
|
|
||||||
|
; Load processor extended state
|
||||||
|
mov rcx, [rdi + TCB.xsave] ; rcx: new task's XSAVE area
|
||||||
|
cmp rcx, 0
|
||||||
|
jz .xrstor_done
|
||||||
|
|
||||||
|
mov rax, [rel xcr0_val]
|
||||||
|
mov rdx, rax
|
||||||
|
shl rdx, 32
|
||||||
|
xrstor [rcx]
|
||||||
|
.xrstor_done:
|
||||||
|
|
||||||
; Update saved user rflags
|
; Update saved user rflags
|
||||||
mov rcx, [rdi + TCB.rflags3] ; rcx: new task's saved user rflags
|
mov rcx, [rdi + TCB.rflags3] ; rcx: new task's saved user rflags
|
||||||
mov [gs:CPU_DATA.rflags3], rcx
|
mov [gs:CPU_DATA.rflags3], rcx
|
||||||
|
|
||||||
; check if we need to update CR3
|
; check if we need to update CR3
|
||||||
mov rdx, cr3 ; rdx: old CR3
|
mov rdx, cr3 ; rdx: old CR3
|
||||||
cmp rax, rdx
|
cmp r14, rdx
|
||||||
je .no_cr3
|
je .no_cr3
|
||||||
mov cr3, rax
|
mov cr3, r14
|
||||||
.no_cr3:
|
.no_cr3:
|
||||||
|
|
||||||
pop r15
|
pop r15
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ struc TCB
|
|||||||
.rsp3: resq 1
|
.rsp3: resq 1
|
||||||
.rflags3: resq 1
|
.rflags3: resq 1
|
||||||
.pml4: resq 1
|
.pml4: resq 1
|
||||||
|
.xsave: resq 1
|
||||||
endstruc
|
endstruc
|
||||||
|
|
||||||
struc CPU_DATA
|
struc CPU_DATA
|
||||||
|
|||||||
30
src/kernel/xsave.cpp
Normal file
30
src/kernel/xsave.cpp
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <cpu/cpu_id.h>
|
||||||
|
|
||||||
|
#include "cpu.h"
|
||||||
|
#include "xsave.h"
|
||||||
|
|
||||||
|
/// Extended state feature mask: xcr0::J6_SUPPORTED intersected with what the
/// CPU reports in CPUID leaf 0x0d. Computed by xsave_init(), written to XCR0
/// by xsave_enable(), and read by the task_switch assembly as the
/// XSAVE/XRSTOR component mask.
uint64_t xcr0_val = 0;
|
||||||
|
/// File-private storage for the XSAVE area size; filled in by xsave_init()
/// from CPUID leaf 0x0d and zero until then.
static size_t xsave_size_val = 0;
|
||||||
|
/// Read-only alias of xsave_size_val, exported so other files can read the
/// size without being able to modify it.
const size_t &xsave_size = xsave_size_val;
|
||||||
|
|
||||||
|
void
|
||||||
|
xsave_init()
|
||||||
|
{
|
||||||
|
cpu::cpu_id cpuid;
|
||||||
|
const auto regs = cpuid.get(0x0d);
|
||||||
|
const uint64_t cpu_supported =
|
||||||
|
(static_cast<uint64_t>(regs.edx) << 32) |
|
||||||
|
static_cast<uint64_t>(regs.eax);
|
||||||
|
|
||||||
|
xcr0_val = static_cast<uint64_t>(xcr0::J6_SUPPORTED) & cpu_supported;
|
||||||
|
xsave_size_val = regs.ebx;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
xsave_enable()
|
||||||
|
{
|
||||||
|
const uint64_t rax = (xcr0_val & 0xFFFFFFFF);
|
||||||
|
const uint64_t rdx = (xcr0_val >> 32);
|
||||||
|
asm volatile ( "xsetbv" :: "c"(0), "d"(xcr0_val >> 32), "a"(xcr0_val) );
|
||||||
|
}
|
||||||
10
src/kernel/xsave.h
Normal file
10
src/kernel/xsave.h
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
#pragma once
|
||||||
|
/// \file xsave.h
|
||||||
|
/// XSAVE operations
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/// Size in bytes of a thread's XSAVE area. Zero until xsave_init() has run.
extern const size_t &xsave_size;
|
||||||
|
|
||||||
|
/// Query CPUID leaf 0x0d to compute the XCR0 feature mask and the XSAVE
/// area size. Called from bsp_early_init().
void xsave_init();
|
||||||
|
/// Write the feature mask computed by xsave_init() to XCR0 on the calling
/// CPU. Called at the end of cpu_init().
void xsave_enable();
|
||||||
@@ -5,6 +5,7 @@
|
|||||||
#include <j6/errors.h>
|
#include <j6/errors.h>
|
||||||
#include <j6/flags.h>
|
#include <j6/flags.h>
|
||||||
#include <j6/syscalls.h>
|
#include <j6/syscalls.h>
|
||||||
|
#include <j6/syslog.hh>
|
||||||
#include <j6/thread.hh>
|
#include <j6/thread.hh>
|
||||||
#include <j6/types.h>
|
#include <j6/types.h>
|
||||||
|
|
||||||
@@ -17,11 +18,42 @@ extern j6_handle_t __handle_self;
|
|||||||
constexpr uintptr_t stack_top = 0xf80000000;
|
constexpr uintptr_t stack_top = 0xf80000000;
|
||||||
uint32_t flipflop = 0;
|
uint32_t flipflop = 0;
|
||||||
|
|
||||||
|
bool
|
||||||
|
test_floats()
|
||||||
|
{
|
||||||
|
static constexpr int len = 30;
|
||||||
|
double as[len];
|
||||||
|
double bs[len];
|
||||||
|
|
||||||
|
double orig = 345.72;
|
||||||
|
double mult = 3.21;
|
||||||
|
for (int i = 0; i < len * 100; ++i) {
|
||||||
|
int idx = i % len;
|
||||||
|
as[idx] = orig * idx;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < len * 100; ++i) {
|
||||||
|
int idx = i % len;
|
||||||
|
bs[idx] = as[idx] * mult;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < len; ++i) {
|
||||||
|
if (bs[i] != orig * i * mult) {
|
||||||
|
j6::syslog("ERROR: floating point discrepency");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
thread_proc(void* channelp)
|
thread_proc(void* channelp)
|
||||||
{
|
{
|
||||||
j6_log("sub thread starting");
|
j6_log("sub thread starting");
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; ++i)
|
||||||
|
if (!test_floats()) break;
|
||||||
|
|
||||||
j6::channel *chan = reinterpret_cast<j6::channel*>(channelp);
|
j6::channel *chan = reinterpret_cast<j6::channel*>(channelp);
|
||||||
|
|
||||||
char buffer[512];
|
char buffer[512];
|
||||||
@@ -86,6 +118,9 @@ main(int argc, const char **argv)
|
|||||||
|
|
||||||
j6_log("main thread created sub thread");
|
j6_log("main thread created sub thread");
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; ++i)
|
||||||
|
if (!test_floats()) break;
|
||||||
|
|
||||||
char message[] = "MAIN THREAD SUCCESSFULLY CALLED SEND AND RECEIVE IF THIS IS LOWERCASE";
|
char message[] = "MAIN THREAD SUCCESSFULLY CALLED SEND AND RECEIVE IF THIS IS LOWERCASE";
|
||||||
size_t size = sizeof(message);
|
size_t size = sizeof(message);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user