diff --git a/assets/build/base.yaml b/assets/build/base.yaml index 114c4fc..3c7ea0e 100644 --- a/assets/build/base.yaml +++ b/assets/build/base.yaml @@ -22,7 +22,7 @@ variables: '-DGIT_VERSION=\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"', '-DGIT_VERSION_WIDE=L\"${version_major}.${version_minor}.${version_patch}+${version_sha}\"', - "-Wformat=2", "-Winit-self", "-Wfloat-equal", "-Winline", "-Wmissing-format-attribute", + "-Wformat=2", "-Winit-self", "-Winline", "-Wmissing-format-attribute", "-Wmissing-include-dirs", "-Wswitch", "-Wundef", "-Wdisabled-optimization", "-Wpointer-arith", "-Wno-attributes", "-Wno-sign-compare", "-Wno-multichar", "-Wno-div-by-zero", "-Wno-endif-labels", "-Wno-pragmas", "-Wno-format-extra-args", diff --git a/src/kernel/cpu.cpp b/src/kernel/cpu.cpp index e46be42..71fc366 100644 --- a/src/kernel/cpu.cpp +++ b/src/kernel/cpu.cpp @@ -14,6 +14,7 @@ #include "objects/thread.h" #include "syscall.h" #include "tss.h" +#include "xsave.h" unsigned g_num_cpus = 1; @@ -140,6 +141,7 @@ bsp_early_init() cpu->gdt = new (&g_bsp_gdt) GDT {cpu->tss}; cpu->rsp0 = reinterpret_cast(&idle_stack_end); cpu_early_init(cpu); + xsave_init(); return cpu; } @@ -229,4 +231,6 @@ cpu_init(cpu_data *cpu, bool bsp) cpu->id = apic->get_id(); apic->calibrate_timer(); } + + xsave_enable(); } diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index 5d4e996..8912bb1 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -51,6 +51,8 @@ enum class xcr0 ZMM_Hi256, ZMM_Hi16, PKRU = 9, + + J6_SUPPORTED = X87 | SSE | AVX | BINDREG | BINDCSR | OPMASK | ZMM_Hi16 | ZMM_Hi256, }; enum class efer diff --git a/src/kernel/kernel.module b/src/kernel/kernel.module index fa5fb33..6680182 100644 --- a/src/kernel/kernel.module +++ b/src/kernel/kernel.module @@ -65,6 +65,7 @@ kernel = module("kernel", "tss.cpp", "vm_space.cpp", "wait_queue.cpp", + "xsave.cpp", ]) if config == "debug": diff --git a/src/kernel/objects/process.cpp b/src/kernel/objects/process.cpp index b99a3ce..b2e3e9e 100644 --- a/src/kernel/objects/process.cpp +++ b/src/kernel/objects/process.cpp @@ -88,6 +88,9 @@ process::create_thread(uintptr_t rsp3, uint8_t priority) if (rsp3) th->tcb()->rsp3 = rsp3; + if (this != &g_kernel_process) + th->init_xsave_area(); + m_threads.append(th); scheduler::get().add_thread(th->tcb()); return th; diff --git a/src/kernel/objects/thread.cpp b/src/kernel/objects/thread.cpp index 6d6ddad..1a58f2f 100644 --- a/src/kernel/objects/thread.cpp +++ b/src/kernel/objects/thread.cpp @@ -8,6 +8,7 @@ #include "objects/process.h" #include "objects/vm_area.h" #include "scheduler.h" +#include "xsave.h" extern "C" void initialize_user_cpu(); extern obj::vm_area_guarded &g_kernel_stacks; @@ -37,6 +38,9 @@ thread::thread(process &parent, uint8_t pri, uintptr_t rsp0) : thread::~thread() { + if (m_tcb.xsave) + delete [] reinterpret_cast(m_tcb.xsave); + g_kernel_stacks.return_section(m_tcb.kernel_stack); m_parent.handle_release(); } @@ -158,6 +162,14 @@ thread::add_thunk_user(uintptr_t rip3, uint64_t arg0, uint64_t arg1, uintptr_t r add_thunk_kernel(rip0 ? rip0 : trampoline); } +void +thread::init_xsave_area() +{ + void *xsave_area = new uint8_t [xsave_size]; + memset(xsave_area, 0, xsave_size); + m_tcb.xsave = reinterpret_cast(xsave_area); +} + void thread::setup_kernel_stack() { diff --git a/src/kernel/objects/thread.h b/src/kernel/objects/thread.h index 22d5c60..52c685a 100644 --- a/src/kernel/objects/thread.h +++ b/src/kernel/objects/thread.h @@ -26,6 +26,7 @@ struct TCB uintptr_t rsp3; uintptr_t rflags3; uintptr_t pml4; + uintptr_t xsave; // End of area used by asembly obj::thread* thread; @@ -182,6 +183,9 @@ private: /// \arg rsp0 The existing kernel stack rsp, 0 for none thread(process &parent, uint8_t pri, uintptr_t rsp0 = 0); + /// Set up the XSAVE saved processor state area for this thread + void init_xsave_area(); + /// Set up a new empty kernel stack for this thread. void setup_kernel_stack(); diff --git a/src/kernel/task.s b/src/kernel/task.s index 9941db4..704b242 100644 --- a/src/kernel/task.s +++ b/src/kernel/task.s @@ -1,5 +1,7 @@ %include "tasking.inc" +extern xcr0_val + global task_switch: function hidden (task_switch.end - task_switch) task_switch: push rbp @@ -13,22 +15,33 @@ task_switch: push r15 ; Update previous task's TCB - mov rax, [gs:CPU_DATA.tcb] ; rax: current task TCB - mov [rax + TCB.rsp], rsp + mov r15, [gs:CPU_DATA.tcb] ; r15: current task TCB + mov [r15 + TCB.rsp], rsp ; Copy off saved user rsp mov rcx, [gs:CPU_DATA.rsp3] ; rcx: current task's saved user rsp - mov [rax + TCB.rsp3], rcx + mov [r15 + TCB.rsp3], rcx ; Copy off saved user rflags mov rcx, [gs:CPU_DATA.rflags3] ; rcx: current task's saved user rflags - mov [rax + TCB.rflags3], rcx + mov [r15 + TCB.rflags3], rcx + + ; Save processor extended state + mov rcx, [r15 + TCB.xsave] ; rcx: current task's XSAVE area + cmp rcx, 0 + jz .xsave_done + + mov rax, [rel xcr0_val] + mov rdx, rax + shl rdx, 32 + xsave [rcx] +.xsave_done: ; Install next task's TCB mov [gs:CPU_DATA.tcb], rdi ; rdi: next TCB (function param) mov rsp, [rdi + TCB.rsp] ; next task's stack pointer - mov rax, 0x00003fffffffffff - and rax, [rdi + TCB.pml4] ; rax: next task's pml4 (phys portion of address) + mov r14, 0x00003fffffffffff + and r14, [rdi + TCB.pml4] ; r14: next task's pml4 (phys portion of address) ; Update syscall/interrupt rsp mov rcx, [rdi + TCB.rsp0] ; rcx: top of next task's kernel stack @@ -41,15 +54,26 @@ task_switch: mov rcx, [rdi + TCB.rsp3] ; rcx: new task's saved user rsp mov [gs:CPU_DATA.rsp3], rcx + ; Load processor extended state + mov rcx, [rdi + TCB.xsave] ; rcx: new task's XSAVE area + cmp rcx, 0 + jz .xrstor_done + + mov rax, [rel xcr0_val] + mov rdx, rax + shl rdx, 32 + xrstor [rcx] +.xrstor_done: + ; Update saved user rflags mov rcx, [rdi + TCB.rflags3] ; rcx: new task's saved user rflags mov [gs:CPU_DATA.rflags3], rcx ; check if we need to update CR3 mov rdx, cr3 ; rdx: old CR3 - cmp rax, rdx + cmp r14, rdx je .no_cr3 - mov cr3, rax + mov cr3, r14 .no_cr3: pop r15 diff --git a/src/kernel/tasking.inc b/src/kernel/tasking.inc index f70002c..56922e8 100644 --- a/src/kernel/tasking.inc +++ b/src/kernel/tasking.inc @@ -4,6 +4,7 @@ struc TCB .rsp3: resq 1 .rflags3: resq 1 .pml4: resq 1 +.xsave: resq 1 endstruc struc CPU_DATA diff --git a/src/kernel/xsave.cpp b/src/kernel/xsave.cpp new file mode 100644 index 0000000..3c6470a --- /dev/null +++ b/src/kernel/xsave.cpp @@ -0,0 +1,30 @@ +#include +#include + +#include "cpu.h" +#include "xsave.h" + +uint64_t xcr0_val = 0; +static size_t xsave_size_val = 0; +const size_t &xsave_size = xsave_size_val; + +void +xsave_init() +{ + cpu::cpu_id cpuid; + const auto regs = cpuid.get(0x0d); + const uint64_t cpu_supported = + (static_cast(regs.edx) << 32) | + static_cast(regs.eax); + + xcr0_val = static_cast(xcr0::J6_SUPPORTED) & cpu_supported; + xsave_size_val = regs.ebx; +} + +void +xsave_enable() +{ + const uint64_t rax = (xcr0_val & 0xFFFFFFFF); + const uint64_t rdx = (xcr0_val >> 32); + asm volatile ( "xsetbv" :: "c"(0), "d"(xcr0_val >> 32), "a"(xcr0_val) ); +} diff --git a/src/kernel/xsave.h b/src/kernel/xsave.h new file mode 100644 index 0000000..1520126 --- /dev/null +++ b/src/kernel/xsave.h @@ -0,0 +1,10 @@ +#pragma once +/// \file xsave.h +/// XSAVE operations + +#include + +extern const size_t &xsave_size; + +void xsave_init(); +void xsave_enable(); diff --git a/src/user/testapp/main.cpp b/src/user/testapp/main.cpp index 576313b..bf655b9 100644 --- a/src/user/testapp/main.cpp +++ b/src/user/testapp/main.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -17,11 +18,42 @@ extern j6_handle_t __handle_self; constexpr uintptr_t stack_top = 0xf80000000; uint32_t flipflop = 0; +bool +test_floats() +{ + static constexpr int len = 30; + double as[len]; + double bs[len]; + + double orig = 345.72; + double mult = 3.21; + for (int i = 0; i < len * 100; ++i) { + int idx = i % len; + as[idx] = orig * idx; + } + for (int i = 0; i < len * 100; ++i) { + int idx = i % len; + bs[idx] = as[idx] * mult; + } + + for (int i = 0; i < len; ++i) { + if (bs[i] != orig * i * mult) { + j6::syslog("ERROR: floating point discrepency"); + return false; + } + } + + return true; +} + void thread_proc(void* channelp) { j6_log("sub thread starting"); + for (int i = 0; i < 100; ++i) + if (!test_floats()) break; + j6::channel *chan = reinterpret_cast(channelp); char buffer[512]; @@ -86,6 +118,9 @@ main(int argc, const char **argv) j6_log("main thread created sub thread"); + for (int i = 0; i < 100; ++i) + if (!test_floats()) break; + char message[] = "MAIN THREAD SUCCESSFULLY CALLED SEND AND RECEIVE IF THIS IS LOWERCASE"; size_t size = sizeof(message);