[kernel] Set mxcsr and xcr0 in cpu_early_init

There are some SSE instructions (movaps, movups) in userland code that
QEMU software emulation seems to be fine with, but which generate `#UD` on KVM.
So let's finally get floating-point support working. This is the first
step, just setting the control regs to try to fix that error.
This commit is contained in:
Justin C. Miller
2023-02-23 18:22:22 -08:00
parent 841ac41e36
commit 95627ba43c
5 changed files with 87 additions and 3 deletions

View File

@@ -63,6 +63,13 @@ cpu_early_init(cpu_data *cpu)
cpu->idt->install();
cpu->gdt->install();
util::bitset64 cr0_val = 0;
asm ("mov %%cr0, %0" : "=r"(cr0_val));
cr0_val
.set(cr0::WP)
.clear(cr0::CD);
asm volatile ( "mov %0, %%cr0" :: "r" (cr0_val) );
util::bitset64 cr4_val = 0;
asm ("mov %%cr4, %0" : "=r"(cr4_val));
cr4_val
@@ -79,6 +86,11 @@ cpu_early_init(cpu_data *cpu)
.set(efer::NXE);
wrmsr(msr::ia32_efer, efer_val);
util::bitset64 xcr0_val = get_xcr0();
xcr0_val
.set(xcr0::SSE);
set_xcr0(xcr0_val);
// Install the GS base pointing to the cpu_data
wrmsr(msr::ia32_gs_base, reinterpret_cast<uintptr_t>(cpu));
}
@@ -119,8 +131,11 @@ bsp_late_init()
asm ("mov %%cr0, %0" : "=r"(cr0v));
asm ("mov %%cr4, %0" : "=r"(cr4v));
// Read MXCSR, XCR0 and EFER so they can be logged together with the
// cr0/cr4 values read above.
uint32_t mxcsrv = get_mxcsr();
uint64_t xcr0v = get_xcr0();
uint64_t efer = rdmsr(msr::ia32_efer);
log::spam(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0v, cr4v, efer);
// xcr0v is 64 bits wide, so it takes %lx like cr0/cr4/efer; only the
// 32-bit mxcsrv uses %x.
log::spam(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx mxcsr:%x xcr0:%lx", cr0v, cr4v, efer, mxcsrv, xcr0v);
}
cpu_data *

View File

@@ -17,9 +17,12 @@ enum class cr0
{
PE = 0, // Protected mode enable
MP = 1, // Monitor co-processor
EM = 2, // (FPU) Emulation
TS = 3, // Task switched
ET = 4, // Extension type
NE = 5, // Numeric error
WP = 16, // (ring 0) Write protect
CD = 30, // Cache disable
PG = 31, // Paging
};
@@ -58,6 +61,26 @@ enum class efer
FFXSR = 14, // Fast FXSAVE
};
/// Bit positions within the MXCSR register. Each enumerator is a bit
/// index, not a mask.
enum class mxcsr
{
    // Exception status flags
    IE  = 0,    // Invalid operation
    DE  = 1,    // Denormal
    ZE  = 2,    // Divide by zero
    OE  = 3,    // Overflow
    UE  = 4,    // Underflow
    PE  = 5,    // Precision

    // Denormal input handling
    DAZ = 6,    // Denormals are zero

    // Exception masks, in the same order as the flags above
    IM  = 7,    // Invalid operation
    DM  = 8,    // Denormal
    ZM  = 9,    // Divide by zero
    OM  = 10,   // Overflow
    UM  = 11,   // Underflow
    PM  = 12,   // Precision

    // Two-bit rounding control field
    RC0 = 13,   // Rounding control, bit 0
    RC1 = 14,   // Rounding control, bit 1

    // Denormal result handling
    FTZ = 15,   // Flush to zero
};
struct cpu_state
{
uint64_t r15, r14, r13, r12, r11, r10, r9, r8;
@@ -107,7 +130,15 @@ struct cpu_data
panic_data *panic;
};
extern "C" cpu_data * _current_gsbase();
extern "C" {
    /// \returns A pointer to a cpu_data structure.
    /// NOTE(review): presumably the one installed as the GS base by
    /// cpu_early_init for the calling CPU - confirm against the definition.
    cpu_data * _current_gsbase();

    /// Read the MXCSR register.
    /// \returns The current MXCSR value
    uint32_t get_mxcsr();

    /// Load a new value into the MXCSR register.
    /// \arg val  The value to load
    /// \returns  The value that was loaded
    uint32_t set_mxcsr(uint32_t val);

    /// Read extended control register 0 (XCR0).
    /// \returns The current XCR0 value
    uint64_t get_xcr0();

    /// Write extended control register 0 (XCR0).
    /// \arg val  The value to write
    /// \returns  The value that was written
    uint64_t set_xcr0(uint64_t val);
}
/// Do early initialization of the BSP CPU.
/// \returns A pointer to the BSP cpu_data structure

View File

@@ -1,3 +1,34 @@
; uint32_t get_mxcsr()
; Return the current value of the MXCSR register in rax.
; STMXCSR can only store to memory, so a temporary stack slot is used.
global get_mxcsr: function hidden (get_mxcsr.end - get_mxcsr)
get_mxcsr:
push 0 ; reserve a zero-filled stack slot so the bytes above the stored 32 bits are 0
stmxcsr [rsp] ; store the 32-bit MXCSR into the slot
pop rax ; return value <- slot contents; also releases the slot
ret
.end:
; uint32_t set_mxcsr(uint32_t val)
; Load val (passed in rdi) into the MXCSR register and return it in rax.
; LDMXCSR can only load from memory, so val is spilled to the stack first.
global set_mxcsr: function hidden (set_mxcsr.end - set_mxcsr)
set_mxcsr:
push rdi ; spill val to a stack slot
ldmxcsr [rsp] ; MXCSR <- low 32 bits of the slot
pop rax ; return val; also releases the slot
ret
.end:
; uint64_t get_xcr0()
; Return the full 64-bit value of extended control register 0 (XCR0) in rax.
; XGETBV returns the register split across edx:eax (and clears the upper
; halves of rax and rdx), so the two halves are recombined here rather than
; silently dropping bits 32-63.
global get_xcr0: function hidden (get_xcr0.end - get_xcr0)
get_xcr0:
	xor ecx, ecx		; ecx = 0 selects XCR0
	xgetbv				; edx:eax <- XCR0
	shl rdx, 32			; move the high half into bits 32-63
	or rax, rdx			; rax = (high << 32) | low
	ret
.end:
; uint64_t set_xcr0(uint64_t val)
; Write val (passed in rdi) to extended control register 0 (XCR0) and
; return val in rax.
; XSETBV takes its operand in edx:eax, so both halves must be loaded from
; val. Leaving edx untouched would write whatever the caller last left in
; rdx into XCR0[63:32], which can fault (#GP) if that sets reserved bits.
; Per the Intel SDM, XSETBV is only valid at CPL 0 with CR4.OSXSAVE set;
; the caller is responsible for enabling that first.
global set_xcr0: function hidden (set_xcr0.end - set_xcr0)
set_xcr0:
	xor ecx, ecx		; ecx = 0 selects XCR0
	mov rax, rdi		; eax = val[31:0]; rax also serves as the return value
	mov rdx, rdi
	shr rdx, 32			; edx = val[63:32]
	xsetbv				; XCR0 <- edx:eax
	ret
.end:
global get_rsp: function hidden (get_rsp.end - get_rsp)
get_rsp:
mov rax, rsp

View File

@@ -16,7 +16,7 @@ kernel = module("kernel",
"clock.cpp",
"cpprt.cpp",
"cpu.cpp",
"debug.s",
"cpu.s",
"device_manager.cpp",
"frame_allocator.cpp",
"gdt.cpp",