[kernel] Set mxcsr and xcr0 in cpu_early_init
There are some SSE instructions (`movaps`, `movups`) in userland code that QEMU software emulation seems to be fine with but that generate `#UD` on KVM. So let's finally get floating-point support working. This is the first step: just setting the control registers to try to fix that error.
This commit is contained in:
@@ -63,6 +63,13 @@ cpu_early_init(cpu_data *cpu)
|
||||
cpu->idt->install();
|
||||
cpu->gdt->install();
|
||||
|
||||
util::bitset64 cr0_val = 0;
|
||||
asm ("mov %%cr0, %0" : "=r"(cr0_val));
|
||||
cr0_val
|
||||
.set(cr0::WP)
|
||||
.clear(cr0::CD);
|
||||
asm volatile ( "mov %0, %%cr0" :: "r" (cr0_val) );
|
||||
|
||||
util::bitset64 cr4_val = 0;
|
||||
asm ("mov %%cr4, %0" : "=r"(cr4_val));
|
||||
cr4_val
|
||||
@@ -79,6 +86,11 @@ cpu_early_init(cpu_data *cpu)
|
||||
.set(efer::NXE);
|
||||
wrmsr(msr::ia32_efer, efer_val);
|
||||
|
||||
util::bitset64 xcr0_val = get_xcr0();
|
||||
xcr0_val
|
||||
.set(xcr0::SSE);
|
||||
set_xcr0(xcr0_val);
|
||||
|
||||
// Install the GS base pointing to the cpu_data
|
||||
wrmsr(msr::ia32_gs_base, reinterpret_cast<uintptr_t>(cpu));
|
||||
}
|
||||
@@ -119,8 +131,11 @@ bsp_late_init()
|
||||
asm ("mov %%cr0, %0" : "=r"(cr0v));
|
||||
asm ("mov %%cr4, %0" : "=r"(cr4v));
|
||||
|
||||
uint32_t mxcsrv = get_mxcsr();
|
||||
uint64_t xcr0v = get_xcr0();
|
||||
|
||||
uint64_t efer = rdmsr(msr::ia32_efer);
|
||||
log::spam(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx", cr0v, cr4v, efer);
|
||||
log::spam(logs::boot, "Control regs: cr0:%lx cr4:%lx efer:%lx mxcsr:%x xcr0:%x", cr0v, cr4v, efer, mxcsrv, xcr0v);
|
||||
}
|
||||
|
||||
cpu_data *
|
||||
|
||||
@@ -17,9 +17,12 @@ enum class cr0
|
||||
{
|
||||
PE = 0, // Protected mode enable
|
||||
MP = 1, // Monitor co-processor
|
||||
EM = 2, // (FPU) Emulation
|
||||
TS = 3, // Task switched
|
||||
ET = 4, // Extension type
|
||||
NE = 5, // Numeric error
|
||||
WP = 16, // (ring 0) Write protect
|
||||
CD = 30, // Cache disable
|
||||
PG = 31, // Paging
|
||||
};
|
||||
|
||||
@@ -58,6 +61,26 @@ enum class efer
|
||||
FFXSR = 14, // Fast FXSAVE
|
||||
};
|
||||
|
||||
/// Bit positions within the MXCSR register. Each enumerator's value
/// is the index of the bit it names, not a mask.
enum class mxcsr
{
    // Exception flags (bits 0-5)
    IE  = 0,    // Flag: invalid operation
    DE  = 1,    // Flag: denormal
    ZE  = 2,    // Flag: divide by zero
    OE  = 3,    // Flag: overflow
    UE  = 4,    // Flag: underflow
    PE  = 5,    // Flag: precision

    DAZ = 6,    // Denormals are zero

    // Exception masks (bits 7-12)
    IM  = 7,    // Mask: invalid operation
    DM  = 8,    // Mask: denormal
    ZM  = 9,    // Mask: divide by zero
    OM  = 10,   // Mask: overflow
    UM  = 11,   // Mask: underflow
    PM  = 12,   // Mask: precision

    // Rounding control (bits 13-14)
    RC0 = 13,   // Rounding control, bit 0
    RC1 = 14,   // Rounding control, bit 1

    FTZ = 15,   // Flush to zero
};
|
||||
|
||||
struct cpu_state
|
||||
{
|
||||
uint64_t r15, r14, r13, r12, r11, r10, r9, r8;
|
||||
@@ -107,7 +130,15 @@ struct cpu_data
|
||||
panic_data *panic;
|
||||
};
|
||||
|
||||
/// C-linkage accessor returning a cpu_data pointer.
/// NOTE(review): presumably this reads back the GS base value installed
/// by cpu_early_init; the implementation is not visible here - confirm.
extern "C" cpu_data * _current_gsbase();
|
||||
extern "C" {
|
||||
uint32_t get_mxcsr();
|
||||
uint32_t set_mxcsr(uint32_t val);
|
||||
|
||||
uint64_t get_xcr0();
|
||||
uint64_t set_xcr0(uint64_t val);
|
||||
|
||||
cpu_data * _current_gsbase();
|
||||
}
|
||||
|
||||
/// Do early initialization of the BSP CPU.
|
||||
/// \returns A pointer to the BSP cpu_data structure
|
||||
|
||||
@@ -1,3 +1,34 @@
|
||||
; uint32_t get_mxcsr()
; Returns the current MXCSR value, zero-extended, in rax.
global get_mxcsr: function hidden (get_mxcsr.end - get_mxcsr)
get_mxcsr:
    sub rsp, 8          ; scratch stack slot (stmxcsr needs a memory operand)
    stmxcsr [rsp]       ; store the 32-bit MXCSR into the slot
    mov eax, [rsp]      ; 32-bit load zero-extends into rax
    add rsp, 8          ; release the slot
    ret
.end:
|
||||
|
||||
; uint32_t set_mxcsr(uint32_t val)
; Loads val (rdi) into MXCSR and returns it in rax.
global set_mxcsr: function hidden (set_mxcsr.end - set_mxcsr)
set_mxcsr:
    mov rax, rdi        ; return value: the value being loaded
    sub rsp, 8          ; scratch stack slot (ldmxcsr needs a memory operand)
    mov [rsp], rdi      ; spill the argument
    ldmxcsr [rsp]       ; load the low 32 bits of the slot into MXCSR
    add rsp, 8          ; release the slot
    ret
.end:
|
||||
|
||||
; uint64_t get_xcr0()
; Returns the full 64-bit value of extended control register 0 in rax.
global get_xcr0: function hidden (get_xcr0.end - get_xcr0)
get_xcr0:
    xor ecx, ecx        ; ecx = 0 selects XCR0
    xgetbv              ; edx:eax = XCR0 (upper halves of rax/rdx are cleared)
    shl rdx, 32         ; move the high 32 bits into position...
    or rax, rdx         ; ...and merge them so no bits of XCR0 are dropped
    ret
.end:
|
||||
|
||||
; uint64_t set_xcr0(uint64_t val)
; Writes val (rdi) to extended control register 0 and returns val in rax.
global set_xcr0: function hidden (set_xcr0.end - set_xcr0)
set_xcr0:
    xor ecx, ecx        ; ecx = 0 selects XCR0
    mov rax, rdi        ; eax = low 32 bits of val; rax doubles as the return value
    mov rdx, rdi
    shr rdx, 32         ; edx = high 32 bits of val. xsetbv writes edx:eax, so
                        ; edx must be set explicitly: leaving whatever the caller
                        ; had in rdx could set reserved XCR0 bits and raise #GP
    xsetbv              ; XCR0 = edx:eax
    ret
.end:
|
||||
|
||||
global get_rsp: function hidden (get_rsp.end - get_rsp)
|
||||
get_rsp:
|
||||
mov rax, rsp
|
||||
@@ -16,7 +16,7 @@ kernel = module("kernel",
|
||||
"clock.cpp",
|
||||
"cpprt.cpp",
|
||||
"cpu.cpp",
|
||||
"debug.s",
|
||||
"cpu.s",
|
||||
"device_manager.cpp",
|
||||
"frame_allocator.cpp",
|
||||
"gdt.cpp",
|
||||
|
||||
Reference in New Issue
Block a user