[kernel] Begin replacing page_manager with vm_space

This is the first commit of several reworking the VM system. The main
focus is replacing page_manager's global functionality with objects
representing individual VM spaces. The main changes in this commit:

- Added the (as yet unused) vm_area object, which will be the main
  point of control for programs to allocate or share memory. (See the
  usage sketch after this list.)
- Replaced the old vm_space with a new one that keeps its state in its
  page tables; vm_spaces will also act as containers for vm_areas.
- vm_space takes over from page_manager as the page fault handler.
- Commented out the page-table walking in memory_bootstrap; that
  functionality will probably need to be recreated, but it was already
  broken.
- Split the page_table.h implementations out of page_manager.cpp into
  the new page_table.cpp, updated them, and added page_table::iterator.
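As a rough sketch of how these pieces are intended to fit together (this
is illustrative only, not code from this commit; the process, base
address, sizes, and physical address are made up):

    // Hypothetical usage of the new objects added in this commit.
    #include "kernel_memory.h"
    #include "objects/process.h"
    #include "objects/vm_area.h"
    #include "vm_space.h"

    void example(process &proc)
    {
        // A vm_area describes a region of memory independent of any
        // particular address space.
        vm_area *vma = new vm_area {16 * memory::frame_size, vm_flags::zero};

        // Adding it to a space binds it at a base address in that space.
        // Automatic placement (base == 0) is not yet implemented.
        uintptr_t base = 0x7f0000000000;
        if (vma->add_to(&proc.space(), &base) != j6_status_ok)
            return;

        // Committing physical pages to the area maps them into every
        // space the area has been added to, at the matching offset.
        uintptr_t phys = 0x200000; // placeholder physical address
        vma->commit(phys, 0, 16);
    }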
2020-09-17 00:48:17 -07:00
parent ca7f78565d
commit 9aa08a70cf
16 changed files with 1004 additions and 401 deletions

View File

@@ -42,7 +42,9 @@ modules:
- src/kernel/objects/kobject.cpp
- src/kernel/objects/thread.cpp
- src/kernel/objects/process.cpp
- src/kernel/objects/vm_area.cpp
- src/kernel/page_manager.cpp
- src/kernel/page_table.cpp
- src/kernel/pci.cpp
- src/kernel/scheduler.cpp
- src/kernel/screen.cpp

View File

@@ -1,7 +1,8 @@
#include "kutil/assert.h"
#include "kernel_memory.h"
#include "page_manager.h"
#include "buffer_cache.h"
#include "kernel_memory.h"
#include "objects/vm_area.h"
#include "page_manager.h"
#include "vm_space.h"
extern vm_space g_kernel_space;
@@ -33,16 +34,17 @@ buffer_cache::get_buffer()
m_next += m_size;
}
g_kernel_space.commit(addr, m_size);
vm_space &vm = vm_space::kernel_space();
vm.allow(addr, m_size, true);
return addr;
}
void
buffer_cache::return_buffer(uintptr_t addr)
{
void *ptr = reinterpret_cast<void*>(addr);
size_t page_count = page_manager::page_count(m_size);
page_manager::get()->unmap_pages(ptr, page_count);
g_kernel_space.unreserve(addr, m_size);
vm_space &vm = vm_space::kernel_space();
vm.allow(addr, m_size, false);
m_cache.append(addr);
}

View File

@@ -10,10 +10,12 @@
#include "gdt.h"
#include "interrupts.h"
#include "io.h"
#include "kernel_memory.h"
#include "log.h"
#include "page_manager.h"
#include "objects/process.h"
#include "scheduler.h"
#include "syscall.h"
#include "vm_space.h"
static const uint16_t PIC1 = 0x20;
static const uint16_t PIC2 = 0xa0;
@@ -189,8 +191,15 @@ isr_handler(cpu_state *regs)
uintptr_t cr2 = 0;
__asm__ __volatile__ ("mov %%cr2, %0" : "=r"(cr2));
if ((regs->errorcode & 0x9) == 0 &&
page_manager::get()->fault_handler(cr2))
bool user = cr2 < memory::kernel_offset;
vm_space::fault_type ft =
static_cast<vm_space::fault_type>(regs->errorcode);
vm_space &space = user
? process::current().space()
: vm_space::kernel_space();
if (cr2 && space.handle_fault(cr2, ft))
break;
cons->set_color(11);

View File

@@ -9,6 +9,7 @@
#include "frame_allocator.h"
#include "io.h"
#include "log.h"
#include "objects/vm_area.h"
#include "page_manager.h"
#include "vm_space.h"
@@ -24,8 +25,6 @@ using memory::table_entries;
using namespace kernel;
vm_space g_kernel_space {kernel_offset, (heap_start-kernel_offset)};
// These objects are initialized _before_ global constructors are called,
// so we don't want them to have global constructors at all, lest they
@@ -39,6 +38,9 @@ page_manager &g_page_manager = __g_page_manager_storage.value;
static kutil::no_construct<frame_allocator> __g_frame_allocator_storage;
frame_allocator &g_frame_allocator = __g_frame_allocator_storage.value;
static kutil::no_construct<vm_space> __g_kernel_space_storage;
vm_space &g_kernel_space = __g_kernel_space_storage.value;
void * operator new(size_t size) { return g_kernel_heap.allocate(size); }
void * operator new [] (size_t size) { return g_kernel_heap.allocate(size); }
void operator delete (void *p) noexcept { return g_kernel_heap.free(p); }
@@ -49,12 +51,13 @@ void * kalloc(size_t size) { return g_kernel_heap.allocate(size); }
void kfree(void *p) { return g_kernel_heap.free(p); }
}
/*
void walk_page_table(
page_table *table,
page_table::level level,
uintptr_t &current_start,
size_t &current_bytes,
vm_space &kspace)
vm_area &karea)
{
constexpr size_t huge_page_size = (1ull<<30);
constexpr size_t large_page_size = (1ull<<21);
@@ -63,7 +66,7 @@ void walk_page_table(
page_table *next = table->get(i);
if (!next) {
if (current_bytes)
kspace.commit(current_start, current_bytes);
karea.commit(current_start, current_bytes);
current_start = 0;
current_bytes = 0;
continue;
@@ -86,6 +89,7 @@ void walk_page_table(
}
}
}
*/
void
memory_initialize_pre_ctors(args::header *kargs)
@@ -106,11 +110,15 @@ memory_initialize_pre_ctors(args::header *kargs)
// Create the page manager
new (&g_page_manager) page_manager {g_frame_allocator, kpml4};
vm_space &vm = *new (&g_kernel_space) vm_space {kpml4, true};
vm.allow(memory::heap_start, memory::kernel_max_heap, true);
}
void
memory_initialize_post_ctors(args::header *kargs)
{
/*
uintptr_t current_start = 0;
size_t current_bytes = 0;
@@ -128,8 +136,10 @@ memory_initialize_post_ctors(args::header *kargs)
if (current_bytes)
g_kernel_space.commit(current_start, current_bytes);
*/
g_frame_allocator.free(
reinterpret_cast<uintptr_t>(kargs->page_table_cache),
kargs->num_free_tables);
}

View File

@@ -18,12 +18,10 @@ public:
none,
event,
eventpair,
channel,
endpoint,
vms,
vmo,
vma,
job,
process,

View File

@@ -10,6 +10,7 @@ kutil::vector<process*> process::s_processes;
process::process(page_table *pml4) :
kobject(kobject::type::process),
m_pml4(pml4),
m_space(pml4),
m_next_handle(0),
m_state(state::running)
{

View File

@@ -6,6 +6,7 @@
#include "kutil/vector.h"
#include "objects/kobject.h"
#include "page_table.h"
#include "vm_space.h"
class process :
public kobject
@@ -37,6 +38,9 @@ public:
/// Get the process' page table root
page_table * pml4() { return m_pml4; }
/// Get the process' virtual memory space
vm_space & space() { return m_space; }
/// Create a new thread in this process
/// \args priority The new thread's scheduling priority
/// \args user If true, create a userspace stack for this thread
@@ -72,6 +76,8 @@ private:
uint32_t m_return_code;
page_table *m_pml4;
vm_space m_space;
kutil::vector<thread*> m_threads;
kutil::map<j6_handle_t, kobject*> m_handles;
j6_handle_t m_next_handle;

View File

@@ -0,0 +1,278 @@
#include "kernel_memory.h"
#include "objects/process.h"
#include "objects/vm_area.h"
using memory::frame_size;
vm_area::vm_area(size_t size, vm_flags flags) :
kobject(kobject::type::vma),
m_size(size),
m_flags(flags)
{
}
vm_area::~vm_area()
{
}
size_t
vm_area::resize(size_t size)
{
// TODO: resizing is not yet implemented; the size is unchanged
return m_size;
}
j6_status_t
vm_area::add_to(vm_space *space, uintptr_t *base)
{
if (!base || !space)
return j6_err_invalid_arg;
uintptr_t *prev = m_procs.find(space);
if (prev) {
*base = *prev;
return j6_status_exists;
}
if (!*base)
return j6_err_nyi;
m_procs.insert(space, *base);
for (auto &m : m_mappings)
if (m.state == state::mapped)
space->page_in(*base + m.offset, m.count, m.phys);
return j6_status_ok;
}
j6_status_t
vm_area::remove_from(vm_space *space)
{
uintptr_t *base = m_procs.find(space);
if (space && base) {
for (auto &m : m_mappings)
if (m.state == state::mapped)
space->page_out(*base + m.offset, m.count);
m_procs.erase(space);
}
return j6_status_ok;
}
size_t
vm_area::overlaps(uintptr_t offset, size_t pages, size_t *count)
{
size_t first = 0;
size_t n = 0;
uintptr_t end = offset + pages * frame_size;
for (size_t i = 0; i < m_mappings.count(); ++i) {
mapping &m = m_mappings[i];
uintptr_t map_end = m.offset + m.count * frame_size;
if (offset < map_end && end > m.offset) {
if (!first) first = i;
++n;
} else if (n) {
break;
}
}
if (count) *count = n;
return first;
}
bool
vm_area::commit(uintptr_t phys, uintptr_t offset, size_t count)
{
return add(offset, count, state::mapped, phys);
}
bool
vm_area::uncommit(uintptr_t offset, size_t count)
{
return remove(offset, count, state::mapped);
}
bool
vm_area::reserve(uintptr_t offset, size_t count)
{
return add(offset, count, state::reserved, 0);
}
bool
vm_area::unreserve(uintptr_t offset, size_t count)
{
return remove(offset, count, state::reserved);
}
vm_area::state
vm_area::get(uintptr_t offset, uintptr_t *phys)
{
size_t n = 0;
size_t o = overlaps(offset, 1, &n);
if (n) {
mapping &m = m_mappings[o];
if (phys) *phys = m.phys;
return m.state;
}
return state::none;
}
bool
vm_area::add(uintptr_t offset, size_t count, state desired, uintptr_t phys)
{
const bool do_map = desired == state::mapped;
size_t n = 0;
size_t o = overlaps(offset, count, &n);
if (!n) {
// In the clear, map it
// assign the outer 'o' so the merge logic below sees the new mapping
o = m_mappings.sorted_insert({
.offset = offset,
.count = count,
.phys = phys,
.state = desired});
n = 1;
if (do_map)
map(offset, count, phys);
} else if (desired == state::mapped) {
// Mapping overlaps not allowed
return false;
}
// Overlaps with different states are not allowed
for (size_t i = o; i < o+n; ++i)
if (m_mappings[i].state != desired)
return false;
// Try to expand to abutting similar areas
if (o > 0 &&
m_mappings[o-1].state == desired &&
m_mappings[o-1].end() == offset &&
(!do_map || m_mappings[o-1].phys_end() == phys)) {
--o;
++n;
}
uintptr_t end = offset + count * frame_size;
uintptr_t pend = phys + count * frame_size;
if (o + n < m_mappings.count() &&
m_mappings[o+n].state == desired &&
end == m_mappings[o+n].offset &&
(!do_map || m_mappings[o+n].phys == pend)) {
++n;
}
// Use the first overlap block as our new block
mapping &first = m_mappings[o];
mapping &last = m_mappings[o + n -1];
if (offset < first.offset)
first.offset = offset;
size_t diff =
(end > last.end() ? end : last.end()) -
first.offset;
first.count = diff / frame_size;
if (n > 1)
m_mappings.remove_at(o+1, n-1);
return true;
}
bool
vm_area::remove(uintptr_t offset, size_t count, state expected)
{
size_t n = 0;
size_t o = overlaps(offset, count, &n);
if (!n) return true;
// Overlaps with different states are not allowed
for (size_t i = o; i < o+n; ++i)
if (m_mappings[i].state != expected)
return false;
mapping *first = &m_mappings[o];
mapping *last = &m_mappings[o+n-1];
uintptr_t end = offset + count * frame_size;
size_t leading = offset - first->offset;
size_t trailing = last->end() - end;
// if we're entirely contained in one, we need to split it
if (leading && trailing && n == 1) {
size_t i = m_mappings.sorted_insert({
.offset = end,
.count = trailing / frame_size,
.state = first->state,
});
last = &m_mappings[i];
trailing = 0;
first->count -= last->count;
if (first->state == state::mapped)
last->phys = first->phys + first->count * frame_size;
}
if (leading) {
size_t remove_pages = first->count;
first->count = leading / frame_size;
remove_pages -= first->count;
if (expected == state::mapped)
unmap(first->end(), remove_pages);
}
if (trailing) {
uintptr_t remove_off = last->offset;
size_t remove_pages = last->count;
last->offset = end;
last->count = trailing / frame_size;
remove_pages -= last->count;
if (expected == state::mapped) {
unmap(remove_off, remove_pages);
last->phys += remove_pages * frame_size;
}
}
size_t delete_start = 0;
size_t delete_count = 0;
for (size_t i = o; i < o+n; ++i) {
mapping &m = m_mappings[i];
if (offset <= m.offset && end >= m.end()) {
if (!delete_count) delete_start = i;
++delete_count;
if (expected == state::mapped)
unmap(m.offset, m.count);
}
}
if (delete_count)
m_mappings.remove_at(delete_start, delete_count);
return true;
}
void
vm_area::map(uintptr_t offset, size_t count, uintptr_t phys)
{
for (auto &it : m_procs) {
uintptr_t addr = it.val + offset;
vm_space *space = it.key;
space->page_in(addr, count, phys);
}
}
void
vm_area::unmap(uintptr_t offset, size_t count)
{
for (auto &it : m_procs) {
uintptr_t addr = it.val + offset;
vm_space *space = it.key;
space->page_out(addr, count);
}
}

View File

@@ -0,0 +1,127 @@
#pragma once
/// \file vm_area.h
/// Definition of VMA objects and related functions
#include "j6/signals.h"
#include "kutil/enum_bitfields.h"
#include "kutil/map.h"
#include "kernel_memory.h"
#include "objects/kobject.h"
class vm_space;
enum class vm_flags : uint32_t
{
none = 0x00000000,
zero = 0x00000001,
contiguous = 0x00000002,
large_pages = 0x00000100,
huge_pages = 0x00000200,
offset_linear = 0x80000000
};
IS_BITFIELD(vm_flags);
/// Virtual memory areas allow control over memory allocation
class vm_area :
public kobject
{
public:
/// Constructor.
/// \arg size Initial virtual size of the memory area
/// \arg flags Flags for this memory area
vm_area(size_t size, vm_flags flags = vm_flags::none);
virtual ~vm_area();
/// Get the current virtual size of the memory area
size_t size() const { return m_size; }
/// Change the virtual size of the memory area. This may cause
/// deallocation if the new size is smaller than the current size.
/// Note that if resizing is unsuccessful, the previous size will
/// be returned.
/// \arg size The desired new virtual size
/// \returns The new virtual size
size_t resize(size_t size);
/// Add this virtual area to a process' virtual address space. If
/// the given base address is zero, a base address will be chosen
/// automatically.
/// \arg s The target address space
/// \arg base [in] The desired base address [out] the actual base address
/// \returns j6_status_ok on success
j6_status_t add_to(vm_space *s, uintptr_t *base);
/// Remove this virtual area from a process' virtual address space.
/// \arg s The target address space
/// \returns j6_status_ok on success
j6_status_t remove_from(vm_space *s);
/// Commit contiguous physical pages to this area
/// \arg phys The physical address of the first page
/// \arg offset The offset from the start of this area these pages represent
/// \arg count The number of pages
/// \returns True if successful
bool commit(uintptr_t phys, uintptr_t offset, size_t count);
/// Uncommit physical pages from this area
/// \arg offset The offset from the start of this area these pages represent
/// \arg count The number of pages
/// \returns True if successful
bool uncommit(uintptr_t offset, size_t count);
/// Reserve a range of this area to never commit
/// \arg offset The offset from the start of this area
/// \arg count The number of pages
/// \returns True if successful
bool reserve(uintptr_t offset, size_t count);
/// Unreserve a range of this area to allow commits
/// \arg offset The offset from the start of this area
/// \arg count The number of pages
/// \returns True if successful
bool unreserve(uintptr_t offset, size_t count);
enum class state : uint8_t { none, reserved, mapped };
/// Get the physical page representing an offset in this area
/// \arg offset The offset into the area
/// \arg phys [out] The physical page address
/// \returns State of the given address
state get(uintptr_t offset, uintptr_t *phys);
/// Get the flags set for this area
vm_flags flags() const { return m_flags; }
private:
struct mapping {
uintptr_t offset;
size_t count;
uintptr_t phys;
state state;
int compare(const struct mapping &o) const {
return offset > o.offset ? 1 : offset < o.offset ? -1 : 0;
}
inline uintptr_t end() const { return offset + count * memory::frame_size; }
inline uintptr_t phys_end() const { return phys + count * memory::frame_size; }
};
size_t overlaps(uintptr_t offset, size_t pages, size_t *count);
bool add(uintptr_t offset, size_t count, state desired, uintptr_t phys);
bool remove(uintptr_t offset, size_t count, state expected);
void map(uintptr_t offset, size_t count, uintptr_t phys);
void unmap(uintptr_t offset, size_t count);
size_t m_size;
vm_flags m_flags;
kutil::map<vm_space*, uintptr_t> m_procs;
kutil::vector<mapping> m_mappings;
};
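A small worked example of the bookkeeping above (hypothetical, not part
of this commit): offsets are byte offsets from the start of the area,
counts are in pages, and the physical address is a placeholder.

    vm_area area {32 * memory::frame_size};

    // Pages 0-3 may never be committed; pages 4-11 become backed by
    // physical memory starting at 0x200000.
    area.reserve(0, 4);
    area.commit(0x200000, 4 * memory::frame_size, 8);

    uintptr_t phys = 0;
    vm_area::state s = area.get(4 * memory::frame_size, &phys);
    // s == vm_area::state::mapped, phys == 0x200000

    // Releasing the physical backing returns that range to unmapped.
    area.uncommit(4 * memory::frame_size, 8);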

View File

@@ -2,6 +2,8 @@
#include "console.h"
#include "io.h"
#include "log.h"
#include "objects/process.h"
#include "objects/vm_area.h"
#include "page_manager.h"
#include "vm_space.h"
@@ -327,33 +329,6 @@ page_manager::unmap_pages(void* address, size_t count, page_table *pml4)
page_out(pml4, iaddr, count, true);
}
bool
page_manager::fault_handler(uintptr_t addr)
{
if (!addr)
return false;
extern vm_space g_kernel_space;
bool is_heap = addr >= ::memory::heap_start &&
addr < ::memory::heap_start + ::memory::kernel_max_heap;
if (!is_heap &&
g_kernel_space.get(addr) != vm_state::committed)
return false;
uintptr_t page = addr & ~0xfffull;
log::debug(logs::memory, "PF: attempting to page in %016lx for %016lx", page, addr);
bool user = addr < kernel_offset;
map_pages(page, 1, user);
// Kernel stacks: zero them upon mapping them
if (addr >= memory::stacks_start && addr < memory::heap_start)
kutil::memset(reinterpret_cast<void*>(page), 0, memory::frame_size);
return true;
}
void
page_manager::check_needs_page(page_table *table, unsigned index, bool user)
{
@@ -489,61 +464,3 @@ page_out_end:
if (free && free_count)
m_frames.free(free_start, free_count);
}
void
page_table::dump(page_table::level lvl, bool recurse)
{
console *cons = console::get();
cons->printf("\nLevel %d page table @ %lx:\n", lvl, this);
for (int i=0; i<table_entries; ++i) {
uint64_t ent = entries[i];
if ((ent & 0x1) == 0)
cons->printf(" %3d: %016lx NOT PRESENT\n", i, ent);
else if ((lvl == level::pdp || lvl == level::pd) && (ent & 0x80) == 0x80)
cons->printf(" %3d: %016lx -> Large page at %016lx\n", i, ent, ent & ~0xfffull);
else if (lvl == level::pt)
cons->printf(" %3d: %016lx -> Page at %016lx\n", i, ent, ent & ~0xfffull);
else
cons->printf(" %3d: %016lx -> Level %d table at %016lx\n",
i, ent, deeper(lvl), (ent & ~0xfffull) + page_offset);
}
if (lvl != level::pt && recurse) {
for (int i=0; i<=table_entries; ++i) {
if (is_large_page(lvl, i))
continue;
page_table *next = get(i);
if (next)
next->dump(deeper(lvl), true);
}
}
}
page_table_indices::page_table_indices(uint64_t v) :
index{
(v >> 39) & 0x1ff,
(v >> 30) & 0x1ff,
(v >> 21) & 0x1ff,
(v >> 12) & 0x1ff }
{}
uintptr_t
page_table_indices::addr() const
{
return
(index[0] << 39) |
(index[1] << 30) |
(index[2] << 21) |
(index[3] << 12);
}
bool operator==(const page_table_indices &l, const page_table_indices &r)
{
return l[0] == r[0] && l[1] == r[1] && l[2] == r[2] && l[3] == r[3];
}

View File

@@ -118,11 +118,6 @@ public:
/// Get a pointer to the kernel's PML4
inline page_table * get_kernel_pml4() { return m_kernel_pml4; }
/// Attempt to handle a page fault.
/// \arg addr Address that triggered the fault
/// \returns True if the fault was handled
bool fault_handler(uintptr_t addr);
private:
/// Copy a physical page
/// \arg orig Physical address of the page to copy
@@ -182,6 +177,7 @@ private:
frame_allocator &m_frames;
friend class memory_bootstrap;
friend class vm_space;
page_manager(const page_manager &) = delete;
};

src/kernel/page_table.cpp (new file, 255 lines)
View File

@@ -0,0 +1,255 @@
#include "kutil/assert.h"
#include "kutil/memory.h"
#include "console.h"
#include "frame_allocator.h"
#include "kernel_memory.h"
#include "page_table.h"
using memory::page_offset;
using level = page_table::level;
// Flags: 0 0 0 0 0 0 0 0 0 0 1 1 = 0x0003
// IGNORED | | | | | | | +- Present
// | | | | | | +--- Writeable
// | | | | | +----- Usermode access (Supervisor only)
// | | | | +------- PWT (determining memory type for pdpt)
// | | | +---------- PCD (determining memory type for pdpt)
// | | +------------ Accessed flag (not accessed yet)
// | +-------------- Ignored
// +---------------- Reserved 0 (Table pointer, not page)
/// Page table entry flags for entries pointing at another table
constexpr uint16_t table_flags = 0x003;
page_table::iterator::iterator(uintptr_t virt, page_table *pml4) :
m_table {pml4, 0, 0, 0}
{
for (unsigned i = 0; i < D; ++i)
m_index[i] = static_cast<uint16_t>((virt >> (12 + 9*(3-i))) & 0x1ff);
}
page_table::iterator::iterator(const page_table::iterator &o)
{
kutil::memcpy(&m_table, &o.m_table, sizeof(m_table));
kutil::memcpy(&m_index, &o.m_index, sizeof(m_index));
}
inline static level to_lv(unsigned i) { return static_cast<level>(i); }
inline static unsigned to_un(level i) { return static_cast<unsigned>(i); }
uintptr_t
page_table::iterator::start(level l) const
{
uintptr_t address = 0;
for (level i = level::pml4; i <= l; ++i)
address |= static_cast<uintptr_t>(index(i)) << (12 + 9*(3-unsigned(i)));
// canonicalize the address
if (address & (1ull<<47))
address |= (0xffffull<<48);
return address;
}
uintptr_t
page_table::iterator::end(level l) const
{
kassert(l != level::pml4, "Called end() with level::pml4");
uintptr_t address = 0;
for (level i = level::pml4; i <= l; ++i) {
uintptr_t idx = index(i) +
((i == l) ? 1 : 0);
address |= idx << (12 + 9*(3-unsigned(i)));
}
// canonicalize the address
if (address & (1ull<<47))
address |= (0xffffull<<48);
return address;
}
level
page_table::iterator::align() const
{
for (int i = 4; i > 0; --i)
if (m_index[i-1]) return level(i);
return level::pml4;
}
page_table::level
page_table::iterator::depth() const
{
for (level i = level::pml4; i < level::pt; ++i)
if (!(entry(i) & 1)) return i;
return level::pt;
}
void
page_table::iterator::next(level l)
{
kassert(l != level::pml4, "Called next() with level::pml4");
kassert(l <= level::page, "Called next() with too deep level");
for (level i = l; i < level::page; ++i)
index(i) = 0;
while (++index(--l) == 512) {
kassert(to_un(l), "iterator ran off the end of memory");
index(l) = 0;
}
}
bool
page_table::iterator::allowed() const
{
level d = depth();
while (true) {
if (entry(d) & flag_allowed) return true;
else if (d == level::pml4) return false;
--d;
}
}
void
page_table::iterator::allow(level at, bool allowed)
{
for (level l = level::pdp; l <= at; ++l)
ensure_table(l);
if (allowed) entry(at) |= flag_allowed;
else entry(at) &= ~flag_allowed;
}
bool
page_table::iterator::operator!=(const iterator &o) const
{
for (unsigned i = 0; i < D; ++i)
if (o.m_index[i] != m_index[i])
return true;
return o.m_table[0] != m_table[0];
}
bool
page_table::iterator::check_table(level l) const
{
// We're only dealing with D levels of paging, and
// there must always be a PML4.
if (l == level::pml4 || l > level::pt)
return l == level::pml4;
uint64_t parent = entry(l - 1);
if (parent & 1) {
table(l) = reinterpret_cast<page_table*>(page_offset | (parent & ~0xfffull));
return true;
}
return false;
}
void
page_table::iterator::ensure_table(level l)
{
// We're only dealing with D levels of paging, and
// there must always be a PML4.
if (l == level::pml4 || l > level::pt) return;
if (check_table(l)) return;
// TODO: a better way to get at the frame allocator
extern frame_allocator g_frame_allocator;
uintptr_t phys = 0;
size_t n = g_frame_allocator.allocate(1, &phys);
kassert(n, "Failed to allocate a page table");
uint64_t &parent = entry(l - 1);
uint64_t flags = table_flags |
((parent & flag_allowed) ? flag_allowed : 0);
m_table[unsigned(l)] = reinterpret_cast<page_table*>(phys | page_offset);
parent = (reinterpret_cast<uintptr_t>(phys) & ~0xfffull) | flags;
}
page_table *
page_table::get(int i, uint16_t *flags) const
{
uint64_t entry = entries[i];
if ((entry & 0x1) == 0)
return nullptr;
if (flags)
*flags = entry & 0xfffull;
return reinterpret_cast<page_table *>((entry & ~0xfffull) + page_offset);
}
void
page_table::set(int i, page_table *p, uint16_t flags)
{
if (entries[i] & flag_allowed) flags |= flag_allowed;
entries[i] =
(reinterpret_cast<uint64_t>(p) - page_offset) |
(flags & 0xfff);
}
void
page_table::dump(page_table::level lvl, bool recurse)
{
console *cons = console::get();
cons->printf("\nLevel %d page table @ %lx:\n", lvl, this);
for (int i=0; i<memory::table_entries; ++i) {
uint64_t ent = entries[i];
if ((ent & 0x1) == 0)
cons->printf(" %3d: %016lx NOT PRESENT\n", i, ent);
else if ((lvl == level::pdp || lvl == level::pd) && (ent & 0x80) == 0x80)
cons->printf(" %3d: %016lx -> Large page at %016lx\n", i, ent, ent & ~0xfffull);
else if (lvl == level::pt)
cons->printf(" %3d: %016lx -> Page at %016lx\n", i, ent, ent & ~0xfffull);
else
cons->printf(" %3d: %016lx -> Level %d table at %016lx\n",
i, ent, deeper(lvl), (ent & ~0xfffull) + page_offset);
}
if (lvl != level::pt && recurse) {
for (int i=0; i<memory::table_entries; ++i) {
if (is_large_page(lvl, i))
continue;
page_table *next = get(i);
if (next)
next->dump(deeper(lvl), true);
}
}
}
page_table_indices::page_table_indices(uint64_t v) :
index{
(v >> 39) & 0x1ff,
(v >> 30) & 0x1ff,
(v >> 21) & 0x1ff,
(v >> 12) & 0x1ff }
{}
uintptr_t
page_table_indices::addr() const
{
return
(index[0] << 39) |
(index[1] << 30) |
(index[2] << 21) |
(index[3] << 12);
}
bool operator==(const page_table_indices &l, const page_table_indices &r)
{
return l[0] == r[0] && l[1] == r[1] && l[2] == r[2] && l[3] == r[3];
}

View File

@@ -10,39 +10,133 @@ class page_manager;
/// Struct to allow easy accessing of a memory page being used as a page table.
struct page_table
{
enum class level : unsigned { pml4, pdp, pd, pt };
/// Enum representing the table levels in 4-level paging
enum class level : unsigned { pml4, pdp, pd, pt, page };
/// Helper for getting the next level value
inline static level deeper(level l) {
return static_cast<level>(static_cast<unsigned>(l) + 1);
}
uint64_t entries[memory::table_entries];
static constexpr size_t entry_sizes[] = {
0x8000000000, // PML4 entry: 512 GiB
0x40000000, // PDP entry: 1 GiB
0x200000, // PD entry: 2 MiB
0x1000}; // PT entry: 4 KiB
inline page_table * get(int i, uint16_t *flags = nullptr) const {
uint64_t entry = entries[i];
if ((entry & 0x1) == 0) return nullptr;
if (flags) *flags = entry & 0xfffull;
return reinterpret_cast<page_table *>((entry & ~0xfffull) + memory::page_offset);
/// Flag marking unused space as allowed for allocation
static constexpr uint64_t flag_allowed = (1ull << 11);
/// Iterator over page table entries.
class iterator
{
/// The number of levels
static constexpr unsigned D = 4;
public:
/// Constructor.
/// \arg virt Virtual address this iterator is starting at
/// \arg pml4 Root of the page tables to iterate
iterator(uintptr_t virt, page_table *pml4);
/// Copy constructor.
iterator(const iterator &o);
/// Get the starting address of pages mapped by the current table
/// of level l.
uintptr_t start(level l) const;
/// Get the ending address of pages mapped by the current table
/// of level l.
uintptr_t end(level l) const;
/// Get the widest table type the current address is aligned to
level align() const;
/// Get the current virtual address
inline uintptr_t vaddress() const { return start(level::pt); }
/// Get the nth page table of the current address
inline page_table *& table(level l) const { return m_table[unsigned(l)]; }
/// Get the index in the nth page table of the current address
inline uint16_t & index(level l) { return m_index[unsigned(l)]; }
/// Get the index in the nth page table of the current address
inline uint16_t index(level l) const { return m_index[unsigned(l)]; }
/// Get the current table entry of the table at the given level.
inline uint64_t entry(level l) const {
for (unsigned i = 1; i <= unsigned(l); ++i)
if (!check_table(level(i))) return 0;
return table(l)->entries[index(l)];
}
inline void set(int i, page_table *p, uint16_t flags) {
entries[i] = (reinterpret_cast<uint64_t>(p) - memory::page_offset) | (flags & 0xfff);
/// Get a *non-const* reference to the current table entry of
/// the table at the given level.
inline uint64_t & entry(level l) {
for (unsigned i = 1; i < unsigned(l); ++i) ensure_table(level(i));
return table(l)->entries[index(l)];
}
/// Get the depth of tables that actually exist for the current address
level depth() const;
/// Increment iteration to the next entry aligned to the given level
void next(level l);
/// Check if allocation is allowed at the current location
bool allowed() const;
/// Mark allocation allowed at the given depth for the current location
void allow(level at, bool allowed);
/// Increment iteration to the next entry at the deepest level
inline void increment() { next(level::page); }
inline uint64_t & operator*() { return entry(level::pt); }
inline iterator & operator++() { increment(); return *this; }
inline iterator operator++(int) { iterator old {*this}; increment(); return old; }
bool operator!=(const iterator &o) const;
bool check_table(level l) const;
void ensure_table(level l);
private:
// The table array is mutable because we might update it with existing
// tables; a 'view switch'. Therefore, be careful not to modify table
// contents in const functions.
mutable page_table *m_table[D];
uint16_t m_index[D];
};
/// Get an entry in the page table as a page_table pointer
/// \arg i Index of the entry in this page table
/// \arg flags [out] If set, this will receive the entry's flags
/// \returns The corresponding entry, offset into page-offset memory
page_table * get(int i, uint16_t *flags = nullptr) const;
/// Set an entry in the page table as a page_table pointer
/// \arg i Index of the entry in this page table
/// \arg flags The flags for the entry
void set(int i, page_table *p, uint16_t flags);
/// Check if the given entry represents a large or huge page
inline bool is_large_page(level l, int i) const {
return (l == level::pdp || l == level::pd) && (entries[i] & 0x80) == 0x80;
}
/// Check if the given entry is marked as present in the table
inline bool is_present(int i) const { return (entries[i] & 0x1) == 0x1; }
inline bool is_large_page(level l, int i) const {
return
(l == level::pdp || l == level::pd) &&
(entries[i] & 0x80) == 0x80;
}
/// Check if the given entry represents a page (of any size)
inline bool is_page(level l, int i) const { return (l == level::pt) || is_large_page(l, i); }
inline bool is_page(level l, int i) const {
return (l == level::pt) || is_large_page(l, i);
}
/// Print this table to the debug console.
void dump(level lvl = level::pml4, bool recurse = true);
void dump(
level lvl = level::pml4,
bool recurse = true);
uint64_t entries[memory::table_entries];
};
@@ -65,3 +159,21 @@ struct page_table_indices
bool operator==(const page_table_indices &l, const page_table_indices &r);
inline page_table::level operator+(page_table::level a, unsigned b) {
return static_cast<page_table::level>(static_cast<unsigned>(a) + b);
}
inline page_table::level operator-(page_table::level a, unsigned b) {
return static_cast<page_table::level>(static_cast<unsigned>(a) - b);
}
inline bool operator>(page_table::level a, page_table::level b) {
return static_cast<unsigned>(a) > static_cast<unsigned>(b);
}
inline bool operator<(page_table::level a, page_table::level b) {
return static_cast<unsigned>(a) < static_cast<unsigned>(b);
}
inline page_table::level& operator++(page_table::level& l) { l = l + 1; return l; }
inline page_table::level& operator--(page_table::level& l) { l = l - 1; return l; }
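A minimal sketch (not from this commit) of walking a virtual range with
page_table::iterator; pml4, start, and end are placeholders. It reads
entries through a const reference so the const entry() overload is used
and ensure_table() never allocates tables for holes in the range.

    size_t count_mapped(page_table *pml4, uintptr_t start, uintptr_t end)
    {
        using level = page_table::level;
        size_t mapped = 0;

        for (page_table::iterator it {start, pml4}; it.vaddress() < end; ++it) {
            const page_table::iterator &view = it; // pick the const overloads
            if (view.entry(level::pt) & 1)         // bit 0 is the present bit
                ++mapped;
        }
        return mapped;
    }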

View File

@@ -1,256 +1,125 @@
#include <algorithm>
#include "kutil/vector.h"
#include "log.h"
#include "objects/process.h"
#include "objects/vm_area.h"
#include "page_manager.h"
#include "vm_space.h"
using node_type = kutil::avl_node<vm_range>;
using node_vec = kutil::vector<node_type*>;
DEFINE_SLAB_ALLOCATOR(node_type, 1);
vm_space::vm_space(uintptr_t start, size_t size)
int
vm_space::area::compare(const vm_space::area &o) const
{
node_type *node = new node_type;
node->address = start;
node->size = size;
node->state = vm_state::none;
m_ranges.insert(node);
log::info(logs::vmem, "Creating address space from %016llx-%016llx",
start, start+size);
if (base > o.base) return 1;
else if (base < o.base) return -1;
else return 0;
}
vm_space::vm_space()
bool
vm_space::area::operator==(const vm_space::area &o) const
{
return o.base == base && o.area == area;
}
vm_space::vm_space(page_table *p, bool kernel) :
m_kernel(kernel),
m_pml4(p)
{
}
inline static bool
contains(node_type *node, uintptr_t start, size_t size)
vm_space::~vm_space()
{
return start >= node->address &&
size <= node->size;
for (auto &a : m_areas)
a.area->remove_from(this);
}
inline static bool
overlaps(node_type *node, uintptr_t start, size_t size)
vm_space &
vm_space::kernel_space()
{
return start < node->end() &&
(start + size) > node->address;
extern vm_space &g_kernel_space;
return g_kernel_space;
}
static node_type *
find_overlapping(node_type *from, uintptr_t start, size_t size)
bool
vm_space::add(uintptr_t base, vm_area *area)
{
while (from) {
if (overlaps(from, start, size))
return from;
from = start < from->address ?
from->left() :
from->right();
//TODO: check for collisions
m_areas.sorted_insert({base, area});
return true;
}
bool
vm_space::remove(vm_area *area)
{
for (auto &a : m_areas) {
if (a.area == area) {
m_areas.remove(a);
return true;
}
}
return false;
}
vm_area *
vm_space::get(uintptr_t addr, uintptr_t *base)
{
for (auto &a : m_areas) {
uintptr_t end = a.base + a.area->size();
if (addr >= a.base && addr < end) {
if (base) *base = a.base;
return a.area;
}
}
return nullptr;
}
node_type *
vm_space::split_out(node_type *node, uintptr_t start, size_t size, vm_state state)
{
// No cross-boundary splits allowed for now
const bool contained = contains(node, start, size);
kassert(contained, "Tried to split an address range across existing boundaries");
if (!contained)
return nullptr;
vm_state old_state = node->state;
if (state == old_state)
return node;
node->state = state;
log::debug(logs::vmem, "Splitting out region %016llx-%016llx[%d] from %016llx-%016llx[%d]",
start, start+size, state, node->address, node->end(), old_state);
bool do_consolidate = false;
if (node->address < start) {
// Split off rest into new node
size_t leading = start - node->address;
node_type *next = new node_type;
next->address = start;
next->size = node->size - leading;
next->state = state;
node->size = leading;
node->state = old_state;
log::debug(logs::vmem,
" leading region %016llx-%016llx[%d]",
node->address, node->address + node->size, node->state);
m_ranges.insert(next);
node = next;
} else {
do_consolidate = true;
}
if (node->end() > start + size) {
// Split off remaining into new node
size_t trailing = node->size - size;
node->size -= trailing;
node_type *next = new node_type;
next->state = old_state;
next->address = node->end();
next->size = trailing;
log::debug(logs::vmem,
" tailing region %016llx-%016llx[%d]",
next->address, next->address + next->size, next->state);
m_ranges.insert(next);
} else {
do_consolidate = true;
}
if (do_consolidate)
node = consolidate(node);
return node;
}
node_type *
vm_space::find_empty(node_type *node, size_t size, vm_state state)
{
if (node->state == vm_state::none && node->size >= size)
return split_out(node, node->address, size, state);
if (node->left()) {
node_type *found = find_empty(node->left(), size, state);
if (found)
return found;
}
if (node->right()) {
node_type *found = find_empty(node->right(), size, state);
if (found)
return found;
}
return nullptr;
}
inline void gather(node_type *node, node_vec &vec)
{
if (node) {
gather(node->left(), vec);
vec.append(node);
gather(node->right(), vec);
}
}
node_type *
vm_space::consolidate(node_type *needle)
{
node_vec nodes(m_ranges.count());
gather(m_ranges.root(), nodes);
node_type *prev = nullptr;
for (auto *node : nodes) {
log::debug(logs::vmem,
"* Existing region %016llx-%016llx[%d]",
node->address, node->address + node->size, node->state);
if (prev && node->address == prev->end() && node->state == prev->state) {
log::debug(logs::vmem,
"Joining regions %016llx-%016llx[%d] %016llx-%016llx[%d]",
prev->address, prev->address + prev->size, prev->state,
node->address, node->address + node->size, node->state);
prev->size += node->size;
if (needle == node)
needle = prev;
m_ranges.remove(node);
} else {
prev = node;
}
}
return needle;
}
uintptr_t
vm_space::reserve(uintptr_t start, size_t size)
{
if (start == 0) {
log::debug(logs::vmem, "Reserving any region of size %llx", size);
node_type *node = find_empty(m_ranges.root(), size, vm_state::reserved);
if (!node) {
log::debug(logs::vmem, " found no large enough region");
return 0;
}
return node->address;
}
log::debug(logs::vmem, "Reserving region %016llx-%016llx",
start, start+size);
node_type *node = find_overlapping(m_ranges.root(), start, size);
if (!node) {
log::debug(logs::vmem, " found no match");
return 0;
}
node = split_out(node, start, size, vm_state::reserved);
return node ? start : 0;
}
void
vm_space::unreserve(uintptr_t start, size_t size)
vm_space::page_in(uintptr_t addr, size_t count, uintptr_t phys)
{
log::debug(logs::vmem, "Unreserving region %016llx-%016llx", start, start+size);
node_type *node = find_overlapping(m_ranges.root(), start, size);
if (!node || !contains(node, start, size)) {
log::debug(logs::vmem, " found no match");
return;
page_manager *pm = page_manager::get();
pm->page_in(m_pml4, phys, addr, count, is_kernel());
}
split_out(node, start, size, vm_state::none);
}
uintptr_t
vm_space::commit(uintptr_t start, size_t size)
void
vm_space::page_out(uintptr_t addr, size_t count)
{
if (start == 0) {
log::debug(logs::vmem, "Committing any region of size %llx", size);
node_type *node = find_empty(m_ranges.root(), size, vm_state::committed);
if (!node) {
log::debug(logs::vmem, " found no large enough region");
return 0;
page_manager *pm = page_manager::get();
pm->page_out(m_pml4, addr, count, false);
}
return node->address;
}
log::debug(logs::vmem, "Committing region %016llx-%016llx",
start, start+size);
node_type *node = find_overlapping(m_ranges.root(), start, size);
if (!node) {
log::debug(logs::vmem, " found no match");
return 0;
}
node = split_out(node, start, size, vm_state::committed);
return node ? start : 0;
}
vm_state
vm_space::get(uintptr_t addr)
void
vm_space::allow(uintptr_t start, size_t length, bool allow)
{
node_type *node = find_overlapping(m_ranges.root(), addr, 1);
return node ? node->state : vm_state::unknown;
using level = page_table::level;
kassert((start & 0xfff) == 0, "non-page-aligned address");
kassert((length & 0xfff) == 0, "non-page-aligned length");
const uintptr_t end = start + length;
page_table::iterator it {start, m_pml4};
while (it.vaddress() < end) {
level d = it.align();
while (it.end(d) > end) ++d;
it.allow(d-1, allow);
it.next(d);
}
}
bool
vm_space::handle_fault(uintptr_t addr, fault_type fault)
{
uintptr_t page = addr & ~0xfffull;
page_table::iterator it {addr, m_pml4};
if (!it.allowed())
return false;
// TODO: pull this out of PM
page_manager::get()->map_pages(page, 1, m_pml4);
/* TODO: Tell the VMA if there is one
uintptr_t base = 0;
vm_area *area = get(addr, &base);
*/
return true;
}

View File

@@ -1,73 +1,93 @@
#pragma once
/// \file vm_range.h
/// \file vm_space.h
/// Structure for tracking a range of virtual memory addresses
#include <stdint.h>
#include "kutil/avl_tree.h"
#include "kutil/enum_bitfields.h"
#include "kutil/vector.h"
enum class vm_state : uint8_t {
unknown,
none,
reserved,
committed
};
struct vm_range
{
uintptr_t address;
size_t size;
vm_state state;
inline uintptr_t end() const { return address + size; }
inline int64_t compare(const vm_range *other) const {
if (address > other->address) return -1;
else if (address < other->address) return 1;
else return 0;
}
};
struct page_table;
class process;
class vm_area;
/// Tracks a region of virtual memory address space
class vm_space
{
public:
/// Default constructor. Define an empty range.
vm_space();
/// Constructor.
/// \arg pml4 The pml4 for this address space
/// \arg kernel True if this is the kernel address space
vm_space(page_table *pml4, bool kernel = false);
/// Constructor. Define a range of managed VM space.
/// \arg start Starting address of the managed space
/// \arg size Size of the managed space, in bytes
vm_space(uintptr_t start, size_t size);
~vm_space();
/// Reserve a section of address space.
/// \arg start Starting address of reservation, or 0 for any address
/// \arg size Size of reservation in bytes
/// \returns The address of the reservation, or 0 on failure
uintptr_t reserve(uintptr_t start, size_t size);
/// Add a virtual memory area to this address space
/// \arg base The starting address of the area
/// \arg area The area to add
/// \returns True if the add succeeded
bool add(uintptr_t base, vm_area *area);
/// Unreserve (and uncommit, if committed) a section of address space.
/// \arg start Starting address of reservation
/// \arg size Size of reservation in bytes
void unreserve(uintptr_t start, size_t size);
/// Remove a virtual memory area from this address space
/// \arg area The area to remove
/// \returns True if the area was removed
bool remove(vm_area *area);
/// Mark a section of address space as committed.
/// \arg start Starting address of reservation, or 0 for any address
/// \arg size Size of reservation in bytes
/// \returns The address of the reservation, or 0 on failure
uintptr_t commit(uintptr_t start, size_t size);
/// Check the state of the given address.
/// Get the virtual memory area corresponding to an address
/// \arg addr The address to check
/// \returns The state of the memory if known, or 'unknown'
vm_state get(uintptr_t addr);
/// \arg base [out] if not null, receives the base address of the area
/// \returns The vm_area, or nullptr if not found
vm_area * get(uintptr_t addr, uintptr_t *base = nullptr);
private:
using node_type = kutil::avl_node<vm_range>;
using tree_type = kutil::avl_tree<vm_range>;
/// Check if this is the kernel space
inline bool is_kernel() const { return m_kernel; }
node_type * split_out(node_type* node, uintptr_t start, size_t size, vm_state state);
node_type * consolidate(node_type* needle);
node_type * find_empty(node_type* node, size_t size, vm_state state);
/// Get the kernel virtual memory space
static vm_space & kernel_space();
tree_type m_ranges;
/// Add page mappings into this space's page tables
/// \arg addr The virtual address to map at
/// \arg count The number of pages
/// \arg phys The physical address of the first page
void page_in(uintptr_t addr, size_t count, uintptr_t phys);
/// Remove page mappings from this space's page tables
/// \arg addr The virtual address to unmap
/// \arg count The number of pages
void page_out(uintptr_t addr, size_t count);
/// Mark whether allocation is allowed or not in a range of
/// virtual memory.
/// \arg start The starting virtual address of the area
/// \arg length The length in bytes of the area
/// \arg allow True if allocation should be allowed
void allow(uintptr_t start, size_t length, bool allow);
enum class fault_type : uint8_t {
none = 0x00,
present = 0x01,
write = 0x02,
user = 0x04,
reserved = 0x08,
fetch = 0x10
};
/// Handle a page fault.
/// \arg addr Address which caused the fault
/// \arg ft Flags from the interrupt about the kind of fault
/// \returns True if the fault was successfully handled
bool handle_fault(uintptr_t addr, fault_type fault);
private:
bool m_kernel;
page_table *m_pml4;
struct area {
uintptr_t base;
vm_area *area;
int compare(const struct area &o) const;
bool operator==(const struct area &o) const;
};
kutil::vector<area> m_areas;
};
IS_BITFIELD(vm_space::fault_type);
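For context (a hypothetical sketch, not code from this commit): the ISR
now builds a fault_type directly from the x86 page-fault error code,
since the enum values mirror the error-code bits, and hands it to the
owning space. Assuming IS_BITFIELD gives the enum the usual bitwise
operators, decoding it for a log line might look like this; the message
format is made up.

    void log_fault(uintptr_t addr, vm_space::fault_type ft)
    {
        using ft_t = vm_space::fault_type;
        const bool present = (ft & ft_t::present) != ft_t::none;
        const bool write   = (ft & ft_t::write)   != ft_t::none;
        const bool user    = (ft & ft_t::user)    != ft_t::none;

        log::debug(logs::memory,
            "#PF at %016lx: %s on %s from %s mode",
            addr,
            present ? "protection violation" : "non-present page",
            write ? "write" : "read",
            user ? "user" : "kernel");
    }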

View File

@@ -10,6 +10,7 @@ union no_construct
{
T value;
no_construct() {}
~no_construct() {}
};
} // namespace kutil