diff --git a/modules.yaml b/modules.yaml
index 74b2bb0..ae92e78 100644
--- a/modules.yaml
+++ b/modules.yaml
@@ -318,6 +318,7 @@ modules:
       - src/tests/logger.cpp
       - src/tests/heap_allocator.cpp
       - src/tests/main.cpp
+      - src/tests/map.cpp
       - src/tests/vector.cpp
 overlays:
   - url: https://f000.backblazeb2.com/file/jsix-os/sysroot-llvm8-20190706.tar.bz2
diff --git a/src/libraries/kutil/heap_allocator.cpp b/src/libraries/kutil/heap_allocator.cpp
index 16fb3e2..cdafd24 100644
--- a/src/libraries/kutil/heap_allocator.cpp
+++ b/src/libraries/kutil/heap_allocator.cpp
@@ -1,7 +1,8 @@
 #include <stdint.h>
 #include "kutil/assert.h"
-#include "kutil/memory.h"
 #include "kutil/heap_allocator.h"
+#include "kutil/memory.h"
+#include "kutil/util.h"
 
 namespace kutil {
 
@@ -77,8 +78,7 @@ heap_allocator::allocate(size_t length)
 	if (length == 0)
 		return nullptr;
 
-	const unsigned clz = __builtin_clzll(total - 1);
-	unsigned order = 64 - clz;
+	unsigned order = log2(total);
 	if (order < min_order)
 		order = min_order;
 
diff --git a/src/libraries/kutil/include/kutil/hash.h b/src/libraries/kutil/include/kutil/hash.h
new file mode 100644
index 0000000..17be9a5
--- /dev/null
+++ b/src/libraries/kutil/include/kutil/hash.h
@@ -0,0 +1,53 @@
+#pragma once
+/// \file hash.h
+/// Simple templated hashing functions
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace kutil {
+
+constexpr uint64_t fnv_64_prime = 0x100000001b3ull;
+constexpr uint64_t fnv1a_64_init = 0xcbf29ce484222325ull;
+
+/// Return the FNV-1a hash of the given 0-terminated string.
+/// \arg s     String to hash; a null pointer hashes like an empty string
+/// \arg init  Hash state to continue from; 0 starts a fresh hash
+/// \returns   The resulting 64-bit hash state
+inline uint64_t hash_string(char const *s, uint64_t init = 0) {
+	if (!init) init = fnv1a_64_init;
+	while(s && *s) {
+		// Mix in the byte value; a plain char could sign-extend
+		init ^= static_cast<uint8_t>(*s++);
+		init *= fnv_64_prime;
+	}
+	return init;
+}
+
+/// Return the FNV-1a hash of the given buffer.
+/// \arg v     Start of the buffer
+/// \arg len   Number of bytes to hash
+/// \arg init  Hash state to continue from; 0 starts a fresh hash
+/// \returns   The resulting 64-bit hash state
+inline uint64_t hash_buffer(const void *v, size_t len, uint64_t init = 0) {
+	uint8_t const *p = reinterpret_cast<uint8_t const *>(v);
+	uint8_t const *end = p + len;
+	if (!init) init = fnv1a_64_init; // 0 means "start a fresh hash"
+	while(p < end) {
+		init ^= static_cast<uint64_t>(*p++);
+		init *= fnv_64_prime;
+	}
+	return init;
+}
+
+/// Hash any value by the raw bytes of its object representation.
+template <typename T> inline uint64_t hash(const T &v) {
+	return hash_buffer(reinterpret_cast<const void *>(&v), sizeof(T));
+}
+
+/// Hash C strings by content; `inline` keeps this specialization ODR-safe.
+template <> inline uint64_t hash<const char *>(const char * const &s) {
+	return hash_string(s);
+}
+
+} // namespace kutil
diff --git a/src/libraries/kutil/include/kutil/map.h b/src/libraries/kutil/include/kutil/map.h
new file mode 100644
index 0000000..acaa08c
--- /dev/null
+++ b/src/libraries/kutil/include/kutil/map.h
@@ -0,0 +1,266 @@
+#pragma once
+/// \file map.h
+/// Definition of a simple associative array collection for use in kernel space.
+/// Thanks to the following people for inspiration of this implementation:
+///
+///  Sebastian Sylvan
+///  https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/
+///
+///  Emmanuel Goossaert
+///  http://codecapsule.com/2013/11/11/robin-hood-hashing/
+///  http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+
+#include <stdint.h>
+#include "kutil/hash.h"
+#include "kutil/vector.h"
+#include "kutil/util.h"
+
+namespace kutil {
+
+/// Templated equality check to allow overriding
+template <typename T>
+inline bool equal(const T &a, const T &b) { return a == b; }
+
+/// Compare C strings by content instead of by address. Two null pointers
+/// are equal; a null pointer never equals a non-null one.
+template <>
+inline bool equal<const char *>(const char * const &a, const char * const &b) {
+	if (!a || !b) return a == b;
+	const char *a1 = a, *b1 = b;
+	while (*a1 && *b1) if (*a1++ != *b1++) return false;
+	return *a1 == *b1; // Make sure they're both zero
+}
+
+/// An open addressing hash map using robinhood hashing.
+///
+/// A stored hash of 0 marks an empty slot, so a key hash of 0 is remapped
+/// to 1 before use. insert() does not look for an existing entry: inserting
+/// the same key twice stores two entries.
+template <typename K, typename V>
+class map
+{
+public:
+	static constexpr size_t min_capacity = 8;
+	static constexpr size_t max_load = 90;
+
+	/// Default constructor. Creates an empty map with the given capacity.
+	/// \arg capacity  Number of slots to allocate, rounded up to a power
+	///                of two; 0 delays allocation until the first insert
+	map(size_t capacity = 0) :
+		m_count(0),
+		m_capacity(0),
+		m_nodes(nullptr)
+	{
+		// Shift a size_t, not an int, so large capacities can't overflow
+		if (capacity)
+			set_capacity(static_cast<size_t>(1) << log2(capacity));
+	}
+
+	/// Copying is disabled: a member-wise copy would share m_nodes and
+	/// free it twice.
+	map(const map &) = delete;
+	map & operator=(const map &) = delete;
+
+	~map() {
+		// Only occupied slots hold constructed nodes; the rest is zeroed memory
+		for (size_t i = 0; i < m_capacity; ++i)
+			if (m_nodes[i].hash) m_nodes[i].~node();
+		if (m_nodes) kfree(m_nodes);
+	}
+
+	/// Add an entry, growing the table first if the new count would exceed
+	/// threshold().
+	/// \arg k  Key of the new entry
+	/// \arg v  Value of the new entry
+	void insert(K k, V v) {
+		if (++m_count > threshold()) grow();
+		insert_node(hash_key(k), std::move(k), std::move(v));
+	}
+
+	/// Look up a value by key.
+	/// \arg k      Key to search for
+	/// \returns    Pointer to the stored value, or nullptr if not found
+	V * find(const K &k) {
+		node *n = lookup(k);
+		return n ? &n->val : nullptr;
+	}
+
+	/// Look up a value by key, const version.
+	/// \arg k      Key to search for
+	/// \returns    Pointer to the stored value, or nullptr if not found
+	const V * find(const K &k) const {
+		const node *n = lookup(k);
+		return n ? &n->val : nullptr;
+	}
+
+	/// Remove one entry with the given key, then shift the entries after
+	/// it back by one slot until an empty slot or an entry already in its
+	/// home slot is reached.
+	/// \arg k      Key to remove
+	/// \returns    True if an entry was found and removed
+	bool erase(const K &k)
+	{
+		node *n = lookup(k);
+		if (!n) return false;
+
+		size_t i = n - m_nodes;
+		destroy(*n);
+		--m_count;
+
+		while (true) {
+			size_t next = mod(i+1);
+			node &m = m_nodes[next];
+			if (!m.hash || mod(m.hash) == next) break;
+			construct(i, m.hash, std::move(m.key), std::move(m.val));
+			destroy(m);
+			i = next;
+		}
+
+		return true;
+	}
+
+	/// Number of entries stored in the map
+	inline size_t count() const { return m_count; }
+	/// Number of slots in the table
+	inline size_t capacity() const { return m_capacity; }
+	/// Entry count above which insert() grows the table
+	inline size_t threshold() const { return (m_capacity * max_load) / 100; }
+
+private:
+	struct node
+	{
+		uint64_t hash {0}; ///< Hash of key; 0 marks the slot as empty
+		K key;
+		V val;
+
+		node(node &&o) : hash(o.hash), key(std::move(o.key)), val(std::move(o.val)) {}
+		node(uint64_t h, K &&k, V &&v) : hash(h), key(std::move(k)), val(std::move(v)) {}
+		~node() { hash = 0; }
+	};
+
+	/// Hash a key for storage. 0 is reserved for empty slots, so a key
+	/// that hashes to 0 is stored and looked up under 1 instead.
+	static inline uint64_t hash_key(const K &k) {
+		const uint64_t h = hash(k);
+		return h ? h : 1;
+	}
+
+	/// Destroy the node in a slot and mark the slot empty. The slot is
+	/// zeroed explicitly because the store in ~node() is made to an object
+	/// whose lifetime is ending, so the optimizer may discard it.
+	inline void destroy(node &n) {
+		n.~node();
+		memset(static_cast<void *>(&n), 0, sizeof(node));
+	}
+
+	inline size_t mod(uint64_t i) const { return i & (m_capacity - 1); }
+	/// Distance of slot i from the home slot of hash h, wrapping around
+	inline size_t offset(uint64_t h, size_t i) const {
+		return mod(i + m_capacity - mod(h));
+	}
+
+	/// Allocate a zeroed table of the given number of slots. Does not
+	/// free or migrate the previous table; see grow().
+	void set_capacity(size_t capacity) {
+		kassert((capacity & (capacity - 1)) == 0,
+			"Map capacity must be a power of two");
+
+		m_capacity = capacity;
+		const size_t size = m_capacity * sizeof(node);
+		m_nodes = reinterpret_cast<node*>(kalloc(size));
+		memset(m_nodes, 0, size);
+	}
+
+	/// Double the table size (at least min_capacity) and re-insert every
+	/// entry from the old table.
+	void grow() {
+		node *old = m_nodes;
+		size_t count = m_capacity;
+
+		size_t cap = m_capacity * 2;
+		if (cap < min_capacity)
+			cap = min_capacity;
+
+		set_capacity(cap);
+
+		for (size_t i = 0; i < count; ++i) {
+			node &n = old[i];
+			// Empty slots hold no node; re-inserting their zero hash
+			// would leave holes in the new table's probe sequences
+			if (!n.hash) continue;
+			insert_node(n.hash, std::move(n.key), std::move(n.val));
+			n.~node();
+		}
+
+		if (old) kfree(old);
+	}
+
+	/// Construct a node in place in slot i
+	inline node * construct(size_t i, uint64_t h, K &&k, V &&v) {
+		return new (&m_nodes[i]) node(h, std::move(k), std::move(v));
+	}
+
+	/// Place an entry, starting at its home slot. Whenever the slot holds
+	/// an entry closer to its own home than the one being placed, the two
+	/// are swapped and placement continues with the displaced entry.
+	/// \returns  The node constructed in the first empty slot reached
+	node * insert_node(uint64_t h, K &&k, V &&v) {
+		size_t i = mod(h);
+		size_t dist = 0;
+
+		while (true) {
+			if (!m_nodes[i].hash) {
+				return construct(i, h, std::move(k), std::move(v));
+			}
+
+			node &elem = m_nodes[i];
+			size_t elem_dist = offset(elem.hash, i);
+			if (elem_dist < dist) {
+				std::swap(h, elem.hash);
+				std::swap(k, elem.key);
+				std::swap(v, elem.val);
+				dist = elem_dist;
+			}
+
+			i = mod(i + 1);
+			++dist;
+		}
+	}
+
+	/// Find the node holding the given key.
+	/// \returns  The matching node, or nullptr if the key is not stored
+	node * lookup(const K &k) {
+		const map *self = this;
+		return const_cast<node *>(self->lookup(k));
+	}
+
+	/// Find the node holding the given key, const version.
+	/// \returns  The matching node, or nullptr if the key is not stored
+	const node * lookup(const K &k) const
+	{
+		// With no table allocated mod() cannot mask and m_nodes is null
+		if (!m_capacity) return nullptr;
+
+		const uint64_t h = hash_key(k);
+		size_t i = mod(h);
+		size_t dist = 0;
+
+		while (true) {
+			const node &n = m_nodes[i];
+			if (!n.hash || dist > offset(n.hash, i))
+				return nullptr;
+
+			else if (n.hash == h && equal(n.key, k))
+				return &n;
+
+			i = mod(i + 1);
+			++dist;
+		}
+	}
+
+	size_t m_count;
+	size_t m_capacity;
+	node *m_nodes;
+};
+
+} // namespace kutil
diff --git a/src/libraries/kutil/include/kutil/util.h b/src/libraries/kutil/include/kutil/util.h
new file mode 100644
index 0000000..d1e36b2
--- /dev/null
+++ b/src/libraries/kutil/include/kutil/util.h
@@ -0,0 +1,18 @@
+#pragma once
+/// \file util.h
+/// Utility functions used in other kutil code
+
+#include <stdint.h>
+
+namespace kutil {
+
+/// Get the base-2 logarithm of i, rounded up: the smallest n such that
+/// 2^n >= i. Returns 0 when i is 0 or 1.
+inline unsigned log2(uint64_t i) {
+	if (i < 2) return 0;
+	// i - 1 has its highest set bit at position n - 1
+	const unsigned clz = __builtin_clzll(i - 1);
+	return 64 - clz;
+}
+
+} // namespace kutil
diff --git a/src/libraries/kutil/include/kutil/vector.h b/src/libraries/kutil/include/kutil/vector.h
index ea27a37..e34297f 100644
--- a/src/libraries/kutil/include/kutil/vector.h
+++ b/src/libraries/kutil/include/kutil/vector.h
@@ -5,6 +5,7 @@
 #include <utility>
 #include "kutil/assert.h"
 #include "kutil/memory.h"
+#include "kutil/util.h"
 
 namespace kutil {
 
@@ -12,6 +13,8 @@ namespace kutil {
 template <typename T>
 class vector
 {
+	static constexpr size_t min_capacity = 4;
+
 public:
 	/// Default constructor. Creates an empty vector with no capacity.
 	vector() :
@@ -227,12 +230,9 @@ public:
 	void ensure_capacity(size_t size)
 	{
 		if (m_capacity >= size) return;
-
-		size_t capacity = m_capacity;
-		while (capacity < size) {
-			if (capacity == 0) capacity = 4;
-			else capacity *= 2;
-		}
+		size_t capacity = static_cast<size_t>(1) << log2(size);
+		if (capacity < min_capacity)
+			capacity = min_capacity;
 		set_capacity(capacity);
 	}
 
diff --git a/src/tests/map.cpp b/src/tests/map.cpp
new file mode 100644
index 0000000..dfa322a
--- /dev/null
+++ b/src/tests/map.cpp
@@ -0,0 +1,98 @@
+#include <algorithm>
+#include <chrono>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include "kutil/map.h"
+#include "catch.hpp"
+
+namespace {
+
+/// Largest key the tests generate
+constexpr int max_key = 10000;
+
+/// Seed the random engine from the wall clock.
+unsigned make_seed()
+{
+	using clock = std::chrono::system_clock;
+	return static_cast<unsigned>(clock::now().time_since_epoch().count());
+}
+
+/// Pick `count` distinct random keys from [0, max_key]. The keys must be
+/// distinct: kutil::map keeps duplicate keys as separate entries, so a
+/// repeated key would still be found after one copy is erased.
+std::vector<int> make_keys(size_t count, std::default_random_engine &rng)
+{
+	std::vector<int> keys(max_key + 1);
+	std::iota(keys.begin(), keys.end(), 0);
+	std::shuffle(keys.begin(), keys.end(), rng);
+	keys.resize(count);
+	return keys;
+}
+
+} // namespace
+
+TEST_CASE( "map insertion", "[containers] [map]" )
+{
+	const unsigned seed = make_seed();
+	CAPTURE( seed ); // reported on failure so the run can be reproduced
+	std::default_random_engine rng {seed};
+
+	size_t sizes[] = {1, 2, 3, 5, 100};
+	for (size_t s : sizes) {
+		kutil::map<int, int> v;
+		const std::vector<int> r = make_keys(s, rng);
+
+		for (int j : r)
+			v.insert(j, j);
+
+		CHECK( v.count() == s );
+
+		for (int i : r) {
+			int *p = v.find(i);
+			CAPTURE( i );
+			REQUIRE( p ); // stop here rather than dereference null
+			CHECK( *p == i );
+		}
+	}
+}
+
+TEST_CASE( "map deletion", "[containers] [map]" )
+{
+	const unsigned seed = make_seed();
+	CAPTURE( seed ); // reported on failure so the run can be reproduced
+	std::default_random_engine rng {seed};
+
+	size_t sizes[] = {1, 2, 3, 5, 100};
+	for (size_t s : sizes) {
+		kutil::map<int, int> v;
+		const std::vector<int> r = make_keys(s, rng);
+
+		for (int j : r)
+			v.insert(j, j);
+
+		// Erase the entries at even indices only
+		for (size_t i = 0; i < s; i += 2) {
+			CHECK( v.erase(r[i]) );
+		}
+
+		for (size_t i = 0; i < s; ++i) {
+			int *p = v.find(r[i]);
+			CAPTURE( i );
+			if ( i%2 ) {
+				CHECK( p );
+			} else {
+				CHECK( !p );
+			}
+		}
+	}
+}
+
+TEST_CASE( "map lookup before any insertion", "[containers] [map]" )
+{
+	kutil::map<int, int> v;
+	CHECK( !v.find(42) );
+	CHECK( !v.erase(42) );
+	CHECK( v.count() == 0 );
+}