From 6ef15a27214923ede2fe8e98dfa178af7b85ad56 Mon Sep 17 00:00:00 2001
From: "Justin C. Miller"
Date: Tue, 17 Jan 2023 18:48:28 -0700
Subject: [PATCH] [init] Add new initrd format

A new compressed initrd format for srv.init to load drivers, services,
and data from, instead of every file getting loaded by the bootloader.
This allows the bootloader to allocate less memory, and pass less of it
to init, when not every driver or data file is loaded.

Loading, passing, and using the new initrd will be done in a coming
commit.
---
 configs/rules.ninja               |   4 +
 requirements.txt                  |   2 +
 scripts/mkinitrd.py               |  35 ++++++++
 src/libraries/util/cdb.cpp        | 136 ++++++++++++++++++++++++++++++
 src/libraries/util/util.module    |   2 +
 src/libraries/util/util/cdb.h     |  42 +++++++++
 src/libraries/util/util/counted.h |  13 +++
 src/user/srv.init/init.module     |   5 +-
 src/user/srv.init/ramdisk.cpp     |  72 ++++++++++++++++
 src/user/srv.init/ramdisk.h       |  39 +++++++++
 10 files changed, 347 insertions(+), 3 deletions(-)
 create mode 100755 scripts/mkinitrd.py
 create mode 100644 src/libraries/util/cdb.cpp
 create mode 100644 src/libraries/util/util/cdb.h
 create mode 100644 src/user/srv.init/ramdisk.cpp
 create mode 100644 src/user/srv.init/ramdisk.h

diff --git a/configs/rules.ninja b/configs/rules.ninja
index 2074166..0b0d661 100644
--- a/configs/rules.ninja
+++ b/configs/rules.ninja
@@ -66,6 +66,10 @@ rule makefat
         cp $in $out; $
         mcopy -s -D o -i $out@@1M ${build_root}/fatroot/* ::/
 
+rule mkinitrd
+    description = Creating initrd
+    command = ${source_root}/scripts/mkinitrd.py $out $in
+
 rule strip
     description = Stripping $name
     command = $
diff --git a/requirements.txt b/requirements.txt
index c7c90af..12f1ac2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,5 @@ ninja >= 1.10.2
 peru >= 1.2.1
 pyyaml >= 5.4
 lark == 0.12.0
+pure-cdb == 4
+pyzstd == 0.15
diff --git a/scripts/mkinitrd.py b/scripts/mkinitrd.py
new file mode 100755
index 0000000..07672c8
--- /dev/null
+++ b/scripts/mkinitrd.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Build a jsix initrd image: a CDB archive of zstd-compressed files."""
+
+compress_level = 19  # zstd level passed to pyzstd.compress()
+
+def write_image(image, files):
+    """Write a CDB archive to `image` containing each of `files`.
+
+    Each file is stored zstd-compressed under its base name, so the
+    directory part of an input path is not part of the key.
+    """
+    from pathlib import Path
+    from cdblib import Writer
+    from pyzstd import compress
+
+    with open(image, 'wb') as db:
+        with Writer(db) as writer:
+            for f in files:
+                key = Path(f).name.encode('utf-8')
+                with open(f, 'rb') as input_file:
+                    writer.put(key, compress(input_file.read(), compress_level))
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+
+    p = ArgumentParser(description="Generate a jsix initrd image")
+
+    p.add_argument("image", metavar="INITRD",
+            help="initrd image file to generate")
+
+    p.add_argument("files", metavar="FILE", nargs='+',
+            help="files to add to the image")
+
+    args = p.parse_args()
+    write_image(args.image, args.files)
diff --git a/src/libraries/util/cdb.cpp b/src/libraries/util/cdb.cpp
new file mode 100644
index 0000000..9e8be13
--- /dev/null
+++ b/src/libraries/util/cdb.cpp
@@ -0,0 +1,136 @@
+#include
+
+namespace util {
+
+namespace {
+
+/// One entry of the 256-entry table at the start of the data, locating
+/// a hash table and giving its size in slots.
+struct pointer
+{
+    uint32_t position;
+    uint32_t length;
+};
+
+/// A hash table slot. A position of 0 marks the slot as empty.
+struct slot
+{
+    uint32_t hash;
+    uint32_t position;
+};
+
+/// Header of a stored record. The key bytes start at data[0] and the
+/// value bytes follow them at data[keylen].
+struct record
+{
+    uint32_t keylen;
+    uint32_t vallen;
+    uint8_t data[];
+};
+
+/// Size of the leading pointer table; no valid database is smaller
+static constexpr size_t min_length = 256 * sizeof(pointer);
+
+/// Hash a key: start at 5381, then h = (h * 33) ^ byte for each byte
+inline uint32_t
+djbhash(uint8_t const *key, uint32_t len)
+{
+    static constexpr uint32_t starting_hash = 5381;
+    uint32_t h = starting_hash;
+    for (unsigned i = 0; i < len; ++i)
+        h = ((h << 5) + h) ^ key[i];
+    return h;
+}
+
+/// Compare two byte strings for equal length and content
+inline bool
+equal(uint8_t const *key1, size_t len1, uint8_t const *key2, uint32_t len2)
+{
+    if (len1 != len2)
+        return false;
+
+    for (unsigned i = 0; i < len1; ++i)
+        if (key1[i] != key2[i])
+            return false;
+
+    return true;
+}
+
+/// Check that the byte range [offset, offset + length) lies entirely
+/// inside a buffer of `total` bytes. Written to avoid overflow.
+inline bool
+in_bounds(size_t total, uint64_t offset, uint64_t length)
+{
+    return offset <= total && length <= total - offset;
+}
+
+// util cannot depend on libc
+inline uint32_t strlen(const char *s) {
+    uint32_t i = 0;
+    while (s && *s++) ++i;
+    return i;
+}
+
+} // anon namespace
+
+cdb::cdb(buffer data) :
+    m_data(data)
+{
+    // Too small to hold the pointer table: treat as an empty database
+    if (data.count < min_length)
+        m_data = {0, 0};
+}
+
+const buffer
+cdb::retrieve(const char *key) const
+{
+    uint32_t len = strlen(key);
+    return retrieve(reinterpret_cast(key), len);
+}
+
+const buffer
+cdb::retrieve(const uint8_t *key, uint32_t len) const
+{
+    if (!m_data.pointer || !m_data.count)
+        return {0,0};
+
+    uint32_t h = djbhash(key, len);
+    uint32_t pindex = h & 0xff;
+
+    pointer const *p = &at(0)[pindex];
+
+    if (!p->length)
+        return {0, 0};
+
+    // A truncated or corrupt image may describe a table that runs past
+    // the end of the data; refuse to walk it
+    if (!in_bounds(m_data.count, p->position, uint64_t{p->length} * sizeof(slot)))
+        return {0, 0};
+
+    uint32_t hindex = (h >> 8) % p->length;
+    slot const *table = at(p->position);
+
+    uint32_t i = hindex;
+    slot const *s = &table[i];
+
+    // In the cdb format an empty slot is one whose position is 0. A hash
+    // of 0 is a valid value for a stored key and must not end the probe.
+    while (s->position != 0) {
+        if (s->hash == h && in_bounds(m_data.count, s->position, sizeof(record))) {
+            record const *r = at(s->position);
+            uint64_t const payload = uint64_t{r->keylen} + r->vallen;
+            if (in_bounds(m_data.count, uint64_t{s->position} + sizeof(record), payload) &&
+                equal(key, len, &r->data[0], r->keylen))
+                return buffer::from_const( &r->data[r->keylen], r->vallen );
+        }
+
+        i = (i + 1) % p->length;
+        if (i == hindex) break;
+        s = &table[i];
+    }
+
+    return {0, 0};
+
+}
+
+} // namespace util
diff --git a/src/libraries/util/util.module b/src/libraries/util/util.module
index 3162a8d..d5f0181 100644
--- a/src/libraries/util/util.module
+++ b/src/libraries/util/util.module
@@ -3,6 +3,7 @@ module("util",
     kind = "lib",
 
     sources = [
+        "cdb.cpp",
         "bip_buffer.cpp",
         "format.cpp",
         "spinlock.cpp",
@@ -12,6 +13,7 @@ module("util",
         "util/basic_types.h",
         "util/bip_buffer.h",
         "util/bitset.h",
+        "util/cdb.h",
         "util/counted.h",
         "util/deque.h",
         "util/enum_bitfields.h",
diff --git a/src/libraries/util/util/cdb.h b/src/libraries/util/util/cdb.h
new file mode 100644
index 0000000..5e80b21
--- /dev/null
+++ b/src/libraries/util/util/cdb.h
@@ -0,0 +1,42 @@
+/// \file cdb.h
+/// Helper functions and types for working with djb's constant database archives
+#pragma once
+
+#include
+#include
+
+namespace util {
+
+/// Read-only lookup over a constant database image held in memory
+class cdb
+{
+public:
+    /// Constructor.
+    /// \arg data  The database image. An image too small to hold the
+    ///            initial pointer table is treated as an empty database.
+    cdb(buffer data);
+
+    /// Retrieve a value from the database for the given key.
+    /// \arg key  A null-terminated string key
+    /// \returns  A const util::buffer pointing to the data in memory.
+    ///           The buffer will be {0, 0} if the key is not found.
+    const buffer retrieve(const char *key) const;
+
+    /// Retrieve a value from the database for the given key.
+    /// \arg key  Pointer to a key as an array of bytes
+    /// \arg len  Length of the key
+    /// \returns  A const util::buffer pointing to the data in memory.
+    ///           The buffer will be {0, 0} if the key is not found.
+    const buffer retrieve(const uint8_t *key, uint32_t len) const;
+
+private:
+    /// Get a typed pointer to the given byte offset into the database
+    template
+    T const * at(uint32_t offset) const {
+        return reinterpret_cast(util::offset_pointer(m_data.pointer, offset));
+    }
+
+    buffer m_data;
+};
+
+} // namespace
diff --git a/src/libraries/util/util/counted.h b/src/libraries/util/util/counted.h
index e99be2b..ff1c3a0 100644
--- a/src/libraries/util/util/counted.h
+++ b/src/libraries/util/util/counted.h
@@ -43,6 +43,11 @@ struct counted
         count -= i;
         return *this;
     }
+
+    /// Get a constant buffer from a const pointer
+    static const counted from_const(const T *p, size_t count) {
+        return { const_cast(p), count };
+    }
 };
 
 /// Specialize for `void` which cannot be indexed or iterated
@@ -59,12 +64,20 @@ struct counted
         count -= i;
         return *this;
     }
+
+    /// Get a constant buffer from a const pointer
+    static const counted from_const(const void *p, size_t count) {
+        return { const_cast(p), count };
+    }
 };
 
 using buffer = counted;
 
 template
 const T * read(buffer &b) {
+    if (b.count < sizeof(T))
+        return nullptr;
+
     const T *p = reinterpret_cast(b.pointer);
     b.pointer = offset_pointer(b.pointer, sizeof(T));
     b.count -= sizeof(T);
diff --git a/src/user/srv.init/init.module b/src/user/srv.init/init.module
index 6025c58..aafbd89 100644
--- a/src/user/srv.init/init.module
+++ b/src/user/srv.init/init.module
@@ -2,14 +2,13 @@
 
 init = module("srv.init",
     targets = [ "user" ],
-    deps = [ "libc", "elf", "bootproto" ],
+    deps = [ "libc", "elf", "bootproto", "zstd" ],
     description = "Init server",
     sources = [
         "loader.cpp",
         "main.cpp",
         "modules.cpp",
+        "ramdisk.cpp",
         "service_locator.cpp",
         "start.s",
     ])
-
-init.variables['ldflags'] = ["${ldflags}", "-section-start=.rodata=0x800000"]
diff --git a/src/user/srv.init/ramdisk.cpp b/src/user/srv.init/ramdisk.cpp
new file mode 100644
index 0000000..fcee532
--- /dev/null
+++ b/src/user/srv.init/ramdisk.cpp
@@ -0,0 +1,72 @@
+#include
+#include
+#include
+
+#include
+#include
+
+#include "ramdisk.h"
+
+inline constexpr uint64_t manifest_magic = 0x74696e697869736a; // "jsixinit"
+inline constexpr size_t manifest_min = 18;
+inline constexpr size_t manifest_version = 1;
+
+using util::read;
+
+ramdisk::ramdisk(util::buffer data) : m_data {data} {}
+
+util::buffer
+ramdisk::load_file(const char *name)
+{
+    util::cdb cdb {m_data};
+    util::buffer c = cdb.retrieve(name);
+    if (!c.count)
+        return c;
+
+    // ZSTD_getFrameContentSize signals "unknown" and "error" with huge
+    // sentinel values; those must never be used as an allocation size
+    unsigned long long frame_size = ZSTD_getFrameContentSize(c.pointer, c.count);
+    if (frame_size == ZSTD_CONTENTSIZE_UNKNOWN || frame_size == ZSTD_CONTENTSIZE_ERROR)
+        return {0,0};
+
+    size_t size = static_cast<size_t>(frame_size);
+
+    util::buffer d {malloc(size), size};
+    if (!d.pointer)
+        return {0,0};
+
+    size_t out = ZSTD_decompress(
+            d.pointer, d.count,
+            c.pointer, c.count);
+
+    if (out != size) {
+        free(d.pointer);
+        return {0,0};
+    }
+
+    return d;
+}
+
+// Only the fixed-size header is validated and walked so far; the values
+// read from it are not yet stored in m_services or m_drivers
+manifest::manifest(util::buffer data)
+{
+    if (data.count < manifest_min)
+        return;
+
+    char const *base = reinterpret_cast(data.pointer);
+
+    if (*read(data) != manifest_magic)
+        return;
+
+    uint8_t version = *read(data);
+    if (version != manifest_version)
+        return;
+
+    read(data); // reserved byte
+    uint16_t services_len = *read(data);
+    uint16_t drivers_len = *read(data);
+
+    base += *read(data); // start of the string section
+
+}
diff --git a/src/user/srv.init/ramdisk.h b/src/user/srv.init/ramdisk.h
new file mode 100644
index 0000000..1711d70
--- /dev/null
+++ b/src/user/srv.init/ramdisk.h
@@ -0,0 +1,39 @@
+#pragma once
+/// \file ramdisk.h
+/// Data structure for a ramdisk archive, based on djb's CDB format
+
+#include
+#include
+
+#include
+#include
+
+/// An initrd image: a CDB archive whose values are zstd-compressed files
+class ramdisk
+{
+public:
+    /// Constructor.
+    /// \arg data  The CDB archive image in memory
+    ramdisk(util::buffer data);
+
+    /// Look up a file by name and decompress it.
+    /// \arg name  A null-terminated file name, used as the CDB key
+    /// \returns   A buffer allocated with malloc holding the decompressed
+    ///            file, which the caller must free. The buffer will be
+    ///            {0, 0} if the file is missing or fails to decompress.
+    util::buffer load_file(const char *name);
+
+private:
+    util::buffer m_data;
+};
+
+/// Manifest of services and drivers, parsed from a buffer
+class manifest
+{
+public:
+    manifest(util::buffer data);
+
+private:
+    std::vector m_services;
+    std::unordered_map m_drivers;
+};