From a03804b09df128418b0094fd921bfe17240970c0 Mon Sep 17 00:00:00 2001 From: "Justin C. Miller" Date: Sat, 26 Feb 2022 13:19:21 -0800 Subject: [PATCH] [kernel] Add RAII profiler object Added profiler.h which defines classes and macros for defining profiler objects. Also added gdb command j6prof for printing profile data. Added the syscall_profiles profiler class and auto wrapping of syscalls with profile objects. Other changes in this commit: - Made the gdb command `j6threads` argument for specifying a CPU optional. Without an argument, it loops through all CPUs. - Switched to -mcmodel=kernel for kernel code, which makes `call` instructions easier to follow when debugging / looking at disassembly. --- assets/debugging/jsix.elf-gdb.py | 62 +++++++++++++++++++++++++------ configs/kernel-debug.yaml | 2 +- src/kernel/profiler.h | 50 +++++++++++++++++++++++++ src/kernel/syscall_verify.cpp.cog | 10 +++++ 4 files changed, 112 insertions(+), 12 deletions(-) create mode 100644 src/kernel/profiler.h diff --git a/assets/debugging/jsix.elf-gdb.py b/assets/debugging/jsix.elf-gdb.py index 2341dfe..671bbfc 100644 --- a/assets/debugging/jsix.elf-gdb.py +++ b/assets/debugging/jsix.elf-gdb.py @@ -199,29 +199,69 @@ class GetThreadsCommand(gdb.Command): addr = int(gdb.parse_and_eval(f"((tcb_node*){addr:#x})->m_next")) + def invoke(self, arg, from_tty): + args = gdb.string_to_argv(arg) + if len(args) > 1: + raise RuntimeError("Usage: j6threads [cpu]") + + ncpus = int(gdb.parse_and_eval("g_num_cpus")) + cpus = list(range(ncpus)) + if len(args) == 1: + cpus = [int(args[0])] + + for cpu in cpus: + runlist = f"scheduler::s_instance->m_run_queues.m_elements[{cpu:#x}]" + + print(f"=== CPU {cpu:2}: CURRENT ===") + current = int(gdb.parse_and_eval(f"{runlist}.current")) + self.print_thread(current) + + for pri in range(8): + ready = int(gdb.parse_and_eval(f"{runlist}.ready[{pri:#x}].m_head")) + self.print_thread_list(ready, f"CPU {cpu:2}: PRIORITY {pri}") + + blocked = int(gdb.parse_and_eval(f"{runlist}.blocked.m_head")) + self.print_thread_list(blocked, f"CPU {cpu:2}: BLOCKED") + + +class PrintProfilesCommand(gdb.Command): + def __init__(self): + super().__init__("j6prof", gdb.COMMAND_DATA) + def invoke(self, arg, from_tty): args = gdb.string_to_argv(arg) if len(args) != 1: - raise RuntimeError("Usage: j6threads ") + raise RuntimeError("Usage: j6prof ") - ncpu = int(args[0]) - runlist = f"scheduler::s_instance->m_run_queues.m_elements[{ncpu:#x}]" + profclass = args[0] + root_type = f"profile_class<{profclass}>" - print("CURRENT:") - current = int(gdb.parse_and_eval(f"{runlist}.current")) - self.print_thread(current) + try: + baseclass = gdb.lookup_type(root_type) + except Exception as e: + print(e) + return - for pri in range(8): - ready = int(gdb.parse_and_eval(f"{runlist}.ready[{pri:#x}].m_head")) - self.print_thread_list(ready, f"PRIORITY {pri}") + results = {} + max_len = 0 + count = gdb.parse_and_eval(f"{profclass}::count") + for i in range(count): + name = gdb.parse_and_eval(f"{root_type}::function_names[{i:#x}]") + if name == 0: continue - blocked = int(gdb.parse_and_eval(f"{runlist}.blocked.m_head")) - self.print_thread_list(blocked, "BLOCKED") + call_counts = gdb.parse_and_eval(f"{root_type}::call_counts[{i:#x}]") + call_durations = gdb.parse_and_eval(f"{root_type}::call_durations[{i:#x}]") + results[name.string()] = float(call_durations) / float(call_counts) + max_len = max(max_len, len(name.string())) + + for name, avg in results.items(): + print(f"{name:>{max_len}}: {avg:15.3f}") PrintStackCommand() PrintBacktraceCommand() TableWalkCommand() GetThreadsCommand() +PrintProfilesCommand() gdb.execute("display/i $rip") if not gdb.selected_inferior().was_attached: diff --git a/configs/kernel-debug.yaml b/configs/kernel-debug.yaml index 255f8d8..0c0fa19 100644 --- a/configs/kernel-debug.yaml +++ b/configs/kernel-debug.yaml @@ -19,7 +19,7 @@ variables: "-mno-sse", "-fno-omit-frame-pointer", "-mno-red-zone", - "-mcmodel=large", + "-mcmodel=kernel", "-g3", "-ggdb", diff --git a/src/kernel/profiler.h b/src/kernel/profiler.h new file mode 100644 index 0000000..ee00385 --- /dev/null +++ b/src/kernel/profiler.h @@ -0,0 +1,50 @@ +#pragma once +/// \file profiler.h +/// The kernel profiling interface + +#include +#include + +inline uint64_t rdtsc() { + uint32_t high, low; + asm ( "rdtsc" : "=a" (low), "=d" (high) ); + return (static_cast(high) << 32) | low; +} + +template +class profiler +{ + using class_t = PC; + static constexpr size_t id = ID; + +public: + profiler(char const * const function = __builtin_FUNCTION()) : m_start(rdtsc()) { + static_assert(id < class_t::count, "profiler out of bounds"); + class_t::call_counts[id]++; + class_t::function_names[id] = function; + } + + ~profiler() { + uint64_t stop = rdtsc(); + class_t::call_durations[id] += (stop - m_start); + } + +private: + uint64_t m_start; +}; + +template +struct profile_class +{ + static volatile char const * function_names[]; + static volatile uint64_t call_counts[]; + static volatile uint64_t call_durations[]; +}; + +#define DECLARE_PROFILE_CLASS(name, size) \ + struct name : public profile_class { static constexpr size_t count = size; }; + +#define DEFINE_PROFILE_CLASS(name) \ + template<> volatile char const * profile_class::function_names[name::count] = {0}; \ + template<> volatile uint64_t profile_class::call_counts[name::count] = {0}; \ + template<> volatile uint64_t profile_class::call_durations[name::count] = {0}; diff --git a/src/kernel/syscall_verify.cpp.cog b/src/kernel/syscall_verify.cpp.cog index 78cb60b..a8d4b41 100644 --- a/src/kernel/syscall_verify.cpp.cog +++ b/src/kernel/syscall_verify.cpp.cog @@ -7,6 +7,7 @@ #include #include +#include "profiler.h" #include "syscalls/helpers.h" /*[[[cog code generation @@ -21,6 +22,12 @@ for obj in syscalls.exposes: ]]]*/ //[[[end]]] + +/*[[[cog code generation +cog.outl(f'constexpr size_t syscall_count = {len(syscalls.methods)};') +]]]*/ +//[[[end]]] +DECLARE_PROFILE_CLASS(syscall_profiles, syscall_count); namespace { enum class req { required, optional, zero_ok }; @@ -61,6 +68,8 @@ namespace { } } +DEFINE_PROFILE_CLASS(syscall_profiles); + namespace syscalls { using util::buffer; @@ -185,6 +194,7 @@ for id, scope, method in syscalls.methods: if "noreturn" in method.options: cog.outl(f""" {name}({", ".join(args)});""") else: + cog.outl(f""" profiler profile {{"{name}"}};""") cog.outl(f""" return {name}({", ".join(args)});""") cog.outl("}") cog.outl("\n")