[kernel] Add RAII profiler object

Added profiler.h which defines classes and macros for defining profiler
objects. Also added gdb command j6prof for printing profile data. Added
the syscall_profiles profiler class and auto wrapping of syscalls with
profile objects.

Other changes in this commit:

- Made the gdb command `j6threads` argument for specifying a CPU
  optional. Without an argument, it loops through all CPUs.
- Switched to -mcmodel=kernel for kernel code, which makes `call`
  instructions easier to follow when debugging / looking at disassembly.
This commit is contained in:
Justin C. Miller
2022-02-26 13:19:21 -08:00
parent a9f40cf608
commit a03804b09d
4 changed files with 112 additions and 12 deletions

View File

@@ -199,29 +199,69 @@ class GetThreadsCommand(gdb.Command):
addr = int(gdb.parse_and_eval(f"((tcb_node*){addr:#x})->m_next"))
def invoke(self, arg, from_tty):
args = gdb.string_to_argv(arg)
if len(args) > 1:
raise RuntimeError("Usage: j6threads [cpu]")
ncpus = int(gdb.parse_and_eval("g_num_cpus"))
cpus = list(range(ncpus))
if len(args) == 1:
cpus = [int(args[0])]
for cpu in cpus:
runlist = f"scheduler::s_instance->m_run_queues.m_elements[{cpu:#x}]"
print(f"=== CPU {cpu:2}: CURRENT ===")
current = int(gdb.parse_and_eval(f"{runlist}.current"))
self.print_thread(current)
for pri in range(8):
ready = int(gdb.parse_and_eval(f"{runlist}.ready[{pri:#x}].m_head"))
self.print_thread_list(ready, f"CPU {cpu:2}: PRIORITY {pri}")
blocked = int(gdb.parse_and_eval(f"{runlist}.blocked.m_head"))
self.print_thread_list(blocked, f"CPU {cpu:2}: BLOCKED")
class PrintProfilesCommand(gdb.Command):
def __init__(self):
super().__init__("j6prof", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
args = gdb.string_to_argv(arg)
if len(args) != 1:
raise RuntimeError("Usage: j6threads <cpu>")
raise RuntimeError("Usage: j6prof <profiler class>")
ncpu = int(args[0])
runlist = f"scheduler::s_instance->m_run_queues.m_elements[{ncpu:#x}]"
profclass = args[0]
root_type = f"profile_class<{profclass}>"
print("CURRENT:")
current = int(gdb.parse_and_eval(f"{runlist}.current"))
self.print_thread(current)
try:
baseclass = gdb.lookup_type(root_type)
except Exception as e:
print(e)
return
for pri in range(8):
ready = int(gdb.parse_and_eval(f"{runlist}.ready[{pri:#x}].m_head"))
self.print_thread_list(ready, f"PRIORITY {pri}")
results = {}
max_len = 0
count = gdb.parse_and_eval(f"{profclass}::count")
for i in range(count):
name = gdb.parse_and_eval(f"{root_type}::function_names[{i:#x}]")
if name == 0: continue
blocked = int(gdb.parse_and_eval(f"{runlist}.blocked.m_head"))
self.print_thread_list(blocked, "BLOCKED")
call_counts = gdb.parse_and_eval(f"{root_type}::call_counts[{i:#x}]")
call_durations = gdb.parse_and_eval(f"{root_type}::call_durations[{i:#x}]")
results[name.string()] = float(call_durations) / float(call_counts)
max_len = max(max_len, len(name.string()))
for name, avg in results.items():
print(f"{name:>{max_len}}: {avg:15.3f}")
PrintStackCommand()
PrintBacktraceCommand()
TableWalkCommand()
GetThreadsCommand()
PrintProfilesCommand()
gdb.execute("display/i $rip")
if not gdb.selected_inferior().was_attached:

View File

@@ -19,7 +19,7 @@ variables:
"-mno-sse",
"-fno-omit-frame-pointer",
"-mno-red-zone",
"-mcmodel=large",
"-mcmodel=kernel",
"-g3",
"-ggdb",

50
src/kernel/profiler.h Normal file
View File

@@ -0,0 +1,50 @@
#pragma once
/// \file profiler.h
/// The kernel profiling interface
#include <stdint.h>
#include <util/hash.h>
inline uint64_t rdtsc() {
uint32_t high, low;
asm ( "rdtsc" : "=a" (low), "=d" (high) );
return (static_cast<uint64_t>(high) << 32) | low;
}
template<typename PC, size_t ID>
class profiler
{
using class_t = PC;
static constexpr size_t id = ID;
public:
profiler(char const * const function = __builtin_FUNCTION()) : m_start(rdtsc()) {
static_assert(id < class_t::count, "profiler out of bounds");
class_t::call_counts[id]++;
class_t::function_names[id] = function;
}
~profiler() {
uint64_t stop = rdtsc();
class_t::call_durations[id] += (stop - m_start);
}
private:
uint64_t m_start;
};
template<typename T>
struct profile_class
{
static volatile char const * function_names[];
static volatile uint64_t call_counts[];
static volatile uint64_t call_durations[];
};
#define DECLARE_PROFILE_CLASS(name, size) \
struct name : public profile_class<name> { static constexpr size_t count = size; };
#define DEFINE_PROFILE_CLASS(name) \
template<> volatile char const * profile_class<name>::function_names[name::count] = {0}; \
template<> volatile uint64_t profile_class<name>::call_counts[name::count] = {0}; \
template<> volatile uint64_t profile_class<name>::call_durations[name::count] = {0};

View File

@@ -7,6 +7,7 @@
#include <j6/types.h>
#include <util/counted.h>
#include "profiler.h"
#include "syscalls/helpers.h"
/*[[[cog code generation
@@ -21,6 +22,12 @@ for obj in syscalls.exposes:
]]]*/
//[[[end]]]
/*[[[cog code generation
cog.outl(f'constexpr size_t syscall_count = {len(syscalls.methods)};')
]]]*/
//[[[end]]]
DECLARE_PROFILE_CLASS(syscall_profiles, syscall_count);
namespace {
enum class req { required, optional, zero_ok };
@@ -61,6 +68,8 @@ namespace {
}
}
DEFINE_PROFILE_CLASS(syscall_profiles);
namespace syscalls {
using util::buffer;
@@ -185,6 +194,7 @@ for id, scope, method in syscalls.methods:
if "noreturn" in method.options:
cog.outl(f""" {name}({", ".join(args)});""")
else:
cog.outl(f""" profiler<syscall_profiles, {id}> profile {{"{name}"}};""")
cog.outl(f""" return {name}({", ".join(args)});""")
cog.outl("}")
cog.outl("\n")