blob: a93aa83931875049d4638ceff755151e8fff9707 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
// This file is included from PerfCounters.cc
/**
 * Identification of a single core: the implementer, variant and part
 * number fields (as decoded from MIDR_EL1 or read from /proc/cpuinfo).
 *
 * A value whose three fields are all zero converts to `false`; the rest
 * of this file uses that state to mean "no ID recorded for this core".
 */
struct CPUID {
  uint8_t implementer = 0;
  uint8_t variant = 0;
  uint16_t part = 0;
  // True if any field is non-zero, i.e. the ID has been filled in.
  operator bool() const
  {
    return implementer || variant || part;
  }
  // bool operator==(const CPUID&) const = default; // c++20
  bool operator==(const CPUID &other) const
  {
    return implementer == other.implementer &&
      variant == other.variant && part == other.part;
  }
  bool operator!=(const CPUID &other) const
  {
    return !(*this == other);
  }
};

/**
 * Print the three fields of `cpuid` in hexadecimal.
 *
 * `std::hex` is a sticky manipulator, so the caller's format flags are
 * saved and restored around the output. Without that, every integer
 * streamed after a CPUID (e.g. a core index in the same diagnostic)
 * would silently be printed in hex as well.
 */
static std::ostream &operator<<(std::ostream &stm, const CPUID &cpuid)
{
  const std::ios_base::fmtflags saved_flags = stm.flags();
  // The uint8_t fields are widened to int so they print as numbers
  // rather than as raw characters.
  stm << std::hex << "implementer: 0x" << int(cpuid.implementer)
      << ", variant: 0x" << int(cpuid.variant) << " part: 0x" << int(cpuid.part);
  stm.flags(saved_flags);
  return stm;
}
/**
 * Translate a core's CPUID into the microarchitecture it is known as.
 *
 * Only recognised implementer/part pairs produce a return value; any
 * other CPUID is reported through CLEAN_FATAL(), so UnknownCpu is never
 * handed back to the caller. The variant field is not consulted.
 */
static CpuMicroarch compute_cpu_microarch(const CPUID &cpuid) {
  struct KnownCore {
    uint8_t implementer;
    uint16_t part;
    CpuMicroarch uarch;
  };
  static const KnownCore known_cores[] = {
    // ARM
    { 0x41, 0xd05, ARMCortexA55 },
    { 0x41, 0xd0a, ARMCortexA75 },
    { 0x41, 0xd0b, ARMCortexA76 },
    { 0x41, 0xd0c, ARMNeoverseN1 },
    { 0x41, 0xd0d, ARMCortexA77 },
    { 0x41, 0xd40, ARMNeoverseV1 },
    { 0x41, 0xd41, ARMCortexA78 },
    { 0x41, 0xd4b, ARMCortexA78 }, // ARM Cortex A78C
    { 0x41, 0xd44, ARMCortexX1 },
    { 0x41, 0xd4c, ARMCortexX1 },  // ARM Cortex X1C
    { 0x41, 0xd49, ARMNeoverseN2 },
    { 0x41, 0xd4a, ARMNeoverseE1 },
    // Qualcomm
    { 0x51, 0x802, ARMCortexA75 },
    { 0x51, 0x803, ARMCortexA55 },
    { 0x51, 0x804, ARMCortexA76 },
    { 0x51, 0x805, ARMCortexA55 },
    // Apple
    { 0x61, 0x22, AppleM1Icestorm },
    { 0x61, 0x24, AppleM1Icestorm },
    { 0x61, 0x28, AppleM1Icestorm },
    { 0x61, 0x23, AppleM1Firestorm },
    { 0x61, 0x25, AppleM1Firestorm },
    { 0x61, 0x29, AppleM1Firestorm },
    { 0x61, 0x32, AppleM2Blizzard },
    { 0x61, 0x33, AppleM2Avalanche },
  };
  for (const KnownCore &core : known_cores) {
    if (core.implementer == cpuid.implementer && core.part == cpuid.part) {
      return core.uarch;
    }
  }
  CLEAN_FATAL() << "Unknown aarch64 CPU type " << cpuid;
  return UnknownCpu; // not reached
}
static void set_cpuid(std::vector<CPUID> &cpuids, unsigned long cpuidx, CPUID cpuid)
{
if (cpuids.size() <= cpuidx) {
cpuids.resize(cpuidx + 1);
}
if (cpuids[cpuidx]) {
CLEAN_FATAL() << "Duplicated CPUID for core " << cpuidx;
}
cpuids[cpuidx] = cpuid;
}
/**
* The new interface to get ID register values on AArch64
* `/sys/devices/system/cpu/cpu([0-9]+)/regs/identification/midr_el1`
* The register value is stored in hex.
*/
static inline void get_cpuinfo_sysfs(std::vector<CPUID> &res)
{
const std::string cpu_dir = "/sys/devices/system/cpu/";
const std::regex cpuname_regex("cpu([0-9]+)");
auto dir = opendir(cpu_dir.c_str());
if (!dir) {
return;
}
while (auto entry = readdir(dir)) {
std::cmatch match;
if (entry->d_type != DT_DIR ||
!std::regex_match(entry->d_name, match, cpuname_regex)) {
continue;
}
auto cpuidx = std::stoul(match[1].str());
std::string name = cpu_dir + entry->d_name + "/regs/identification/midr_el1";
std::ifstream file(name);
if (!file) {
CLEAN_FATAL() << "Failed to read midr register from kernel";
}
uint64_t val = 0;
file >> std::hex >> val;
if (!file) {
CLEAN_FATAL() << "Failed to read midr register from kernel";
}
set_cpuid(res, cpuidx, {
uint8_t(val >> 24),
uint8_t((val >> 20) & 0xf),
uint16_t((val >> 4) & 0xfff)
});
}
closedir(dir);
}
/**
 * Try to interpret `line` as a `<prefix> : <number>` entry of /proc/cpuinfo.
 *
 * Returns false, touching nothing, when `line` does not begin with `prefix`.
 * Otherwise returns true and:
 *  - calls `reset()` first if `flag` was already set, i.e. the same property
 *    showed up twice without a blank line separating the processor blocks;
 *  - skips any mix of blanks, tabs and `:` after the prefix and parses the
 *    rest with strtoull (base auto-detected, so `0x..` is accepted);
 *  - stores the parsed number, converted to `T`, in `out`;
 *  - sets `flag` to true only if digits were consumed and the number fits
 *    in `T`, and to false otherwise.
 *
 * This is a free template because lambdas can't be templated in C++11.
 */
template<typename T, typename F>
static inline bool try_read_procfs_line(const std::string &line,
                                        const char *prefix, T &out,
                                        bool &flag, F &&reset)
{
  const size_t prefix_len = strlen(prefix);
  const bool has_prefix = line.size() >= prefix_len &&
                          memcmp(line.data(), prefix, prefix_len) == 0;
  if (!has_prefix) {
    return false;
  }
  if (flag) {
    // Already seen in the current block: flush it before starting over.
    reset();
  }
  // c_str() is NUL terminated, so the separator scan below always stops.
  const char *value = line.c_str() + prefix_len;
  while (*value == ' ' || *value == '\t' || *value == ':') {
    ++value;
  }
  char *parse_end = nullptr;
  const unsigned long long parsed = std::strtoull(value, &parse_end, 0);
  out = static_cast<T>(parsed);
  const bool consumed_digits = parse_end != value;
  const bool fits_in_out =
    parsed <= static_cast<unsigned long long>(std::numeric_limits<T>::max());
  flag = consumed_digits && fits_in_out;
  return true;
}
/**
* /proc/cpuinfo reader
* The cpuinfo file contains blocks of text for each core.
* The blocks are separated by empty lines and it should start with a
* `processor : <num>` line followed by lines showing properties of the core.
* The three property lines we are looking for starts with
* `CPU implementer`, `CPU variant` and `CPU part`.
*/
static inline void get_cpuinfo_procfs(std::vector<CPUID> &res)
{
std::ifstream file("/proc/cpuinfo");
CPUID cpuid = {0, 0, 0};
unsigned cpuidx = 0;
bool has_cpuidx = false;
bool has_impl = false;
bool has_part = false;
bool has_var = false;
auto reset = [&] () {
// Few (none) of the detection code care about the variant number
// so we'll accept it if we couldn't read it.
if (has_cpuidx && has_impl && has_part) {
set_cpuid(res, cpuidx, cpuid);
}
has_cpuidx = false;
has_impl = false;
has_part = false;
has_var = false;
cpuid = {0, 0, 0};
};
for (std::string line; std::getline(file, line);) {
// Empty lines means that we've finished processing of a block
if (line.empty()) {
reset();
continue;
}
// First find the processor line
if (try_read_procfs_line(line, "processor", cpuidx, has_cpuidx, reset)) {
continue;
}
// and ignore the line until we found the processor line.
if (!has_cpuidx) {
continue;
}
// Try parsing as one of the data lines.
// Short circuiting after the first hit.
try_read_procfs_line(line, "CPU implementer", cpuid.implementer, has_impl, reset) ||
try_read_procfs_line(line, "CPU variant", cpuid.variant, has_var, reset) ||
try_read_procfs_line(line, "CPU part", cpuid.part, has_part, reset);
}
reset();
}
static std::vector<CpuMicroarch> compute_cpu_microarchs() {
std::vector<CPUID> cpuids;
get_cpuinfo_sysfs(cpuids);
if (cpuids.empty()) {
LOG(warn) << "Unable to read CPU type from sysfs, trying procfs instead.";
get_cpuinfo_procfs(cpuids);
}
if (cpuids.empty()) {
CLEAN_FATAL() << "Failed to read midr register from kernel";
}
for (auto &cpuid : cpuids) {
if (!cpuid) {
CLEAN_FATAL() << "Unable to find CPU id for core " << &cpuid - &cpuids[0];
}
}
auto cpuid0 = cpuids[0];
bool single_uarch = true;
for (auto &cpuid : cpuids) {
if (cpuid != cpuid0) {
single_uarch = false;
break;
}
}
if (single_uarch) {
return { compute_cpu_microarch(cpuid0) };
}
std::vector<CpuMicroarch> uarchs;
for (auto &cpuid : cpuids) {
uarchs.push_back(compute_cpu_microarch(cpuid));
}
return uarchs;
}
// Print a warning on stderr about running with a single performance
// counter, unless environment warnings have been suppressed via Flags.
static void arch_check_restricted_counter() {
  if (Flags::get().suppress_environment_warnings) {
    return;
  }
  static const char warning[] =
    "Your CPU supports only one performance counter.\n"
    "Use of LL/SC instructions will not be detected and will\n"
    "cause silently corrupt recordings. It is highly recommended\n"
    "that you alter your configuration to enable additional performance\n"
    "counters.\n";
  fputs(warning, stderr);
}
// Always answers false; the supplied attributes are not inspected.
static bool always_recreate_counters(const perf_event_attrs &) {
  return false;
}
static void check_for_arch_bugs(__attribute__((unused)) perf_event_attrs &perf_attr) {}
/**
 * Finalize the perf event types of every entry in `pmu_uarchs`.
 *
 * For each entry that has a known ticks flag, the PMU type number is read
 * from `/sys/bus/event_source/devices/<pmu_name>/type` (once per PMU name)
 * and applied to the entry's cycle/event types. If the file cannot be
 * opened or parsed, the entry falls back to the generic
 * PERF_TYPE_HARDWARE / PERF_TYPE_RAW types instead.
 *
 * Afterwards `pmu_uarchs` is shrunk to a single entry when all entries share
 * one PMU name and the same counter events. It is fatal if any type lookup
 * failed and more than one entry remains.
 */
static void post_init_pmu_uarchs(std::vector<PmuConfig> &pmu_uarchs)
{
// Per PMU name cache of the lookup result:
//   0  - not looked up yet (value default-inserted by operator[])
//   -1 - lookup failed, use the fallback configuration
//   otherwise the type number read from sysfs.
std::map<std::string,int> pmu_types;
size_t npmus = pmu_uarchs.size();
// Number of PMU names whose type file could not be read.
int pmu_type_failed = 0;
// Drop the PMU name and switch the entry to the generic hardware cycle
// event and the raw event type.
auto fallback_pmu = [] (PmuConfig &pmu_uarch) {
pmu_uarch.pmu_name = nullptr;
if (pmu_uarch.cycle_type != PERF_TYPE_HARDWARE) {
pmu_uarch.cycle_type = PERF_TYPE_HARDWARE;
pmu_uarch.cycle_event = PERF_COUNT_HW_CPU_CYCLES;
}
if (pmu_uarch.event_type != PERF_TYPE_RAW) {
pmu_uarch.event_type = PERF_TYPE_RAW;
}
};
// Apply the PMU specific `type`, but leave alone any field that is already
// set to the generic PERF_TYPE_HARDWARE / PERF_TYPE_RAW value.
auto set_pmu_type = [] (PmuConfig &pmu_uarch, int type) {
if (pmu_uarch.cycle_type != PERF_TYPE_HARDWARE) {
pmu_uarch.cycle_type = type;
}
if (pmu_uarch.event_type != PERF_TYPE_RAW) {
pmu_uarch.event_type = type;
}
};
// Set when at least one entry has neither ticks flag and was skipped.
bool has_unknown = false;
for (size_t i = 0; i < npmus; i++) {
auto &pmu_uarch = pmu_uarchs[i];
// Entries without a ticks flag are left as they are.
if (!(pmu_uarch.flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES))) {
has_unknown = true;
continue;
}
if (!pmu_uarch.pmu_name) {
CLEAN_FATAL() << "Unknown PMU name for core " << i;
continue;
}
std::string pmu_name(pmu_uarch.pmu_name);
// Reference into the cache; assigning to it below records the result
// for later entries with the same PMU name.
auto &pmu_type = pmu_types[pmu_name];
if (pmu_type == -1) {
// A previous lookup for this name already failed.
fallback_pmu(pmu_uarch);
continue;
}
if (pmu_type) {
// A previous lookup for this name succeeded; reuse its result.
set_pmu_type(pmu_uarch, pmu_type);
continue;
}
// First time this PMU name is seen: read its type from sysfs.
auto filename = "/sys/bus/event_source/devices/" + pmu_name + "/type";
std::ifstream file(filename);
int val = 0;
bool failed = false;
if (!file) {
failed = true;
LOG(warn) << "Cannot open " << filename;
}
else {
file >> val;
if (!file) {
failed = true;
LOG(warn) << "Cannot read " << filename;
}
}
if (failed) {
// Record the failure and fallback to the kernel raw and hardware events instead
pmu_type_failed++;
fallback_pmu(pmu_uarch);
pmu_type = -1;
}
else {
set_pmu_type(pmu_uarch, val);
pmu_type = val;
}
}
// Exactly one PMU name was seen and no entry was skipped: the entries can
// be merged if they also agree on the events they count.
if (pmu_types.size() == 1 && !has_unknown) {
bool single_type = true;
auto &pmu_uarch0 = pmu_uarchs[0];
// Apparently the same PMU type doesn't actually mean the same PMU events...
for (auto &pmu_uarch: pmu_uarchs) {
if (&pmu_uarch == &pmu_uarch0) {
// Skip first
continue;
}
if (pmu_uarch.rcb_cntr_event != pmu_uarch0.rcb_cntr_event ||
pmu_uarch.minus_ticks_cntr_event != pmu_uarch0.minus_ticks_cntr_event ||
pmu_uarch.llsc_cntr_event != pmu_uarch0.llsc_cntr_event) {
single_type = false;
break;
}
}
if (single_type) {
// Single PMU type
pmu_uarchs.resize(1);
}
}
if (pmu_uarchs.size() != 1 && pmu_type_failed) {
// If reading PMU type failed, we only allow a single PMU type to be sure
// that we get what we want from the kernel events.
CLEAN_FATAL() << "Unable to read PMU event types";
}
}
/**
 * ARM64 specific part of resetting the counters.
 *
 * LL/SC can't be recorded reliably, so an extra counter is started to
 * detect any usage and make an intelligent error message possible.
 */
template <>
void PerfCounters::reset_arch_extras<ARM64Arch>() {
  // Work on a copy of the configured LL/SC-failure attributes for the
  // current PMU, with the sample period cleared.
  struct perf_event_attr llsc_attr = perf_attrs[pmu_index].llsc_fail;
  llsc_attr.sample_period = 0;
  fd_strex_counter = start_counter(tid, fd_ticks_interrupt, &llsc_attr);
}