#include <getopt.h>
#include <sched.h>
#include <stdio.h>
#include <sys/time.h>

#include <cerrno>
#include <chrono>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>

#include "Profiler.h"
| |
// Benchmark kernel defined outside this file, exported with C linkage.
// NOTE(review): main() passes REPETITIONS as `count` and a number of cache lines as `step`;
// the routine's exact semantics are not visible from this file.
extern "C" void icache_test(long count, long step);

// Granularity, in bytes, used to convert between byte sizes and cache-line counts.
static constexpr size_t CACHE_LINE_SIZE = 64;

// Upper bound of the sweep performed by main(), in bytes (128 KiB).
static constexpr size_t MAX_CODE_SIZE = 128 * 1024;

// MAX_CODE_SIZE expressed in cache lines; the largest `step` handed to icache_test().
static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE;

// `count` handed to icache_test(); also the divisor used to report per-repetition figures.
static constexpr size_t REPETITIONS = 0x800000L;
| |
| |
// Makes every name declared in namespace utils visible in this file.
// NOTE(review): Profiler is used unqualified below and is presumably declared in utils by
// Profiler.h — confirm before narrowing this to a using-declaration.
using namespace utils;

// CPU set selected with --affinity/-a: cleared at the start of main(), filled in by
// handleCommandLineArgments(), and handed to sched_setaffinity() when it is non-empty.
static cpu_set_t g_cpu_set;
| |
/**
 * Prints the command-line help text to stdout.
 *
 * Every occurrence of the placeholder "ICACHE" in the help text is replaced by the name the
 * program was invoked with.
 *
 * @param name invocation name of the executable (typically argv[0]). When null, the
 *             placeholder text is printed unchanged.
 */
static void printUsage(char* name) {
    static constexpr const char* PLACEHOLDER = "ICACHE";

    // argv[0] is allowed to be null; constructing std::string from a null pointer is undefined.
    const std::string exec_name(name ? name : PLACEHOLDER);
    std::string usage(
            "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n"
            "(Make sure security.perf_harden is set to 0)\n\n"
            "Usages:\n"
            "    ICACHE [options]\n"
            "\n"
            "Options:\n"
            "   --help, -h\n"
            "       print this message\n\n"
            "   --affinity=N, -a N\n"
            "       Specify which CPU the test should run on.\n\n"
    );
    const std::string from(PLACEHOLDER);
    // Resume searching *after* the inserted text: restarting at `pos` would match the
    // placeholder again whenever the executable name itself contains it (e.g. "./ICACHE")
    // and the loop would never terminate.
    for (size_t pos = usage.find(from); pos != std::string::npos;
            pos = usage.find(from, pos + exec_name.length())) {
        usage.replace(pos, from.length(), exec_name);
    }
    printf("%s", usage.c_str());
}
| |
| static int handleCommandLineArgments(int argc, char* argv[]) { |
| static constexpr const char* OPTSTR = "ha:"; |
| static const struct option OPTIONS[] = { |
| { "help", no_argument, 0, 'h' }, |
| { "affinity", required_argument, 0, 'a' }, |
| { 0, 0, 0, 0 } // termination of the option list |
| }; |
| int opt; |
| int option_index = 0; |
| while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, &option_index)) >= 0) { |
| std::string arg(optarg ? optarg : ""); |
| switch (opt) { |
| default: |
| case 'h': |
| printUsage(argv[0]); |
| exit(0); |
| break; |
| case 'a': |
| size_t cpu = std::stoi(arg); |
| if (cpu < std::thread::hardware_concurrency()) { |
| CPU_SET(cpu, &g_cpu_set); |
| } else { |
| std::cerr << "N must be < " << std::thread::hardware_concurrency() << std::endl; |
| exit(0); |
| } |
| break; |
| } |
| } |
| return optind; |
| } |
| |
| int main(int argc, char* argv[]) { |
| CPU_ZERO(&g_cpu_set); |
| |
| [[maybe_unused]] int option_index = handleCommandLineArgments(argc, argv); |
| [[maybe_unused]] int num_args = argc - option_index; |
| |
| if (CPU_COUNT(&g_cpu_set)) { |
| sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set); |
| } |
| |
| Profiler& profiler = Profiler::get(); |
| profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES); |
| |
| if (!profiler.isValid()) { |
| fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n"); |
| exit(0); |
| } |
| |
| size_t const stepInBytes = 1024; // 1 KiB steps |
| size_t const step = stepInBytes / CACHE_LINE_SIZE; |
| |
| std::cout << std::fixed << std::setprecision(2); |
| |
| printf("[KiB]\t[cyc]\t[refs]\t[MPKI]\t[ns]\n"); |
| |
| Profiler::Counters counters; |
| |
| for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) { |
| profiler.reset(); |
| |
| auto now = std::chrono::steady_clock::now(); |
| profiler.start(); |
| icache_test(REPETITIONS, i); |
| profiler.stop(); |
| auto duration = std::chrono::steady_clock::now() - now; |
| |
| profiler.readCounters(&counters); |
| |
| std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t" |
| << counters.getCpuCycles()/double(REPETITIONS) << "\t" |
| << counters.getL1IReferences()/double(REPETITIONS) << "\t" |
| << counters.getMPKI(counters.getL1IMisses()) << "\t" |
| << duration.count()/double(REPETITIONS) << "\t" |
| << std::endl; |
| } |
| |
| return 0; |
| } |