| #define _GNU_SOURCE |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <stdint.h> |
| #include <stdarg.h> |
| #include <string.h> |
| #include <getopt.h> |
| #include <pthread.h> |
| #include <errno.h> |
| #include "sched.h" |
| |
| |
/*
 * compile-time limits and defaults
 */
#define MAX_CPUS            32      /* cpu numbers 0..MAX_CPUS-1 are candidates */
#define DEFAULT_THRESHOLD   500     /* default maximum TSC skew */


/*
 * run-time settings, filled in from the command line
 */
char *program;                          /* prefix used in usage and error messages */
long threshold = DEFAULT_THRESHOLD;     /* largest |delta| that still passes */
int silent;                             /* non-zero: suppress normal output */
int verbose;                            /* non-zero: print per-iteration detail */


/*
 * long option table for getopt_long(); every entry returns the
 * matching short option letter, the last entry is the all-zero
 * terminator
 */
struct option options[] = {
    { .name = "cpus",      .has_arg = required_argument, .val = 'c' },
    { .name = "help",      .has_arg = no_argument,       .val = 'h' },
    { .name = "silent",    .has_arg = no_argument,       .val = 's' },
    { .name = "threshold", .has_arg = required_argument, .val = 't' },
    { .name = "verbose",   .has_arg = no_argument,       .val = 'v' },
    { 0 }
};
| |
| |
| void usage(void) |
| { |
| printf("usage: %s [-hsv] [-c <cpu_set>] [-t threshold]\n", program); |
| } |
| |
| |
| void help(void) |
| { |
| usage(); |
| printf("check TSC synchronization between CPUs\n"); |
| printf(" -c,--cpus set of cpus to test (default: all)\n"); |
| printf(" -h,--help show this message\n"); |
| printf(" -s,--silent no output if test is successful\n"); |
| printf(" -t,--threshold TSC skew threshold (default: %d cycles)\n", |
| DEFAULT_THRESHOLD); |
| printf(" -v,--verbose verbose output\n"); |
| } |
| |
| |
| void error(int err, const char *fmt, ...) |
| { |
| va_list ap; |
| |
| fprintf(stderr, "%s: ", program); |
| va_start(ap, fmt); |
| vfprintf(stderr, fmt, ap); |
| va_end(ap); |
| |
| if (err) |
| fprintf(stderr, ": %s\n", strerror(err)); |
| putc('\n', stderr); |
| } |
| |
| |
/*
 * parse a string containing a comma separated list of ranges
 * of cpu numbers such as: "0,2,4-7" into a cpu_set_t
 *
 * s    - the list to parse; numbers are read by strtol() with base 0
 * cpus - receives the result; it is cleared first, so an empty
 *        string produces an empty set
 *
 * returns 0 on success, or 1 after reporting the problem through
 * error(); on failure the set may hold the ranges parsed so far
 */
int parse_cpu_set(const char *s, cpu_set_t *cpus)
{
    CPU_ZERO(cpus);

    while (*s) {
        char *next;
        int cpu;
        /*
         * keep the full strtol() result: narrowing to int before
         * the range checks would let huge values wrap around into
         * the valid range
         */
        long start, end;

        start = end = strtol(s, &next, 0);
        if (s == next)
            break;
        s = next;

        if (*s == '-') {
            ++s;
            end = strtol(s, &next, 0);
            if (s == next) {
                /*
                 * "N-" at the very end would otherwise leave the
                 * loop with *s == '\0' and be accepted silently
                 */
                if (*s == '\0') {
                    error(0, "missing end of cpu range in cpu set");
                    return 1;
                }
                break;  /* reported below as an unexpected character */
            }
            s = next;
        }

        if (*s == ',')
            ++s;

        if (start < 0 || start >= CPU_SETSIZE) {
            error(0, "bad cpu number '%ld' in cpu set", start);
            return 1;
        }

        if (end < 0 || end >= CPU_SETSIZE) {
            error(0, "bad cpu number '%ld' in cpu set", end);
            return 1;
        }

        if (end < start) {
            error(0, "bad cpu range '%ld-%ld' in cpu set",
                  start, end);
            return 1;
        }

        /* both bounds are now known to fit in an int */
        for (cpu = (int)start; cpu <= (int)end; ++cpu)
            CPU_SET(cpu, cpus);
    }

    /* the loop stopped early on something that is not a number */
    if (*s) {
        error(0, "unexpected character '%c' in cpu set", *s);
        return 1;
    }

    return 0;
}
| |
| |
/*
 * a handshake flag padded out to CACHE_LINE_SIZE bytes
 * (presumably so that two flags never share a cache line)
 */
#define CACHE_LINE_SIZE 256
typedef union state {
    int state;
    char pad[CACHE_LINE_SIZE];
} state_t;

/*
 * compiler-only barrier: the compiler may not cache memory values
 * in registers or move memory accesses across it; no CPU fence
 * instruction is emitted
 */
#define barrier() __asm__ __volatile__("" : : : "memory")

/*
 * publish value v in flag s
 *
 * the leading barrier makes the compiler emit all earlier stores
 * (the data the flag announces) before the flag itself; the
 * trailing one keeps it from delaying the flag store past the
 * code that follows
 */
static inline void set_state(state_t *s, int v)
{
    barrier();
    s->state = v;
    barrier();
}

/*
 * spin until flag s holds value v
 *
 * the barrier inside the loop forces the flag to be re-read on
 * every pass; the one after the loop keeps later loads from being
 * hoisted above the final check when the loop body never runs
 */
static inline void wait_for_state(state_t *s, int v)
{
    while (s->state != v)
        barrier();
    barrier();
}
| |
/*
 * read the time stamp counter of the cpu this thread is running on
 * with the x86 "rdtsc" instruction
 */
static inline uint64_t rdtsc(void)
{
#if defined(__x86_64__)
    /* the two 32-bit halves come back in separate registers */
    uint32_t lo, hi;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

    return ((uint64_t)hi << 32) | lo;
#else
    /*
     * every other target (presumably 32-bit x86): the "=A"
     * constraint hands back the whole 64-bit value at once
     */
    uint64_t value;

    __asm__ __volatile__("rdtsc" : "=A" (value));

    return value;
#endif
}
| |
| #define READY 1 |
| #define DONE 2 |
| #define ERROR 3 |
| |
| state_t master; |
| state_t slave; |
| |
| int64_t slave_tsc; |
| int slave_cpu; |
| |
| |
/*
 * restrict the calling thread to a single cpu
 *
 * returns 0 on success; on failure reports the errno value through
 * error() and returns -1
 */
int set_cpu_affinity(int cpu)
{
    cpu_set_t only_this;
    int rc;

    CPU_ZERO(&only_this);
    CPU_SET(cpu, &only_this);

    rc = sched_setaffinity(0, sizeof only_this, &only_this);
    if (rc >= 0)
        return 0;

    error(errno, "sched_setaffinity() failed for CPU %d", cpu);
    return -1;
}
| |
| #define NUM_ITERS 10 |
| |
| int64_t |
| tsc_delta(int cpu_a, int cpu_b) |
| { |
| uint64_t best_t0 = 0; |
| uint64_t best_t1 = ~0ULL; |
| uint64_t best_tm = 0; |
| int64_t delta; |
| uint64_t t0, t1, tm; |
| int i; |
| |
| if (verbose) |
| printf("CPU %d - CPU %d\n", cpu_a, cpu_b); |
| |
| if (set_cpu_affinity(cpu_a) < 0) |
| return -1; |
| |
| slave_cpu = cpu_b; |
| |
| for (i = 0; i < NUM_ITERS; i++) { |
| |
| set_state(&master, READY); |
| |
| wait_for_state(&slave, READY); |
| |
| t0 = rdtsc(); |
| set_state(&master, DONE); |
| wait_for_state(&slave, DONE); |
| t1 = rdtsc(); |
| |
| if ((t1 - t0) < (best_t1 - best_t0)) { |
| best_t0 = t0; |
| best_t1 = t1; |
| best_tm = slave_tsc; |
| } |
| if (verbose) |
| printf("loop %2d: roundtrip = %5Ld\n", i, t1 - t0); |
| } |
| |
| delta = (best_t0/2 + best_t1/2 + (best_t0 & best_t1 & 1)) - best_tm; |
| |
| if (!silent) |
| printf("CPU %d - CPU %d = % 5Ld\n", cpu_a, cpu_b, delta); |
| |
| return delta; |
| } |
| |
| |
| void * |
| slave_thread(void *arg) |
| { |
| int current_cpu = -1; |
| |
| for(;;) { |
| |
| wait_for_state(&master, READY); |
| |
| if (slave_cpu < 0) { |
| return NULL; |
| } |
| |
| if (slave_cpu != current_cpu) { |
| |
| if (set_cpu_affinity(slave_cpu) < 0) { |
| set_state(&slave, ERROR); |
| return NULL; |
| } |
| |
| current_cpu = slave_cpu; |
| } |
| |
| set_state(&slave, READY); |
| |
| wait_for_state(&master, DONE); |
| |
| slave_tsc = rdtsc(); |
| |
| set_state(&slave, DONE); |
| } |
| return NULL; |
| } |
| |
| |
| int |
| check_tsc(cpu_set_t *cpus) |
| { |
| int cpu_a, cpu_b; |
| int64_t delta; |
| int err = 0; |
| pthread_t thread; |
| |
| if ((err = pthread_create(&thread, NULL, slave_thread, NULL))) { |
| error(err, "pthread_create_failed"); |
| return -1; |
| } |
| |
| |
| for (cpu_a = 0; cpu_a < MAX_CPUS; cpu_a++) { |
| if (!CPU_ISSET(cpu_a, cpus)) |
| continue; |
| |
| for (cpu_b = 0; cpu_b < MAX_CPUS; cpu_b++) { |
| if (!CPU_ISSET(cpu_b, cpus) || cpu_a == cpu_b) |
| continue; |
| |
| delta = tsc_delta(cpu_a, cpu_b); |
| |
| if (llabs(delta) > threshold) { |
| ++err; |
| } |
| } |
| } |
| |
| /* |
| * tell the slave thread to exit |
| */ |
| slave_cpu = -1; |
| set_state(&master, READY); |
| |
| pthread_join(thread, NULL); |
| |
| return err; |
| } |
| |
| |
| int |
| main(int argc, char *argv[]) |
| { |
| int c; |
| cpu_set_t cpus; |
| int errs = 0; |
| extern int optind; |
| extern char *optarg; |
| |
| if ((program = strrchr(argv[0], '/')) != NULL) |
| ++program; |
| else |
| program = argv[0]; |
| |
| /* |
| * default to checking all cpus |
| */ |
| for (c = 0; c < MAX_CPUS; c++) { |
| CPU_SET(c, &cpus); |
| } |
| |
| while ((c = getopt_long(argc, argv, "c:hst:v", options, NULL)) != EOF) { |
| switch (c) { |
| case 'c': |
| if (parse_cpu_set(optarg, &cpus) != 0) |
| ++errs; |
| break; |
| case 'h': |
| help(); |
| exit(0); |
| case 's': |
| ++silent; |
| break; |
| case 't': |
| threshold = strtol(optarg, NULL, 0); |
| break; |
| case 'v': |
| ++verbose; |
| break; |
| default: |
| ++errs; |
| break; |
| } |
| } |
| |
| if (errs || optind < argc) { |
| usage(); |
| exit(1); |
| } |
| |
| /* |
| * limit the set of CPUs to the ones that are currently available |
| * (Note that on some kernel versions sched_setaffinity() will fail |
| * if you specify CPUs that are not currently online so we ignore |
| * the return value and hope for the best) |
| */ |
| sched_setaffinity(0, sizeof cpus, &cpus); |
| if (sched_getaffinity(0, sizeof cpus, &cpus) < 0) { |
| error(errno, "sched_getaffinity() failed"); |
| exit(1); |
| } |
| |
| errs = check_tsc(&cpus); |
| |
| if (!silent) { |
| printf("%s\n", errs ? "FAIL" : "PASS"); |
| } |
| |
| return errs ? EXIT_FAILURE : EXIT_SUCCESS; |
| } |