| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <[email protected]> |
| * |
| */ |
| |
| /* FIXME: Convert numbers based on machine and file */ |
| #define _LARGEFILE64_SOURCE |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #ifndef NO_AUDIT |
| #include <libaudit.h> |
| #endif |
| #include "trace-local.h" |
| #include "trace-hash.h" |
| #include "trace-hash-local.h" |
| #include "list.h" |
| |
| #include <linux/time64.h> |
| |
| #ifdef WARN_NO_AUDIT |
| # warning "lib audit not found, using raw syscalls " \ |
| "(install audit-libs-devel(for fedora) or libaudit-dev(for debian/ubuntu) and try again)" |
| #endif |
| |
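| /* Task state characters and the max state bit, from the kernel's sched code */ |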
| #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" |
| #define TASK_STATE_MAX 1024 |
| |
| #define task_from_item(item) container_of(item, struct task_data, hash) |
| #define start_from_item(item) container_of(item, struct start_data, hash) |
| #define event_from_item(item) container_of(item, struct event_hash, hash) |
| #define stack_from_item(item) container_of(item, struct stack_data, hash) |
| #define group_from_item(item) container_of(item, struct group_data, hash) |
| #define event_data_from_item(item) container_of(item, struct event_data, hash) |
| |
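| /* |
| * Timestamps are in nanoseconds. For printing, they are split |
| * into whole seconds (nsecs_per_sec) and the remainder rounded |
| * to microseconds (mod_to_usec). |
| */ |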
| static unsigned long long nsecs_per_sec(unsigned long long ts) |
| { |
| return ts / NSEC_PER_SEC; |
| } |
| |
| static unsigned long long mod_to_usec(unsigned long long ts) |
| { |
| return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC; |
| } |
| |
| struct handle_data; |
| struct event_hash; |
| struct event_data; |
| |
| typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash); |
| typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid, |
| struct event_data *data, |
| struct tep_record *record, int cpu); |
| |
| enum event_data_type { |
| EVENT_TYPE_UNDEFINED, |
| EVENT_TYPE_STACK, |
| EVENT_TYPE_SCHED_SWITCH, |
| EVENT_TYPE_WAKEUP, |
| EVENT_TYPE_FUNC, |
| EVENT_TYPE_SYSCALL, |
| EVENT_TYPE_IRQ, |
| EVENT_TYPE_SOFTIRQ, |
| EVENT_TYPE_SOFTIRQ_RAISE, |
| EVENT_TYPE_PROCESS_EXEC, |
| EVENT_TYPE_USER_MATE, |
| }; |
| |
| struct event_data { |
| struct trace_hash_item hash; |
| int id; |
| int trace; |
| struct tep_event *event; |
| |
| struct event_data *end; |
| struct event_data *start; |
| |
| struct tep_format_field *pid_field; |
| struct tep_format_field *start_match_field; /* match with start */ |
| struct tep_format_field *end_match_field; /* match with end */ |
| struct tep_format_field *data_field; /* optional */ |
| |
| event_data_print print_func; |
| handle_event_func handle_event; |
| void *private; |
| int migrate; /* start/end pairs can migrate cpus */ |
| int global; /* use global tasks */ |
| enum event_data_type type; |
| }; |
| |
| struct stack_data { |
| struct trace_hash_item hash; |
| unsigned long long count; |
| unsigned long long time; |
| unsigned long long time_min; |
| unsigned long long ts_min; |
| unsigned long long time_max; |
| unsigned long long ts_max; |
| unsigned long long time_avg; |
| unsigned long size; |
| char caller[]; |
| }; |
| |
| struct stack_holder { |
| unsigned long size; |
| void *caller; |
| struct tep_record *record; |
| }; |
| |
| struct start_data { |
| struct trace_hash_item hash; |
| struct event_data *event_data; |
| struct list_head list; |
| struct task_data *task; |
| unsigned long long timestamp; |
| unsigned long long search_val; |
| unsigned long long val; |
| int cpu; |
| |
| struct stack_holder stack; |
| }; |
| |
| struct event_hash { |
| struct trace_hash_item hash; |
| struct event_data *event_data; |
| unsigned long long search_val; |
| unsigned long long val; |
| unsigned long long count; |
| unsigned long long time_total; |
| unsigned long long time_avg; |
| unsigned long long time_max; |
| unsigned long long ts_max; |
| unsigned long long time_min; |
| unsigned long long ts_min; |
| unsigned long long time_std; |
| unsigned long long last_time; |
| |
| struct trace_hash stacks; |
| }; |
| |
| struct group_data { |
| struct trace_hash_item hash; |
| char *comm; |
| struct trace_hash event_hash; |
| }; |
| |
| struct task_data { |
| struct trace_hash_item hash; |
| int pid; |
| int sleeping; |
| |
| char *comm; |
| |
| struct trace_hash start_hash; |
| struct trace_hash event_hash; |
| |
| struct task_data *proxy; |
| struct start_data *last_start; |
| struct event_hash *last_event; |
| struct tep_record *last_stack; |
| struct handle_data *handle; |
| struct group_data *group; |
| }; |
| |
| struct cpu_info { |
| int current; |
| }; |
| |
| struct sched_switch_data { |
| struct tep_format_field *prev_state; |
| int match_state; |
| }; |
| |
| struct handle_data { |
| struct handle_data *next; |
| struct tracecmd_input *handle; |
| struct tep_handle *pevent; |
| |
| struct trace_hash events; |
| struct trace_hash group_hash; |
| |
| struct cpu_info **cpu_data; |
| |
| struct tep_format_field *common_pid; |
| struct tep_format_field *wakeup_comm; |
| struct tep_format_field *switch_prev_comm; |
| struct tep_format_field *switch_next_comm; |
| |
| struct sched_switch_data sched_switch_blocked; |
| struct sched_switch_data sched_switch_preempt; |
| |
| struct trace_hash task_hash; |
| struct list_head *cpu_starts; |
| struct list_head migrate_starts; |
| |
| struct task_data *global_task; |
| struct task_data *global_percpu_tasks; |
| |
| int cpus; |
| }; |
| |
| static struct handle_data *handles; |
| static struct event_data *stacktrace_event; |
| static bool merge_like_comms = false; |
| |
| void trace_profile_set_merge_like_comms(void) |
| { |
| merge_like_comms = true; |
| } |
| |
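| /* |
| * add_start - record the start half of an event pair for @task. |
| * It is hashed by search_val so the matching end event can find |
| * it, and also queued on the per-CPU or migrate list so it can |
| * be flushed when events are missed. |
| */ |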
| static struct start_data * |
| add_start(struct task_data *task, |
| struct event_data *event_data, struct tep_record *record, |
| unsigned long long search_val, unsigned long long val) |
| { |
| struct start_data *start; |
| |
| start = malloc(sizeof(*start)); |
| if (!start) |
| return NULL; |
| memset(start, 0, sizeof(*start)); |
| start->hash.key = trace_hash(search_val); |
| start->search_val = search_val; |
| start->val = val; |
| start->timestamp = record->ts; |
| start->event_data = event_data; |
| start->cpu = record->cpu; |
| start->task = task; |
| trace_hash_add(&task->start_hash, &start->hash); |
| if (event_data->migrate) |
| list_add(&start->list, &task->handle->migrate_starts); |
| else |
| list_add(&start->list, &task->handle->cpu_starts[record->cpu]); |
| return start; |
| } |
| |
| struct event_data_match { |
| struct event_data *event_data; |
| unsigned long long search_val; |
| unsigned long long val; |
| }; |
| |
| static int match_start(struct trace_hash_item *item, void *data) |
| { |
| struct start_data *start = start_from_item(item); |
| struct event_data_match *edata = data; |
| |
| return start->event_data == edata->event_data && |
| start->search_val == edata->search_val; |
| } |
| |
| static int match_event(struct trace_hash_item *item, void *data) |
| { |
| struct event_data_match *edata = data; |
| struct event_hash *event = event_from_item(item); |
| |
| return event->event_data == edata->event_data && |
| event->search_val == edata->search_val && |
| event->val == edata->val; |
| } |
| |
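| /* |
| * find_event_hash - find (or allocate) the bucket that holds the |
| * count and timing statistics for one event_data/search_val/val |
| * triple of a task. |
| */ |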
| static struct event_hash * |
| find_event_hash(struct task_data *task, struct event_data_match *edata) |
| { |
| struct event_hash *event_hash; |
| struct trace_hash_item *item; |
| unsigned long long key; |
| |
| key = (unsigned long)edata->event_data + |
| (unsigned long)edata->search_val + |
| (unsigned long)edata->val; |
| key = trace_hash(key); |
| item = trace_hash_find(&task->event_hash, key, match_event, edata); |
| if (item) |
| return event_from_item(item); |
| |
| event_hash = malloc(sizeof(*event_hash)); |
| if (!event_hash) |
| return NULL; |
| memset(event_hash, 0, sizeof(*event_hash)); |
| |
| event_hash->event_data = edata->event_data; |
| event_hash->search_val = edata->search_val; |
| event_hash->val = edata->val; |
| event_hash->hash.key = key; |
| trace_hash_init(&event_hash->stacks, 32); |
| |
| trace_hash_add(&task->event_hash, &event_hash->hash); |
| |
| return event_hash; |
| } |
| |
| static struct event_hash * |
| find_start_event_hash(struct task_data *task, struct event_data *event_data, |
| struct start_data *start) |
| { |
| struct event_data_match edata; |
| |
| edata.event_data = event_data; |
| edata.search_val = start->search_val; |
| edata.val = start->val; |
| |
| return find_event_hash(task, &edata); |
| } |
| |
| static struct start_data * |
| find_start(struct task_data *task, struct event_data *event_data, |
| unsigned long long search_val) |
| { |
| unsigned long long key = trace_hash(search_val); |
| struct event_data_match edata; |
| void *data = &edata; |
| struct trace_hash_item *item; |
| struct start_data *start; |
| |
| edata.event_data = event_data; |
| edata.search_val = search_val; |
| |
| item = trace_hash_find(&task->start_hash, key, match_start, data); |
| if (!item) |
| return NULL; |
| |
| start = start_from_item(item); |
| return start; |
| } |
| |
| struct stack_match { |
| void *caller; |
| unsigned long size; |
| }; |
| |
| static int match_stack(struct trace_hash_item *item, void *data) |
| { |
| struct stack_data *stack = stack_from_item(item); |
| struct stack_match *match = data; |
| |
| if (match->size != stack->size) |
| return 0; |
| |
| return memcmp(stack->caller, match->caller, stack->size) == 0; |
| } |
| |
| |
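| /* |
| * add_event_stack - fold a kernel stack trace into an event hash, |
| * keyed by hashing the raw caller addresses. An already-seen |
| * stack just updates the count and timings of its entry. |
| */ |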
| static void add_event_stack(struct event_hash *event_hash, |
| void *caller, unsigned long size, |
| unsigned long long time, unsigned long long ts) |
| { |
| unsigned long long key; |
| struct stack_data *stack; |
| struct stack_match match; |
| struct trace_hash_item *item; |
| int i; |
| |
| match.caller = caller; |
| match.size = size; |
| |
| if (size < sizeof(int)) |
| die("Stack size of less than sizeof(int)??"); |
| |
| for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int)) |
| key += trace_hash(*(int *)(caller + i)); |
| |
| item = trace_hash_find(&event_hash->stacks, key, match_stack, &match); |
| if (!item) { |
| stack = malloc(sizeof(*stack) + size); |
| if (!stack) { |
| warning("Could not allocate stack"); |
| return; |
| } |
| memset(stack, 0, sizeof(*stack)); |
| memcpy(&stack->caller, caller, size); |
| stack->size = size; |
| stack->hash.key = key; |
| trace_hash_add(&event_hash->stacks, &stack->hash); |
| } else |
| stack = stack_from_item(item); |
| |
| stack->count++; |
| stack->time += time; |
| if (stack->count == 1 || time < stack->time_min) { |
| stack->time_min = time; |
| stack->ts_min = ts; |
| } |
| if (time > stack->time_max) { |
| stack->time_max = time; |
| stack->ts_max = ts; |
| } |
| } |
| |
| static void free_start(struct start_data *start) |
| { |
| if (start->task->last_start == start) |
| start->task->last_start = NULL; |
| if (start->stack.record) |
| tracecmd_free_record(start->stack.record); |
| trace_hash_del(&start->hash); |
| list_del(&start->list); |
| free(start); |
| } |
| |
| static struct event_hash * |
| add_and_free_start(struct task_data *task, struct start_data *start, |
| struct event_data *event_data, unsigned long long ts) |
| { |
| struct event_hash *event_hash; |
| long long delta; |
| |
| delta = ts - start->timestamp; |
| |
| /* |
| * On a live trace, timestamps may differ between CPUs, so an |
| * end event can appear to have happened before its start. When |
| * that happens, just zero out the delta. |
| */ |
| if (delta < 0) |
| delta = 0; |
| |
| event_hash = find_start_event_hash(task, event_data, start); |
| if (!event_hash) |
| return NULL; |
| event_hash->count++; |
| event_hash->time_total += delta; |
| event_hash->last_time = delta; |
| |
| if (delta > event_hash->time_max) { |
| event_hash->time_max = delta; |
| event_hash->ts_max = ts; |
| } |
| |
| if (event_hash->count == 1 || delta < event_hash->time_min) { |
| event_hash->time_min = delta; |
| event_hash->ts_min = ts; |
| } |
| |
| if (start->stack.record) { |
| unsigned long size; |
| void *caller; |
| |
| size = start->stack.size; |
| caller = start->stack.caller; |
| |
| add_event_stack(event_hash, caller, size, delta, |
| start->stack.record->ts); |
| tracecmd_free_record(start->stack.record); |
| start->stack.record = NULL; |
| } |
| |
| free_start(start); |
| |
| return event_hash; |
| } |
| |
| static struct event_hash * |
| find_and_update_start(struct task_data *task, struct event_data *event_data, |
| unsigned long long ts, unsigned long long search_val) |
| { |
| struct start_data *start; |
| |
| start = find_start(task, event_data, search_val); |
| if (!start) |
| return NULL; |
| return add_and_free_start(task, start, event_data, ts); |
| } |
| |
| static int match_task(struct trace_hash_item *item, void *data) |
| { |
| struct task_data *task = task_from_item(item); |
| int pid = *(int *)data; |
| |
| return task->pid == pid; |
| } |
| |
| static void init_task(struct handle_data *h, struct task_data *task) |
| { |
| task->handle = h; |
| |
| trace_hash_init(&task->start_hash, 16); |
| trace_hash_init(&task->event_hash, 32); |
| } |
| |
| static struct task_data * |
| add_task(struct handle_data *h, int pid) |
| { |
| unsigned long long key = trace_hash(pid); |
| struct task_data *task; |
| |
| task = malloc(sizeof(*task)); |
| if (!task) { |
| warning("Could not allocate task"); |
| return NULL; |
| } |
| memset(task, 0, sizeof(*task)); |
| |
| task->pid = pid; |
| task->hash.key = key; |
| trace_hash_add(&h->task_hash, &task->hash); |
| |
| init_task(h, task); |
| |
| return task; |
| } |
| |
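| /* |
| * find_task - look up (or create) the task for @pid. A one-entry |
| * cache (last_task) short-circuits repeated lookups of the same |
| * pid, which is the common case. |
| */ |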
| static struct task_data * |
| find_task(struct handle_data *h, int pid) |
| { |
| unsigned long long key = trace_hash(pid); |
| struct trace_hash_item *item; |
| static struct task_data *last_task; |
| void *data = &pid; |
| |
| if (last_task && last_task->pid == pid) |
| return last_task; |
| |
| item = trace_hash_find(&h->task_hash, key, match_task, data); |
| |
| if (item) |
| last_task = task_from_item(item); |
| else |
| last_task = add_task(h, pid); |
| |
| return last_task; |
| } |
| |
| static int match_group(struct trace_hash_item *item, void *data) |
| { |
| struct group_data *group = group_from_item(item); |
| |
| return strcmp(group->comm, (char *)data) == 0; |
| } |
| |
| |
| static void |
| add_task_comm(struct task_data *task, struct tep_format_field *field, |
| struct tep_record *record) |
| { |
| const char *comm; |
| |
| task->comm = malloc(field->size + 1); |
| if (!task->comm) { |
| warning("Could not allocate task comm"); |
| return; |
| } |
| comm = record->data + field->offset; |
| memcpy(task->comm, comm, field->size); |
| task->comm[field->size] = 0; |
| } |
| |
| /* Account for tasks that don't have starts */ |
| static void account_task(struct task_data *task, struct event_data *event_data, |
| struct tep_record *record) |
| { |
| struct event_data_match edata; |
| struct event_hash *event_hash; |
| struct task_data *proxy = NULL; |
| unsigned long long search_val = 0; |
| unsigned long long val = 0; |
| unsigned long long pid; |
| |
| /* |
| * If the event has pid_field set, the event really belongs to |
| * the task with that pid. Find that task instead, and let the |
| * current task act as its proxy for stack traces on this event. |
| */ |
| if (event_data->pid_field) { |
| tep_read_number_field(event_data->pid_field, |
| record->data, &pid); |
| proxy = task; |
| task = find_task(task->handle, pid); |
| if (!task) |
| return; |
| proxy->proxy = task; |
| } |
| |
| /* |
| * If data_field is defined, use it for val; if |
| * start_match_field is defined, use it for search_val. |
| */ |
| if (event_data->data_field) { |
| tep_read_number_field(event_data->data_field, |
| record->data, &val); |
| } |
| if (event_data->start_match_field) { |
| tep_read_number_field(event_data->start_match_field, |
| record->data, &search_val); |
| } |
| |
| edata.event_data = event_data; |
| edata.search_val = search_val; |
| edata.val = val; |
| |
| event_hash = find_event_hash(task, &edata); |
| if (!event_hash) { |
| warning("failed to allocate event_hash"); |
| return; |
| } |
| |
| event_hash->count++; |
| task->last_event = event_hash; |
| } |
| |
| static struct task_data * |
| find_event_task(struct handle_data *h, struct event_data *event_data, |
| struct tep_record *record, unsigned long long pid) |
| { |
| if (event_data->global) { |
| if (event_data->migrate) |
| return h->global_task; |
| else |
| return &h->global_percpu_tasks[record->cpu]; |
| } |
| |
| /* If pid_field is defined, use that to find the task */ |
| if (event_data->pid_field) |
| tep_read_number_field(event_data->pid_field, |
| record->data, &pid); |
| return find_task(h, pid); |
| } |
| |
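| /* |
| * handle_end_event - close a start/end pair: find the start that |
| * matches the start_match_field value and fold the time delta |
| * into the task's event hash. |
| */ |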
| static struct task_data * |
| handle_end_event(struct handle_data *h, struct event_data *event_data, |
| struct tep_record *record, int pid) |
| { |
| struct event_hash *event_hash; |
| struct task_data *task; |
| unsigned long long val; |
| |
| task = find_event_task(h, event_data, record, pid); |
| if (!task) |
| return NULL; |
| |
| tep_read_number_field(event_data->start_match_field, record->data, |
| &val); |
| event_hash = find_and_update_start(task, event_data->start, record->ts, val); |
| task->last_start = NULL; |
| task->last_event = event_hash; |
| |
| return task; |
| } |
| |
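| /* |
| * handle_start_event - open a start/end pair: save a start keyed |
| * by the end_match_field value so the matching end event can |
| * find it later. |
| */ |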
| static struct task_data * |
| handle_start_event(struct handle_data *h, struct event_data *event_data, |
| struct tep_record *record, unsigned long long pid) |
| { |
| struct start_data *start; |
| struct task_data *task; |
| unsigned long long val; |
| |
| task = find_event_task(h, event_data, record, pid); |
| if (!task) |
| return NULL; |
| |
| tep_read_number_field(event_data->end_match_field, record->data, |
| &val); |
| start = add_start(task, event_data, record, val, val); |
| if (!start) { |
| warning("Failed to allocate start of task"); |
| return NULL; |
| } |
| |
| task->last_start = start; |
| task->last_event = NULL; |
| |
| return task; |
| } |
| |
| static int handle_event_data(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *task = NULL; |
| |
| /* If this is the end of an event pair (start is set) */ |
| if (event_data->start) |
| task = handle_end_event(h, event_data, record, pid); |
| |
| /* If this is the start of an event pair (end is set) */ |
| if (event_data->end) { |
| task = handle_start_event(h, event_data, record, pid); |
| /* handle_start_event only returns NULL on error */ |
| if (!task) |
| return -1; |
| } |
| |
| if (!task) { |
| task = find_task(h, pid); |
| if (!task) |
| return -1; |
| task->proxy = NULL; |
| task->last_start = NULL; |
| task->last_event = NULL; |
| account_task(task, event_data, record); |
| } |
| |
| return 0; |
| } |
| |
| static void handle_missed_events(struct handle_data *h, int cpu) |
| { |
| struct start_data *start; |
| struct start_data *n; |
| |
| /* Clear all starts on this CPU */ |
| list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) { |
| free_start(start); |
| } |
| |
| /* Now clear all starts whose events can migrate */ |
| list_for_each_entry_safe(start, n, &h->migrate_starts, list) { |
| free_start(start); |
| } |
| } |
| |
| static int match_event_data(struct trace_hash_item *item, void *data) |
| { |
| struct event_data *event_data = event_data_from_item(item); |
| int id = (int)(unsigned long)data; |
| |
| return event_data->id == id; |
| } |
| |
| static struct event_data * |
| find_event_data(struct handle_data *h, int id) |
| { |
| struct trace_hash_item *item; |
| unsigned long long key = trace_hash(id); |
| void *data = (void *)(unsigned long)id; |
| |
| item = trace_hash_find(&h->events, key, match_event_data, data); |
| if (item) |
| return event_data_from_item(item); |
| return NULL; |
| } |
| |
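| /* |
| * trace_profile_record - called for every record that is read. |
| * It maps the record to its handle and event_data, then runs the |
| * event's own handler or falls back to the generic start/end |
| * pairing in handle_event_data(). |
| */ |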
| static void trace_profile_record(struct tracecmd_input *handle, |
| struct tep_record *record) |
| { |
| static struct handle_data *last_handle; |
| struct tep_record *stack_record; |
| struct event_data *event_data; |
| struct task_data *task; |
| struct handle_data *h; |
| struct tep_handle *pevent; |
| unsigned long long pid; |
| int cpu = record->cpu; |
| int id; |
| |
| if (last_handle && last_handle->handle == handle) |
| h = last_handle; |
| else { |
| for (h = handles; h; h = h->next) { |
| if (h->handle == handle) |
| break; |
| } |
| if (!h) |
| die("Handle not found?"); |
| last_handle = h; |
| } |
| |
| if (record->missed_events) |
| handle_missed_events(h, cpu); |
| |
| pevent = h->pevent; |
| |
| id = tep_data_type(pevent, record); |
| |
| event_data = find_event_data(h, id); |
| |
| if (!event_data) |
| return; |
| |
| |
| /* Get this current PID */ |
| tep_read_number_field(h->common_pid, record->data, &pid); |
| |
| task = find_task(h, pid); |
| if (!task) |
| return; |
| stack_record = task->last_stack; |
| |
| if (event_data->handle_event) |
| event_data->handle_event(h, pid, event_data, record, cpu); |
| else |
| handle_event_data(h, pid, event_data, record, cpu); |
| |
| /* If the last stack hasn't changed, free it */ |
| if (stack_record && task->last_stack == stack_record) { |
| tracecmd_free_record(stack_record); |
| task->last_stack = NULL; |
| } |
| } |
| |
| static struct event_data * |
| add_event(struct handle_data *h, const char *system, const char *event_name, |
| enum event_data_type type) |
| { |
| struct event_data *event_data; |
| struct tep_event *event; |
| |
| event = tep_find_event_by_name(h->pevent, system, event_name); |
| if (!event) |
| return NULL; |
| |
| if (!h->common_pid) { |
| h->common_pid = tep_find_common_field(event, "common_pid"); |
| if (!h->common_pid) |
| die("No 'common_pid' found in event"); |
| } |
| |
| event_data = malloc(sizeof(*event_data)); |
| if (!event_data) { |
| warning("Could not allocate event_data"); |
| return NULL; |
| } |
| memset(event_data, 0, sizeof(*event_data)); |
| event_data->id = event->id; |
| event_data->event = event; |
| event_data->type = type; |
| event_data->hash.key = trace_hash(event_data->event->id); |
| |
| trace_hash_add(&h->events, &event_data->hash); |
| |
| return event_data; |
| } |
| |
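| /* |
| * mate_events - pair @start with @end. A start record matches an |
| * end record when the start's @end_match_field value equals the |
| * end's @start_match_field value. |
| */ |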
| static void |
| mate_events(struct handle_data *h, struct event_data *start, |
| const char *pid_field, const char *end_match_field, |
| struct event_data *end, const char *start_match_field, |
| int migrate, int global) |
| { |
| start->end = end; |
| end->start = start; |
| |
| if (pid_field) { |
| start->pid_field = tep_find_field(start->event, pid_field); |
| if (!start->pid_field) |
| die("Event: %s does not have field %s", |
| start->event->name, pid_field); |
| } |
| |
| /* Field to match with end */ |
| start->end_match_field = tep_find_field(start->event, end_match_field); |
| if (!start->end_match_field) |
| die("Event: %s does not have field %s", |
| start->event->name, end_match_field); |
| |
| /* Field to match with start */ |
| end->start_match_field = tep_find_field(end->event, start_match_field); |
| if (!end->start_match_field) |
| die("Event: %s does not have field %s", |
| end->event->name, start_match_field); |
| |
| start->migrate = migrate; |
| start->global = global; |
| end->migrate = migrate; |
| end->global = global; |
| } |
| |
| /** |
| * tracecmd_mate_events - match events to profile against |
| * @handle: The input handle where the events exist. |
| * @start_event: The event that starts the transaction |
| * @pid_field: Use this over common_pid (may be NULL to use common_pid) |
| * @end_match_field: The field that matches the end events @start_match_field |
| * @end_event: The event that ends the transaction |
| * @start_match_field: The end event field that matches start's @end_match_field |
| * @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no |
| * @global: The events are global and not per task |
| */ |
| void tracecmd_mate_events(struct tracecmd_input *handle, |
| struct tep_event *start_event, |
| const char *pid_field, const char *end_match_field, |
| struct tep_event *end_event, |
| const char *start_match_field, |
| int migrate, int global) |
| { |
| struct handle_data *h; |
| struct event_data *start; |
| struct event_data *end; |
| |
| for (h = handles; h; h = h->next) { |
| if (h->handle == handle) |
| break; |
| } |
| if (!h) |
| die("Handle not found for trace profile"); |
| |
| start = add_event(h, start_event->system, start_event->name, |
| EVENT_TYPE_USER_MATE); |
| |
| end = add_event(h, end_event->system, end_event->name, |
| EVENT_TYPE_USER_MATE); |
| |
| if (!start || !end) |
| return; |
| |
| mate_events(h, start, pid_field, end_match_field, end, start_match_field, |
| migrate, global); |
| } |
| |
| static void func_print(struct trace_seq *s, struct event_hash *event_hash) |
| { |
| const char *func; |
| |
| func = tep_find_function(event_hash->event_data->event->tep, |
| event_hash->val); |
| if (func) |
| trace_seq_printf(s, "func: %s()", func); |
| else |
| trace_seq_printf(s, "func: 0x%llx", event_hash->val); |
| } |
| |
| static void syscall_print(struct trace_seq *s, struct event_hash *event_hash) |
| { |
| #ifndef NO_AUDIT |
| const char *name = NULL; |
| int machine; |
| |
| machine = audit_detect_machine(); |
| if (machine < 0) |
| goto fail; |
| name = audit_syscall_to_name(event_hash->val, machine); |
| if (!name) |
| goto fail; |
| trace_seq_printf(s, "syscall:%s", name); |
| return; |
| fail: |
| #endif |
| trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, |
| (int)event_hash->val); |
| } |
| |
| /* From Linux include/linux/interrupt.h */ |
| #define SOFTIRQS \ |
| C(HI), \ |
| C(TIMER), \ |
| C(NET_TX), \ |
| C(NET_RX), \ |
| C(BLOCK), \ |
| C(BLOCK_IOPOLL), \ |
| C(TASKLET), \ |
| C(SCHED), \ |
| C(HRTIMER), \ |
| C(RCU), \ |
| C(NR), |
| |
| #undef C |
| #define C(a) a##_SOFTIRQ |
| |
| enum { SOFTIRQS }; |
| |
| #undef C |
| #define C(a) #a |
| |
| static const char *softirq_map[] = { SOFTIRQS }; |
| |
| static void softirq_print(struct trace_seq *s, struct event_hash *event_hash) |
| { |
| int softirq = (int)event_hash->val; |
| |
| if (softirq < NR_SOFTIRQ) |
| trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name, |
| softirq_map[softirq]); |
| else |
| trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, |
| softirq); |
| } |
| |
| static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash) |
| { |
| const char states[] = TASK_STATE_TO_CHAR_STR; |
| int i; |
| |
| trace_seq_printf(s, "%s:", event_hash->event_data->event->name); |
| |
| if (event_hash->val) { |
| int val = event_hash->val; |
| |
| for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) { |
| if (val & 1) |
| trace_seq_putc(s, states[i+1]); |
| } |
| } else |
| trace_seq_putc(s, 'R'); |
| } |
| |
| static int handle_sched_switch_event(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *task; |
| unsigned long long prev_pid; |
| unsigned long long prev_state; |
| unsigned long long next_pid; |
| struct start_data *start; |
| |
| /* pid_field holds prev_pid, data_field holds prev_state */ |
| tep_read_number_field(event_data->pid_field, |
| record->data, &prev_pid); |
| |
| tep_read_number_field(event_data->data_field, |
| record->data, &prev_state); |
| |
| /* only care about real states */ |
| prev_state &= TASK_STATE_MAX - 1; |
| |
| /* end_match_field holds next_pid */ |
| tep_read_number_field(event_data->end_match_field, |
| record->data, &next_pid); |
| |
| task = find_task(h, prev_pid); |
| if (!task) |
| return -1; |
| if (!task->comm) |
| add_task_comm(task, h->switch_prev_comm, record); |
| |
| if (prev_state) |
| task->sleeping = 1; |
| else |
| task->sleeping = 0; |
| |
| /* task is being scheduled out. prev_state tells why */ |
| start = add_start(task, event_data, record, prev_pid, prev_state); |
| task->last_start = start; |
| task->last_event = NULL; |
| |
| task = find_task(h, next_pid); |
| if (!task) |
| return -1; |
| |
| if (!task->comm) |
| add_task_comm(task, h->switch_next_comm, record); |
| |
| /* |
| * If the next task was blocked, it required a wakeup to |
| * restart, and there should be one. |
| * But if it was preempted, we look for the previous sched switch. |
| * Unfortunately, we have to look for both types of events as |
| * we do not know why next_pid scheduled out. |
| * |
| * event_data->start holds the sched_wakeup event data. |
| */ |
| find_and_update_start(task, event_data->start, record->ts, next_pid); |
| |
| /* Look for this task if it was preempted (no wakeup found). */ |
| find_and_update_start(task, event_data, record->ts, next_pid); |
| |
| return 0; |
| } |
| |
| static int handle_stacktrace_event(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *orig_task; |
| struct task_data *proxy; |
| struct task_data *task; |
| unsigned long long size; |
| struct event_hash *event_hash; |
| struct start_data *start; |
| void *caller; |
| |
| task = find_task(h, pid); |
| if (!task) |
| return -1; |
| |
| if (task->last_stack) { |
| tracecmd_free_record(task->last_stack); |
| task->last_stack = NULL; |
| } |
| |
| if ((proxy = task->proxy)) { |
| task->proxy = NULL; |
| orig_task = task; |
| task = proxy; |
| } |
| |
| if (!task->last_start && !task->last_event) { |
| /* |
| * Save this stack in case function graph needs it. |
| * Need the original task, not a proxy. |
| */ |
| if (proxy) |
| task = orig_task; |
| tracecmd_record_ref(record); |
| task->last_stack = record; |
| return 0; |
| } |
| |
| /* |
| * data_field points at the "caller" array; the stack trace |
| * fills the rest of the record, so its size is the record |
| * size minus the field's offset. |
| */ |
| size = record->size - event_data->data_field->offset; |
| caller = record->data + event_data->data_field->offset; |
| |
| /* |
| * If there's a "start" then don't add the stack until |
| * it finds a matching "end". |
| */ |
| if ((start = task->last_start)) { |
| tracecmd_record_ref(record); |
| start->stack.record = record; |
| start->stack.size = size; |
| start->stack.caller = caller; |
| task->last_start = NULL; |
| task->last_event = NULL; |
| return 0; |
| } |
| |
| event_hash = task->last_event; |
| task->last_event = NULL; |
| |
| add_event_stack(event_hash, caller, size, event_hash->last_time, |
| record->ts); |
| |
| return 0; |
| } |
| |
| static int handle_fgraph_entry_event(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| unsigned long long size; |
| struct start_data *start; |
| struct task_data *task; |
| void *caller; |
| |
| task = handle_start_event(h, event_data, record, pid); |
| if (!task) |
| return -1; |
| |
| /* |
| * If a stack trace hasn't been used for a previous task, |
| * then it could be a function trace that we can use for |
| * the function graph. But stack traces come before the function |
| * graph events (unfortunately). So we need to attach the previous |
| * stack trace (if there is one) to this start event. |
| */ |
| if (task->last_stack) { |
| start = task->last_start; |
| record = task->last_stack; |
| size = record->size - stacktrace_event->data_field->offset; |
| caller = record->data + stacktrace_event->data_field->offset; |
| start->stack.record = record; |
| start->stack.size = size; |
| start->stack.caller = caller; |
| task->last_stack = NULL; |
| task->last_event = NULL; |
| } |
| |
| /* Do not map stacks after this event to this event */ |
| task->last_start = NULL; |
| |
| return 0; |
| } |
| |
| static int handle_fgraph_exit_event(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *task; |
| |
| task = handle_end_event(h, event_data, record, pid); |
| if (!task) |
| return -1; |
| /* Do not match stacks with function graph exit events */ |
| task->last_event = NULL; |
| |
| return 0; |
| } |
| |
| static int handle_process_exec(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *task; |
| unsigned long long val; |
| |
| /* Task has execed, remove the comm for it */ |
| if (event_data->data_field) { |
| tep_read_number_field(event_data->data_field, |
| record->data, &val); |
| pid = val; |
| } |
| |
| task = find_task(h, pid); |
| if (!task) |
| return -1; |
| |
| free(task->comm); |
| task->comm = NULL; |
| |
| return 0; |
| } |
| |
| static int handle_sched_wakeup_event(struct handle_data *h, |
| unsigned long long pid, |
| struct event_data *event_data, |
| struct tep_record *record, int cpu) |
| { |
| struct task_data *proxy; |
| struct task_data *task = NULL; |
| struct start_data *start; |
| unsigned long long success; |
| |
| proxy = find_task(h, pid); |
| if (!proxy) |
| return -1; |
| |
| /* If present, data_field holds "success" */ |
| if (event_data->data_field) { |
| tep_read_number_field(event_data->data_field, |
| record->data, &success); |
| |
| /* If not a successful wakeup, ignore this */ |
| if (!success) |
| return 0; |
| } |
| |
| tep_read_number_field(event_data->pid_field, |
| record->data, &pid); |
| |
| task = find_task(h, pid); |
| if (!task) |
| return -1; |
| |
| if (!task->comm) |
| add_task_comm(task, h->wakeup_comm, record); |
| |
| /* if the task isn't sleeping, then ignore the wake up */ |
| if (!task->sleeping) { |
| /* Ignore any following stack traces */ |
| proxy->proxy = NULL; |
| proxy->last_start = NULL; |
| proxy->last_event = NULL; |
| return 0; |
| } |
| |
| /* It's being woken up */ |
| task->sleeping = 0; |
| |
| /* |
| * We need the stack trace to be hooked to the woken up |
| * task, not the waker. |
| */ |
| proxy->proxy = task; |
| |
| /* There should be a blocked schedule out of this task */ |
| find_and_update_start(task, event_data->start, record->ts, pid); |
| |
| /* Set this up for timing how long the wakeup takes */ |
| start = add_start(task, event_data, record, pid, pid); |
| task->last_event = NULL; |
| task->last_start = start; |
| |
| return 0; |
| } |
| |
| void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook, |
| int global) |
| { |
| struct tep_handle *pevent = tracecmd_get_tep(handle); |
| struct tep_format_field **fields; |
| struct handle_data *h; |
| struct event_data *event_data; |
| struct event_data *sched_switch; |
| struct event_data *sched_wakeup; |
| struct event_data *irq_entry; |
| struct event_data *irq_exit; |
| struct event_data *softirq_entry; |
| struct event_data *softirq_exit; |
| struct event_data *softirq_raise; |
| struct event_data *fgraph_entry; |
| struct event_data *fgraph_exit; |
| struct event_data *syscall_enter; |
| struct event_data *syscall_exit; |
| struct event_data *process_exec; |
| struct event_data *start_event; |
| struct event_data *end_event; |
| struct tep_event **events; |
| int ret; |
| int i; |
| |
| tracecmd_set_show_data_func(handle, trace_profile_record); |
| h = malloc(sizeof(*h)); |
| if (!h) { |
| warning("Could not allocate handle"); |
| return; |
| } |
| memset(h, 0, sizeof(*h)); |
| h->next = handles; |
| handles = h; |
| |
| trace_hash_init(&h->task_hash, 1024); |
| trace_hash_init(&h->events, 1024); |
| trace_hash_init(&h->group_hash, 512); |
| |
| h->handle = handle; |
| h->pevent = pevent; |
| |
| h->cpus = tracecmd_cpus(handle); |
| |
| /* |
| * For streaming profiling, cpus will not be set up yet. |
| * In this case, we simply use the number of cpus on the |
| * system. |
| */ |
| if (!h->cpus) |
| h->cpus = tracecmd_count_cpus(); |
| |
| list_head_init(&h->migrate_starts); |
| h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus); |
| if (!h->cpu_starts) |
| goto free_handle; |
| |
| for (i = 0; i < h->cpus; i++) |
| list_head_init(&h->cpu_starts[i]); |
| |
| h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data)); |
| if (!h->cpu_data) |
| goto free_starts; |
| |
| memset(h->cpu_data, 0, h->cpus * sizeof(*h->cpu_data)); |
| |
| h->global_task = malloc(sizeof(struct task_data)); |
| if (!h->global_task) |
| goto free_data; |
| |
| memset(h->global_task, 0, sizeof(struct task_data)); |
| init_task(h, h->global_task); |
| h->global_task->comm = strdup("Global Events"); |
| if (!h->global_task->comm) |
| die("malloc"); |
| h->global_task->pid = -1; |
| |
| h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data)); |
| if (!h->global_percpu_tasks) |
| die("malloc"); |
| for (i = 0; i < h->cpus; i++) { |
| init_task(h, &h->global_percpu_tasks[i]); |
| ret = asprintf(&h->global_percpu_tasks[i].comm, |
| "Global CPU[%d] Events", i); |
| if (ret < 0) |
| die("malloc"); |
| h->global_percpu_tasks[i].pid = -1 - i; |
| } |
| |
| irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ); |
| irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ); |
| softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ); |
| softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ); |
| softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE); |
| sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP); |
| sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH); |
| fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC); |
| fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC); |
| syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL); |
| syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL); |
| |
| process_exec = add_event(h, "sched", "sched_process_exec", |
| EVENT_TYPE_PROCESS_EXEC); |
| |
| stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK); |
| if (stacktrace_event) { |
| stacktrace_event->handle_event = handle_stacktrace_event; |
| |
| stacktrace_event->data_field = tep_find_field(stacktrace_event->event, |
| "caller"); |
| if (!stacktrace_event->data_field) |
| die("Event: %s does not have field caller", |
| stacktrace_event->event->name); |
| } |
| |
| if (process_exec) { |
| process_exec->handle_event = handle_process_exec; |
| process_exec->data_field = tep_find_field(process_exec->event, |
| "old_pid"); |
| } |
| |
| if (sched_switch) { |
| sched_switch->handle_event = handle_sched_switch_event; |
| sched_switch->data_field = tep_find_field(sched_switch->event, |
| "prev_state"); |
| if (!sched_switch->data_field) |
| die("Event: %s does not have field prev_state", |
| sched_switch->event->name); |
| |
| h->switch_prev_comm = tep_find_field(sched_switch->event, |
| "prev_comm"); |
| if (!h->switch_prev_comm) |
| die("Event: %s does not have field prev_comm", |
| sched_switch->event->name); |
| |
| h->switch_next_comm = tep_find_field(sched_switch->event, |
| "next_comm"); |
| if (!h->switch_next_comm) |
| die("Event: %s does not have field next_comm", |
| sched_switch->event->name); |
| |
| sched_switch->print_func = sched_switch_print; |
| } |
| |
| if (sched_switch && sched_wakeup) { |
| mate_events(h, sched_switch, "prev_pid", "next_pid", |
| sched_wakeup, "pid", 1, 0); |
| mate_events(h, sched_wakeup, "pid", "pid", |
| sched_switch, "prev_pid", 1, 0); |
| sched_wakeup->handle_event = handle_sched_wakeup_event; |
| |
| /* The 'success' field may or may not be present */ |
| sched_wakeup->data_field = tep_find_field(sched_wakeup->event, |
| "success"); |
| |
| h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm"); |
| if (!h->wakeup_comm) |
| die("Event: %s does not have field comm", |
| sched_wakeup->event->name); |
| } |
| |
| if (irq_entry && irq_exit) |
| mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global); |
| |
| if (softirq_entry) |
| softirq_entry->print_func = softirq_print; |
| |
| if (softirq_exit) |
| softirq_exit->print_func = softirq_print; |
| |
| if (softirq_raise) |
| softirq_raise->print_func = softirq_print; |
| |
| if (softirq_entry && softirq_exit) |
| mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec", |
| 0, global); |
| |
| if (softirq_entry && softirq_raise) |
| mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec", |
| 0, global); |
| |
| if (fgraph_entry && fgraph_exit) { |
| mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0); |
| fgraph_entry->handle_event = handle_fgraph_entry_event; |
| fgraph_exit->handle_event = handle_fgraph_exit_event; |
| fgraph_entry->print_func = func_print; |
| } |
| |
| if (syscall_enter && syscall_exit) { |
| mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0); |
| syscall_enter->print_func = syscall_print; |
| syscall_exit->print_func = syscall_print; |
| } |
| |
| events = tep_list_events(pevent, TEP_EVENT_SORT_ID); |
| if (!events) |
| die("malloc"); |
| |
| /* Add some other events */ |
| event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC); |
| if (event_data) { |
| event_data->data_field = |
| tep_find_field(event_data->event, "ip"); |
| } |
| |
| /* Add any user defined hooks */ |
| for (; hook; hook = hook->next) { |
| start_event = add_event(h, hook->start_system, hook->start_event, |
| EVENT_TYPE_USER_MATE); |
| end_event = add_event(h, hook->end_system, hook->end_event, |
| EVENT_TYPE_USER_MATE); |
| if (!start_event) { |
| warning("Event %s not found", hook->start_event); |
| continue; |
| } |
| if (!end_event) { |
| warning("Event %s not found", hook->end_event); |
| continue; |
| } |
| mate_events(h, start_event, hook->pid, hook->start_match, |
| end_event, hook->end_match, hook->migrate, |
| hook->global); |
| } |
| |
| /* Now add any defined event that we haven't processed */ |
| for (i = 0; events[i]; i++) { |
| event_data = find_event_data(h, events[i]->id); |
| if (event_data) |
| continue; |
| |
| event_data = add_event(h, events[i]->system, events[i]->name, |
| EVENT_TYPE_UNDEFINED); |
| /* add_event() can fail to allocate */ |
| if (!event_data) |
| continue; |
| |
| fields = tep_event_fields(events[i]); |
| if (!fields) |
| die("malloc"); |
| |
| if (fields[0]) |
| event_data->data_field = fields[0]; |
| |
| free(fields); |
| } |
| return; |
| |
| free_data: |
| free(h->cpu_data); |
| free_starts: |
| free(h->cpu_starts); |
| free_handle: |
| handles = h->next; |
| free(h); |
| warning("Failed handle allocations"); |
| } |
| |
| static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack) |
| { |
| int longsize = tep_get_long_size(pevent); |
| unsigned long long val; |
| const char *func; |
| unsigned long long stop = -1ULL; |
| void *ptr; |
| int i; |
| |
| if (longsize < 8) |
| stop &= (1ULL << (longsize * 8)) - 1; |
| |
| if (stack->count) |
| stack->time_avg = stack->time / stack->count; |
| |
| printf(" <stack> %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n", |
| stack->count, stack->time, stack->time_min, |
| nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min), |
| stack->time_max, |
| nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max), |
| stack->time_avg); |
| |
| for (i = 0; i < stack->size; i += longsize) { |
| ptr = stack->caller + i; |
| switch (longsize) { |
| case 4: |
| /* todo, read value from pevent */ |
| val = *(unsigned int *)ptr; |
| break; |
| case 8: |
| val = *(unsigned long long *)ptr; |
| break; |
| default: |
| die("Strange long size %d", longsize); |
| } |
| if (val == stop) |
| break; |
| func = tep_find_function(pevent, val); |
| if (func) |
| printf(" => %s (0x%llx)\n", func, val); |
| else |
| printf(" => 0x%llx\n", val); |
| } |
| } |
| |
| struct stack_chain { |
| struct stack_chain *children; |
| unsigned long long val; |
| unsigned long long time; |
| unsigned long long time_min; |
| unsigned long long ts_min; |
| unsigned long long time_max; |
| unsigned long long ts_max; |
| unsigned long long time_avg; |
| unsigned long long count; |
| int percent; |
| int nr_children; |
| }; |
| |
| static int compare_chains(const void *a, const void *b) |
| { |
| const struct stack_chain * A = a; |
| const struct stack_chain * B = b; |
| |
| if (A->time > B->time) |
| return -1; |
| if (A->time < B->time) |
| return 1; |
| /* If stacks don't use time, then use count */ |
| if (A->count > B->count) |
| return -1; |
| if (A->count < B->count) |
| return 1; |
| return 0; |
| } |
| |
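| /* Integer percentage of @val in @total, rounded to the nearest */ |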
| static int calc_percent(unsigned long long val, unsigned long long total) |
| { |
| return (val * 100 + total / 2) / total; |
| } |
| |
| static int stack_overflows(struct stack_data *stack, int longsize, int level) |
| { |
| return longsize * level > stack->size - longsize; |
| } |
| |
| static unsigned long long |
| stack_value(struct stack_data *stack, int longsize, int level) |
| { |
| void *ptr; |
| |
| ptr = &stack->caller[longsize * level]; |
| return longsize == 8 ? *(u64 *)ptr : *(unsigned *)ptr; |
| } |
| |
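| /* |
| * make_stack_chain - recursively fold an array of sorted stacks |
| * into a tree: stacks that share the same caller at this @level |
| * become one node, and its children are built from level + 1. |
| */ |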
| static struct stack_chain * |
| make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level, |
| int *nr_children) |
| { |
| struct stack_chain *chain; |
| unsigned long long total_time = 0; |
| unsigned long long total_count = 0; |
| unsigned long long time; |
| unsigned long long time_min; |
| unsigned long long ts_min; |
| unsigned long long time_max; |
| unsigned long long ts_max; |
| unsigned long long count; |
| unsigned long long stop = -1ULL; |
| int nr_chains = 0; |
| u64 last = 0; |
| u64 val; |
| int start; |
| int i; |
| int x; |
| |
| if (longsize < 8) |
| stop &= (1ULL << (longsize * 8)) - 1; |
| |
| /* First find out how many diffs there are */ |
| for (i = 0; i < cnt; i++) { |
| if (stack_overflows(stacks[i], longsize, level)) |
| continue; |
| |
| val = stack_value(stacks[i], longsize, level); |
| |
| if (val == stop) |
| continue; |
| |
| if (!nr_chains || val != last) |
| nr_chains++; |
| last = val; |
| } |
| |
| if (!nr_chains) { |
| *nr_children = 0; |
| return NULL; |
| } |
| |
| chain = malloc(sizeof(*chain) * nr_chains); |
| if (!chain) { |
| warning("Could not allocate chain"); |
| return NULL; |
| } |
| memset(chain, 0, sizeof(*chain) * nr_chains); |
| |
| x = 0; |
| count = 0; |
| start = 0; |
| time = 0; |
| time_min = 0; |
| ts_min = 0; |
| time_max = 0; |
| ts_max = 0; |
| |
| for (i = 0; i < cnt; i++) { |
| if (stack_overflows(stacks[i], longsize, level)) { |
| start = i+1; |
| continue; |
| } |
| |
| val = stack_value(stacks[i], longsize, level); |
| |
| if (val == stop) { |
| start = i+1; |
| continue; |
| } |
| |
| count += stacks[i]->count; |
| time += stacks[i]->time; |
| if (stacks[i]->time_max > time_max) { |
| time_max = stacks[i]->time_max; |
| ts_max = stacks[i]->ts_max; |
| } |
| if (i == start || stacks[i]->time_min < time_min) { |
| time_min = stacks[i]->time_min; |
| ts_min = stacks[i]->ts_min; |
| } |
| if (i == cnt - 1 || |
| stack_overflows(stacks[i+1], longsize, level) || |
| val != stack_value(stacks[i+1], longsize, level)) { |
| |
| total_time += time; |
| total_count += count; |
| chain[x].val = val; |
| chain[x].time_avg = time / count; |
| chain[x].count = count; |
| chain[x].time = time; |
| chain[x].time_min = time_min; |
| chain[x].ts_min = ts_min; |
| chain[x].time_max = time_max; |
| chain[x].ts_max = ts_max; |
| chain[x].children = |
| make_stack_chain(&stacks[start], (i - start) + 1, |
| longsize, level+1, |
| &chain[x].nr_children); |
| x++; |
| start = i + 1; |
| count = 0; |
| time = 0; |
| time_min = 0; |
| ts_min = 0; |
| time_max = 0; |
| ts_max = 0; |
| } |
| } |
| |
| qsort(chain, nr_chains, sizeof(*chain), compare_chains); |
| |
| *nr_children = nr_chains; |
| |
| /* Should never happen */ |
| if (!total_time && !total_count) |
| return chain; |
| |
| |
| /* Now calculate percentage */ |
| for (i = 0; i < nr_chains; i++) { |
| if (total_time) |
| chain[i].percent = calc_percent(chain[i].time, total_time); |
| /* In case stacks don't have time */ |
| else if (total_count) |
| chain[i].percent = calc_percent(chain[i].count, total_count); |
| } |
| |
| return chain; |
| } |
| |
| static void free_chain(struct stack_chain *chain, int nr_chains) |
| { |
| int i; |
| |
| if (!chain) |
| return; |
| |
| for (i = 0; i < nr_chains; i++) |
| free_chain(chain[i].children, chain[i].nr_children); |
| |
| free(chain); |
| } |
| |
| #define INDENT 5 |
| |
| static void print_indent(int level, unsigned long long mask) |
| { |
| char line; |
| int p; |
| |
| for (p = 0; p < level + 1; p++) { |
| if (mask & (1ULL << p)) |
| line = '|'; |
| else |
| line = ' '; |
| printf("%*c ", INDENT, line); |
| } |
| } |
| |
| static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain) |
| { |
| unsigned long long val = chain->val; |
| const char *func; |
| |
| func = tep_find_function(pevent, val); |
| if (func) |
| printf("%s (0x%llx)\n", func, val); |
| else |
| printf("0x%llx\n", val); |
| } |
| |
| static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level, |
| int nr_chains, unsigned long long *mask) |
| { |
| struct stack_chain *child; |
| int nr_children; |
| int i; |
| char line = '|'; |
| |
| if (!nr_chains) |
| return; |
| |
| *mask |= (1ULL << (level + 1)); |
| print_indent(level + 1, *mask); |
| printf("\n"); |
| |
| for (i = 0; i < nr_chains; i++) { |
| |
| print_indent(level, *mask); |
| |
| printf("%*c ", INDENT, '+'); |
| |
| if (i == nr_chains - 1) { |
| *mask &= ~(1ULL << (level + 1)); |
| line = ' '; |
| } |
| |
| print_chain_func(pevent, &chain[i]); |
| |
| print_indent(level, *mask); |
| |
| printf("%*c ", INDENT, line); |
| printf(" %d%% (%lld)", chain[i].percent, chain[i].count); |
| if (chain[i].time) |
| printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld", |
| chain[i].time, chain[i].time_max, |
| nsecs_per_sec(chain[i].ts_max), |
| mod_to_usec(chain[i].ts_max), |
| chain[i].time_min, |
| nsecs_per_sec(chain[i].ts_min), |
| mod_to_usec(chain[i].ts_min), |
| chain[i].time_avg); |
| printf("\n"); |
| |
| for (child = chain[i].children, nr_children = chain[i].nr_children; |
| child && nr_children == 1; |
| nr_children = child->nr_children, child = child->children) { |
| print_indent(level, *mask); |
| printf("%*c ", INDENT, line); |
| printf(" "); |
| print_chain_func(pevent, child); |
| } |
| |
| if (child) |
| output_chain(pevent, child, level+1, nr_children, mask); |
| |
| print_indent(level + 1, *mask); |
| printf("\n"); |
| } |
| *mask &= ~(1ULL << (level + 1)); |
| print_indent(level, *mask); |
| printf("\n"); |
| } |
| |
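| /* |
| * compare_stacks - order stacks by their raw caller words so |
| * that stacks with common prefixes end up adjacent for |
| * make_stack_chain(). |
| */ |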
| static int compare_stacks(const void *a, const void *b) |
| { |
| struct stack_data * const *A = a; |
| struct stack_data * const *B = b; |
| unsigned int sa, sb; |
| int size; |
| int i; |
| |
| /* only compare up to the smaller size of the two */ |
| if ((*A)->size > (*B)->size) |
| size = (*B)->size; |
| else |
| size = (*A)->size; |
| |
| for (i = 0; i < size; i += sizeof(sa)) { |
| sa = *(unsigned *)&(*A)->caller[i]; |
| sb = *(unsigned *)&(*B)->caller[i]; |
| if (sa > sb) |
| return 1; |
| if (sa < sb) |
| return -1; |
| } |
| |
| /* They are the same up to size. Then bigger size wins */ |
| if ((*A)->size > (*B)->size) |
| return 1; |
| if ((*A)->size < (*B)->size) |
| return -1; |
| return 0; |
| } |
| |
| static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct stack_data **stacks; |
| struct stack_chain *chain; |
| unsigned long long mask = 0; |
| int nr_chains; |
| int longsize = tep_get_long_size(pevent); |
| int nr_stacks; |
| int i; |
| |
| nr_stacks = 0; |
| trace_hash_for_each_bucket(bucket, stack_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| nr_stacks++; |
| } |
| } |
| |
| stacks = malloc(sizeof(*stacks) * nr_stacks); |
| if (!stacks) { |
| warning("Could not allocate stacks"); |
| return; |
| } |
| |
| nr_stacks = 0; |
| trace_hash_for_each_bucket(bucket, stack_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| stacks[nr_stacks++] = stack_from_item(item); |
| } |
| } |
| |
| qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks); |
| |
| chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains); |
| |
| output_chain(pevent, chain, 0, nr_chains, &mask); |
| |
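| /* Debugging aid: change to "if (1)" to dump each raw stack */ |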
| if (0) |
| for (i = 0; i < nr_stacks; i++) |
| output_event_stack(pevent, stacks[i]); |
| |
| free(stacks); |
| free_chain(chain, nr_chains); |
| } |
| |
| static void output_event(struct event_hash *event_hash) |
| { |
| struct event_data *event_data = event_hash->event_data; |
| struct tep_handle *pevent = event_data->event->tep; |
| struct trace_seq s; |
| |
| trace_seq_init(&s); |
| |
| if (event_data->print_func) |
| event_data->print_func(&s, event_hash); |
| else if (event_data->type == EVENT_TYPE_FUNC) |
| func_print(&s, event_hash); |
| else |
| trace_seq_printf(&s, "%s:0x%llx", |
| event_data->event->name, |
| event_hash->val); |
| trace_seq_terminate(&s); |
| |
| printf(" Event: %s (%lld)", |
| s.buffer, event_hash->count); |
| |
| trace_seq_destroy(&s); |
| |
| if (event_hash->time_total) { |
| event_hash->time_avg = event_hash->time_total / event_hash->count; |
| printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)", |
| event_hash->time_total, event_hash->time_avg, |
| event_hash->time_max, |
| nsecs_per_sec(event_hash->ts_max), |
| mod_to_usec(event_hash->ts_max), |
| event_hash->time_min, |
| nsecs_per_sec(event_hash->ts_min), |
| mod_to_usec(event_hash->ts_min)); |
| } |
| printf("\n"); |
| |
| output_stacks(pevent, &event_hash->stacks); |
| } |
| |
| static int compare_events(const void *a, const void *b) |
| { |
| struct event_hash * const *A = a; |
| struct event_hash * const *B = b; |
| const struct event_data *event_data_a = (*A)->event_data; |
| const struct event_data *event_data_b = (*B)->event_data; |
| |
| /* Schedule switch goes first */ |
| if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) { |
| if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH) |
| return -1; |
| /* lower the state the better */ |
| if ((*A)->val > (*B)->val) |
| return 1; |
| if ((*A)->val < (*B)->val) |
| return -1; |
| return 0; |
| } else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH) |
| return 1; |
| |
| /* Wakeups are next */ |
| if (event_data_a->type == EVENT_TYPE_WAKEUP) { |
| if (event_data_b->type != EVENT_TYPE_WAKEUP) |
| return -1; |
| return 0; |
| } else if (event_data_b->type == EVENT_TYPE_WAKEUP) |
| return 1; |
| |
| if (event_data_a->id > event_data_b->id) |
| return 1; |
| if (event_data_a->id < event_data_b->id) |
| return -1; |
| if ((*A)->time_total > (*B)->time_total) |
| return -1; |
| if ((*A)->time_total < (*B)->time_total) |
| return 1; |
| return 0; |
| } |
| |
| static void output_task(struct handle_data *h, struct task_data *task) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct event_hash **events; |
| const char *comm; |
| int nr_events = 0; |
| int i; |
| |
| if (task->group) |
| return; |
| |
| if (task->comm) |
| comm = task->comm; |
| else |
| comm = tep_data_comm_from_pid(h->pevent, task->pid); |
| |
| if (task->pid < 0) |
| printf("%s\n", task->comm); |
| else |
| printf("\ntask: %s-%d\n", comm, task->pid); |
| |
| trace_hash_for_each_bucket(bucket, &task->event_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| nr_events++; |
| } |
| } |
| |
| events = malloc(sizeof(*events) * nr_events); |
| if (!events) { |
| warning("Could not allocate events"); |
| return; |
| } |
| |
| i = 0; |
| trace_hash_for_each_bucket(bucket, &task->event_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| events[i++] = event_from_item(item); |
| } |
| } |
| |
| qsort(events, nr_events, sizeof(*events), compare_events); |
| |
| for (i = 0; i < nr_events; i++) |
| output_event(events[i]); |
| |
| free(events); |
| } |
| |
| static void output_group(struct handle_data *h, struct group_data *group) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct event_hash **events; |
| int nr_events = 0; |
| int i; |
| |
| printf("\ngroup: %s\n", group->comm); |
| |
| trace_hash_for_each_bucket(bucket, &group->event_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| nr_events++; |
| } |
| } |
| |
| events = malloc(sizeof(*events) * nr_events); |
| if (!events) { |
| warning("Could not allocate events"); |
| return; |
| } |
| |
| i = 0; |
| trace_hash_for_each_bucket(bucket, &group->event_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| events[i++] = event_from_item(item); |
| } |
| } |
| |
| qsort(events, nr_events, sizeof(*events), compare_events); |
| |
| for (i = 0; i < nr_events; i++) |
| output_event(events[i]); |
| |
| free(events); |
| } |
| |
| static int compare_tasks(const void *a, const void *b) |
| { |
| struct task_data * const *A = a; |
| struct task_data * const *B = b; |
| |
| if ((*A)->pid > (*B)->pid) |
| return 1; |
| else if ((*A)->pid < (*B)->pid) |
| return -1; |
| return 0; |
| } |
| |
| static int compare_groups(const void *a, const void *b) |
| { |
| struct group_data * const *A = a; |
| struct group_data * const *B = b; |
| |
| return strcmp((*A)->comm, (*B)->comm); |
| } |
| |
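| /* Free an event_hash entry along with all of its recorded stacks. */ |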
| static void free_event_hash(struct event_hash *event_hash) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct stack_data *stack; |
| |
| trace_hash_for_each_bucket(bucket, &event_hash->stacks) { |
| trace_hash_while_item(item, bucket) { |
| stack = stack_from_item(item); |
| trace_hash_del(&stack->hash); |
| free(stack); |
| } |
| } |
| trace_hash_free(&event_hash->stacks); |
| free(event_hash); |
| } |
| |
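| /* |
| * Release everything hanging off a task: pending start events (and |
| * any stack records they hold), the per-task event hash, and the |
| * last stack record seen for the task. |
| */ |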
| static void __free_task(struct task_data *task) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct start_data *start; |
| struct event_hash *event_hash; |
| |
| free(task->comm); |
| |
| trace_hash_for_each_bucket(bucket, &task->start_hash) { |
| trace_hash_while_item(item, bucket) { |
| start = start_from_item(item); |
| if (start->stack.record) |
| tracecmd_free_record(start->stack.record); |
| list_del(&start->list); |
| trace_hash_del(item); |
| free(start); |
| } |
| } |
| trace_hash_free(&task->start_hash); |
| |
| trace_hash_for_each_bucket(bucket, &task->event_hash) { |
| trace_hash_while_item(item, bucket) { |
| event_hash = event_from_item(item); |
| trace_hash_del(item); |
| free_event_hash(event_hash); |
| } |
| } |
| trace_hash_free(&task->event_hash); |
| |
| if (task->last_stack) |
| tracecmd_free_record(task->last_stack); |
| } |
| |
| static void free_task(struct task_data *task) |
| { |
| __free_task(task); |
| free(task); |
| } |
| |
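| /* Free a group and the events that were merged into it. */ |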
| static void free_group(struct group_data *group) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct event_hash *event_hash; |
| |
| free(group->comm); |
| |
| trace_hash_for_each_bucket(bucket, &group->event_hash) { |
| trace_hash_while_item(item, bucket) { |
| event_hash = event_from_item(item); |
| trace_hash_del(item); |
| free_event_hash(event_hash); |
| } |
| } |
| trace_hash_free(&group->event_hash); |
| free(group); |
| } |
| |
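| /* Only print global pseudo tasks that actually recorded events. */ |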
| static void show_global_task(struct handle_data *h, |
| struct task_data *task) |
| { |
| if (trace_hash_empty(&task->event_hash)) |
| return; |
| |
| output_task(h, task); |
| } |
| |
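| /* |
| * Pull all tasks out of the handle's task hash, sort them by pid, |
| * then print and free each one. The hash items are deleted as the |
| * array is filled, leaving the hash empty. |
| */ |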
| static void output_tasks(struct handle_data *h) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct task_data **tasks; |
| int nr_tasks = 0; |
| int i; |
| |
| trace_hash_for_each_bucket(bucket, &h->task_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| nr_tasks++; |
| } |
| } |
| |
| if (nr_tasks == 0) |
| return; |
| |
| tasks = malloc(sizeof(*tasks) * nr_tasks); |
| if (!tasks) { |
| warning("Could not allocate tasks"); |
| return; |
| } |
| |
| nr_tasks = 0; |
| |
| trace_hash_for_each_bucket(bucket, &h->task_hash) { |
| trace_hash_while_item(item, bucket) { |
| tasks[nr_tasks++] = task_from_item(item); |
| trace_hash_del(item); |
| } |
| } |
| |
| qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks); |
| |
| for (i = 0; i < nr_tasks; i++) { |
| output_task(h, tasks[i]); |
| free_task(tasks[i]); |
| } |
| |
| free(tasks); |
| } |
| |
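| /* |
| * Same as output_tasks(), but for merged groups, sorted by comm. |
| * Does nothing if no groups were created. |
| */ |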
| static void output_groups(struct handle_data *h) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| struct group_data **groups; |
| int nr_groups = 0; |
| int i; |
| |
| trace_hash_for_each_bucket(bucket, &h->group_hash) { |
| trace_hash_for_each_item(item, bucket) { |
| nr_groups++; |
| } |
| } |
| |
| if (nr_groups == 0) |
| return; |
| |
| groups = malloc(sizeof(*groups) * nr_groups); |
| if (!groups) { |
| warning("Could not allocate groups"); |
| return; |
| } |
| |
| nr_groups = 0; |
| |
| trace_hash_for_each_bucket(bucket, &h->group_hash) { |
| trace_hash_while_item(item, bucket) { |
| groups[nr_groups++] = group_from_item(item); |
| trace_hash_del(item); |
| } |
| } |
| |
| qsort(groups, nr_groups, sizeof(*groups), compare_groups); |
| |
| for (i = 0; i < nr_groups; i++) { |
| output_group(h, groups[i]); |
| free_group(groups[i]); |
| } |
| |
| free(groups); |
| } |
| |
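| /* |
| * Print everything recorded for one input handle: the global task, |
| * the per-CPU global tasks, then all groups and remaining tasks. |
| */ |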
| static void output_handle(struct handle_data *h) |
| { |
| int i; |
| |
| show_global_task(h, h->global_task); |
| for (i = 0; i < h->cpus; i++) |
| show_global_task(h, &h->global_percpu_tasks[i]); |
| |
| output_groups(h); |
| output_tasks(h); |
| } |
| |
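| /* |
| * Merge one stack entry into an event's stack hash. If an identical |
| * stack already exists, fold the count and time into it (keeping |
| * the min/max times and their timestamps) and free the duplicate; |
| * otherwise the stack is added as a new entry. |
| */ |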
| static void merge_event_stack(struct event_hash *event, |
| struct stack_data *stack) |
| { |
| struct stack_data *exist; |
| struct trace_hash_item *item; |
| struct stack_match match; |
| |
| match.caller = stack->caller; |
| match.size = stack->size; |
| item = trace_hash_find(&event->stacks, stack->hash.key, match_stack, |
| &match); |
| if (!item) { |
| trace_hash_add(&event->stacks, &stack->hash); |
| return; |
| } |
| exist = stack_from_item(item); |
| exist->count += stack->count; |
| exist->time += stack->time; |
| |
| if (exist->time_max < stack->time_max) { |
| exist->time_max = stack->time_max; |
| exist->ts_max = stack->ts_max; |
| } |
| if (exist->time_min > stack->time_min) { |
| exist->time_min = stack->time_min; |
| exist->ts_min = stack->ts_min; |
| } |
| free(stack); |
| } |
| |
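| /* Move every stack from @event into @exist, merging duplicates. */ |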
| static void merge_stacks(struct event_hash *exist, struct event_hash *event) |
| { |
| struct stack_data *stack; |
| struct trace_hash_item *item; |
| struct trace_hash_item **bucket; |
| |
| trace_hash_for_each_bucket(bucket, &event->stacks) { |
| trace_hash_while_item(item, bucket) { |
| stack = stack_from_item(item); |
| trace_hash_del(&stack->hash); |
| merge_event_stack(exist, stack); |
| } |
| } |
| } |
| |
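| /* |
| * Move an event from a task into its group. Wakeup events are |
| * collapsed into a single entry (search_val and val zeroed) and |
| * sched switch events are keyed by task state, so matching events |
| * from different tasks of the same comm land in the same bucket. |
| * If the group already has a matching event, the counts, times and |
| * stacks are folded into it and this event is freed. |
| */ |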
| static void merge_event_into_group(struct group_data *group, |
| struct event_hash *event) |
| { |
| struct event_hash *exist; |
| struct trace_hash_item *item; |
| struct event_data_match edata; |
| unsigned long long key; |
| |
| if (event->event_data->type == EVENT_TYPE_WAKEUP) { |
| event->search_val = 0; |
| event->val = 0; |
| key = trace_hash((unsigned long)event->event_data); |
| } else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) { |
| event->search_val = event->val; |
| key = (unsigned long)event->event_data + |
| ((unsigned long)event->val * 2); |
| key = trace_hash(key); |
| } else { |
| key = event->hash.key; |
| } |
| |
| edata.event_data = event->event_data; |
| edata.search_val = event->search_val; |
| edata.val = event->val; |
| |
| item = trace_hash_find(&group->event_hash, key, match_event, &edata); |
| if (!item) { |
| event->hash.key = key; |
| trace_hash_add(&group->event_hash, &event->hash); |
| return; |
| } |
| |
| exist = event_from_item(item); |
| exist->count += event->count; |
| exist->time_total += event->time_total; |
| |
| if (exist->time_max < event->time_max) { |
| exist->time_max = event->time_max; |
| exist->ts_max = event->ts_max; |
| } |
| if (exist->time_min > event->time_min) { |
| exist->time_min = event->time_min; |
| exist->ts_min = event->ts_min; |
| } |
| |
| merge_stacks(exist, event); |
| free_event_hash(event); |
| } |
| |
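| /* |
| * Attach a task to the group matching its comm, creating the group |
| * on first use, then move all of the task's events into the group's |
| * event hash. |
| */ |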
| static void add_group(struct handle_data *h, struct task_data *task) |
| { |
| unsigned long long key; |
| struct trace_hash_item *item; |
| struct group_data *grp; |
| struct trace_hash_item **bucket; |
| void *data = task->comm; |
| |
| if (!task->comm) |
| return; |
| |
| key = trace_hash_str(task->comm); |
| |
| item = trace_hash_find(&h->group_hash, key, match_group, data); |
| if (item) { |
| grp = group_from_item(item); |
| } else { |
| grp = malloc(sizeof(*grp)); |
| if (!grp) { |
| warning("Could not allocate group"); |
| return; |
| } |
| memset(grp, 0, sizeof(*grp)); |
| |
| grp->comm = strdup(task->comm); |
| if (!grp->comm) |
| die("strdup"); |
| grp->hash.key = key; |
| trace_hash_add(&h->group_hash, &grp->hash); |
| trace_hash_init(&grp->event_hash, 32); |
| } |
| task->group = grp; |
| |
| trace_hash_for_each_bucket(bucket, &task->event_hash) { |
| trace_hash_while_item(item, bucket) { |
| struct event_hash *event_hash; |
| |
| event_hash = event_from_item(item); |
| trace_hash_del(&event_hash->hash); |
| merge_event_into_group(grp, event_hash); |
| } |
| } |
| } |
| |
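| /* When merge_like_comms is set, group all tasks that share a comm. */ |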
| static void merge_tasks(struct handle_data *h) |
| { |
| struct trace_hash_item **bucket; |
| struct trace_hash_item *item; |
| |
| if (!merge_like_comms) |
| return; |
| |
| trace_hash_for_each_bucket(bucket, &h->task_hash) { |
| trace_hash_for_each_item(item, bucket) |
| add_group(h, task_from_item(item)); |
| } |
| } |
| |
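| /* |
| * Entry point for the profile output: for each handle, optionally |
| * merge tasks with the same comm into groups, then print the |
| * results and release the task hash. |
| */ |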
| int do_trace_profile(void) |
| { |
| struct handle_data *h; |
| |
| for (h = handles; h; h = h->next) { |
| if (merge_like_comms) |
| merge_tasks(h); |
| output_handle(h); |
| trace_hash_free(&h->task_hash); |
| } |
| |
| return 0; |
| } |