| /** |
| * @file op_alloc_counter.c |
| * hardware counter allocation |
| * |
| * @remark Copyright 2002 OProfile authors |
| * @remark Read the file COPYING |
| * |
| * @author John Levon |
| * @author Philippe Elie |
| */ |
| |
| #include <stdlib.h> |
| #include <ctype.h> |
| #include <dirent.h> |
| |
| #include "op_events.h" |
| #include "op_libiberty.h" |
| |
| |
| typedef struct counter_arc_head { |
	/** the head of the list of counters allowed for this event */
| struct list_head next; |
| } counter_arc_head; |
| |
| |
| typedef struct counter_arc { |
| /** counter nr */ |
| int counter; |
| /** the next counter allowed for this event */ |
| struct list_head next; |
| } counter_arc; |
| |
| |
| /** |
| * @param pev an array of event |
| * @param nr_events number of entry in pev |
| * |
| * build an array of counter list allowed for each events |
| * counter_arc_head[i] is the list of allowed counter for pev[i] events |
| * The returned pointer is an array of nr_events entry |
| */ |
| static counter_arc_head * |
| build_counter_arc(struct op_event const * pev[], int nr_events) |
| { |
| counter_arc_head * ctr_arc; |
| int i; |
| |
| ctr_arc = xmalloc(nr_events * sizeof(*ctr_arc)); |
| |
| for (i = 0; i < nr_events; ++i) { |
| int j; |
| u32 mask = pev[i]->counter_mask; |
| |
| list_init(&ctr_arc[i].next); |
| for (j = 0; mask; ++j) { |
| if (mask & (1 << j)) { |
| counter_arc * arc = |
| xmalloc(sizeof(counter_arc)); |
| arc->counter = j; |
				/* We loop by increasing counter number and
				 * allocation walks the tree left to right,
				 * so we add at the tail to ensure counters
				 * are allocated in increasing order: not
				 * required, but a bit less surprising when
				 * debugging the code.
				 */
| list_add_tail(&arc->next, &ctr_arc[i].next); |
| mask &= ~(1 << j); |
| } |
| } |
| } |
| |
| return ctr_arc; |
| } |
| |
| |
| /** |
| * @param ctr_arc the array to deallocate |
| * @param nr_events number of entry in array |
| * |
| * deallocate all previously allocated resource by build_counter_arc() |
| */ |
| static void delete_counter_arc(counter_arc_head * ctr_arc, int nr_events) |
| { |
| int i; |
| for (i = 0; i < nr_events; ++i) { |
| struct list_head * pos, * pos2; |
| list_for_each_safe(pos, pos2, &ctr_arc[i].next) { |
| counter_arc * arc = list_entry(pos, counter_arc, next); |
| list_del(&arc->next); |
| free(arc); |
| } |
| } |
| free(ctr_arc); |
| } |
| |
| |
| /** |
| * @param ctr_arc tree description, ctr_arc[i] is the i-th level of tree. |
| * @param max_depth number of entry in array ctr_arc == depth of tree |
| * @param depth current level we are exploring |
| * @param allocated_mask current counter already allocated mask |
| * @param counter_map array of counter number mapping, returned results go |
| * here |
| * |
| * return non zero on succees, in this case counter_map is set to the counter |
| * mapping number. |
| * |
| * Solution is searched through a simple backtracking exploring recursively all |
| * possible solution until one is found, prunning is done in O(1) by tracking |
| * a bitmask of already allocated counter. Walking through node is done in |
| * preorder left to right. |
| * |
| * In case of extended events (required no phisical counters), the associated |
| * counter_map entry will be -1. |
| * |
| * Possible improvment if neccessary: partition counters in class of counter, |
| * two counter belong to the same class if they allow exactly the same set of |
| * event. Now using a variant of the backtrack algo can works on class of |
| * counter rather on counter (this is not an improvment if each counter goes |
| * in it's own class) |
| */ |
| static int |
| allocate_counter(counter_arc_head const * ctr_arc, int max_depth, int depth, |
| u32 allocated_mask, size_t * counter_map) |
| { |
| struct list_head * pos; |
| |
| if (depth == max_depth) |
| return 1; |
| |
	/* If no counter is allowed for this event (empty arc list),
	 * map it to -1 and carry on with the next event. */
	if ((&ctr_arc[depth].next)->next == &ctr_arc[depth].next) {
| counter_map[depth] = -1; |
| if (allocate_counter(ctr_arc, max_depth, depth + 1, |
| allocated_mask, |
| counter_map)) |
| return 1; |
| } else { |
| list_for_each(pos, &ctr_arc[depth].next) { |
| counter_arc const * arc = list_entry(pos, counter_arc, next); |
| |
| if (allocated_mask & (1 << arc->counter)) |
| continue; |
| |
| counter_map[depth] = arc->counter; |
| |
| if (allocate_counter(ctr_arc, max_depth, depth + 1, |
| allocated_mask | (1 << arc->counter), |
| counter_map)) |
| return 1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* determine which directories are counter directories |
| */ |
| static int perfcounterdir(const struct dirent * entry) |
| { |
| return (isdigit(entry->d_name[0])); |
| } |
| |
| |
| /** |
| * @param mask pointer where to place bit mask of unavailable counters |
| * |
| * return >= 0 number of counters that are available |
| * < 0 could not determine number of counters |
| * |
| */ |
| static int op_get_counter_mask(u32 * mask) |
| { |
| struct dirent **counterlist; |
| int count, i; |
| /* assume nothing is available */ |
	u32 available = 0;
| |
| count = scandir("/dev/oprofile", &counterlist, perfcounterdir, |
| alphasort); |
| if (count < 0) |
| /* unable to determine bit mask */ |
| return -1; |
| /* convert to bit map (0 where counter exists) */ |
	for (i = 0; i < count; ++i) {
| available |= 1 << atoi(counterlist[i]->d_name); |
| free(counterlist[i]); |
| } |
	*mask = ~available;
| free(counterlist); |
| return count; |
| } |
| |
| size_t * map_event_to_counter(struct op_event const * pev[], int nr_events, |
| op_cpu cpu_type) |
| { |
| counter_arc_head * ctr_arc; |
| size_t * counter_map; |
| int i, nr_counters, nr_pmc_events; |
| op_cpu curr_cpu_type; |
| u32 unavailable_counters = 0; |
| |
| /* Either ophelp or one of the libop tests may invoke this |
| * function with a non-native cpu_type. If so, we should not |
| * call op_get_counter_mask because that will look for real counter |
| * information in oprofilefs. |
| */ |
| curr_cpu_type = op_get_cpu_type(); |
| if (cpu_type != curr_cpu_type) |
| nr_counters = op_get_nr_counters(cpu_type); |
| else |
| nr_counters = op_get_counter_mask(&unavailable_counters); |
| |
	/* no counters found, so perfmon is probably managing the perfmon hw */
| if (nr_counters <= 0) { |
| nr_counters = op_get_nr_counters(cpu_type); |
| unavailable_counters = (~0) << nr_counters; |
| } |
| |
	/* Check to see if we have enough physical counters to map events */
| for (i = 0, nr_pmc_events = 0; i < nr_events; i++) |
		if (pev[i]->ext == NULL)
| if (++nr_pmc_events > nr_counters) |
| return 0; |
| |
| ctr_arc = build_counter_arc(pev, nr_events); |
| |
| counter_map = xmalloc(nr_events * sizeof(size_t)); |
| |
| if (!allocate_counter(ctr_arc, nr_events, 0, unavailable_counters, |
| counter_map)) { |
| free(counter_map); |
| counter_map = 0; |
| } |
| |
| delete_counter_arc(ctr_arc, nr_events); |
| return counter_map; |
| } |