| /** |
| * @file op_pmu.c |
| * Setup and handling of IA64 Performance Monitoring Unit (PMU) |
| * |
| * @remark Copyright 2002 OProfile authors |
| * @remark Read the file COPYING |
| * |
| * @author Bob Montgomery |
| * @author Will Cohen |
| * @author John Levon |
| * @author Philippe Elie |
| */ |
| |
| |
| #include "oprofile.h" |
| #include "op_util.h" |
| #include <asm/perfmon.h> |
| #include "op_ia64_model.h" |
| |
| /* number of counters physically present */ |
| static uint op_nr_counters = 4; |
| |
| /* performance counters are in pairs: pmcN and pmdN. The pmc register acts |
| * as the event selection; the pmd register is the counter. */ |
| #define perf_reg(c) ((c)+4) |
| |
| #define IA64_1_PMD_MASK_VAL ((1UL << 32) - 1) |
| #define IA64_2_PMD_MASK_VAL ((1UL << 47) - 1) |
| |
| /* The appropriate value is selected in pmu_init() */ |
| unsigned long pmd_mask = IA64_2_PMD_MASK_VAL; |
| |
| #define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c))) |
| #define set_pmd_neg(v, c) do { \ |
| ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \ |
| ia64_srlz_d(); } while (0) |
| #define set_pmd(v, c) do { \ |
| ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \ |
| ia64_srlz_d(); } while (0) |
| #define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0) |
| #define get_pmd(c) ia64_get_pmd(perf_reg(c)) |
| #define get_pmc(c) ia64_get_pmc(perf_reg(c)) |
| |
| /* ---------------- IRQ handler ------------------ */ |
| |
| /* The args match the args for pfm_overflow_handler in perfmon.c. |
| * The task_struct is currently filled in with the perfmon "owner" of |
| * the PMU. This might change. I'm not sure it makes sense in perfmon |
| * either with system-wide profiling. |
| * pmc0 is a bit mask for overflowed counters (bits 4-7) |
| * This routine should return 0 to resume interrupts. |
| */ |
| inline static void |
| op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs) |
| { |
| uint cpu = op_cpu_id(); |
| int ctr; |
| |
| for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) { |
| if (pmd_overflowed(pmc0, ctr)) { |
| op_do_profile(cpu, regs->cr_iip, 1, ctr); |
| set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr); |
| } |
| } |
| return; |
| } |
| |
| |
| static void |
| op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs) |
| { |
| u64 pmc0; |
| |
| pmc0 = ia64_get_pmc(0); |
| |
| if ((pmc0 & ~0x1UL) != 0UL) { |
| op_do_pmu_interrupt(pmc0, regs); |
| ia64_set_pmc(0, 0); |
| ia64_srlz_d(); |
| } |
| } |
| |
| |
| #define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2) |
| |
| static void |
| op_set_pmv(void * dummy) |
| { |
| ia64_set_pmv(MY_OPROFILE_VECTOR); |
| ia64_srlz_d(); |
| } |
| |
| |
| static void |
| op_restore_pmv(void* dummy) |
| { |
| ia64_set_pmv(IA64_PERFMON_VECTOR); |
| ia64_srlz_d(); |
| } |
| |
| |
| static int |
| install_handler(void) |
| { |
| int err = 0; |
| |
| /* Try it legally - confusion about vec vs irq */ |
| err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt, |
| SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL); |
| |
| if (err) { |
| printk(KERN_ALERT "oprofile_IA64: request_irq fails, " |
| "returns %d\n", err); |
| return err; |
| } |
| |
| if ((smp_call_function(op_set_pmv, NULL, 0, 1))) { |
| printk(KERN_ALERT "oprofile_IA64: unexpected failure " |
| "of smp_call_function(op_set_pmv)\n"); |
| } |
| |
| op_set_pmv(NULL); |
| |
| return err; |
| } |
| |
| |
| static int |
| restore_handler(void) |
| { |
| int err = 0; |
| |
| if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) { |
| printk(KERN_ALERT "oprofile_IA64: unexpected failure " |
| "of smp_call_function(op_restore_pmv)\n"); |
| } |
| |
| op_restore_pmv(NULL); |
| |
| free_irq(MY_OPROFILE_VECTOR, NULL); |
| return err; |
| } |
| |
| |
| /* ---------------- PMU setup ------------------ */ |
| |
| /* This is kind of artificial. The proc interface might really want to |
| * accept register values directly. There are other features not exposed |
| * by this limited interface. Of course that might require all sorts of |
| * validity checking??? */ |
| static void |
| pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um) |
| { |
| /* enable interrupt generation */ |
| *val |= (1 << 5); |
| |
| /* setup as a privileged monitor */ |
| *val |= (1 << 6); |
| |
| /* McKinley requires pmc4 to have bit 23 set (enable PMU). |
| * It is supposedly ignored in other pmc registers. |
| * Try assuming it's ignored in Itanium, too, and just |
| * set it for everyone. |
| */ |
| |
| *val |= (1 << 23); |
| |
| /* enable/disable chosen OS and USR counting */ |
| (user) ? (*val |= (1 << 3)) |
| : (*val &= ~(1 << 3)); |
| |
| (kernel) ? (*val |= (1 << 0)) |
| : (*val &= ~(1 << 0)); |
| |
| /* what are we counting ? */ |
| *val &= ~(0xff << 8); |
| *val |= ((event & 0xff) << 8); |
| *val &= ~(0xf << 16); |
| *val |= ((um & 0xf) << 16); |
| } |
| |
| |
| static void |
| pmu_setup(void * dummy) |
| { |
| ulong pmc_val; |
| int ii; |
| |
| /* setup each counter */ |
| for (ii = 0 ; ii < op_nr_counters ; ++ii) { |
| if (sysctl.ctr[ii].enabled) { |
| pmc_val = 0; |
| |
| set_pmd_neg(sysctl.ctr[ii].count, ii); |
| pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel, |
| sysctl.ctr[ii].user, sysctl.ctr[ii].event, |
| sysctl.ctr[ii].unit_mask); |
| |
| set_pmc(pmc_val, ii); |
| } |
| } |
| } |
| |
| |
| void |
| disable_psr(void * dummy) |
| { |
| struct pt_regs * regs; |
| /* disable profiling for my saved state */ |
| regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); |
| regs--; |
| ia64_psr(regs)->pp = 0; |
| /* shouldn't need to */ |
| ia64_psr(regs)->up = 0; |
| |
| /* disable profiling for my current state */ |
| __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); |
| |
| #if defined(CONFIG_PERFMON) && defined(CONFIG_SMP) |
| #if V_AT_LEAST(2, 4, 21) |
| local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE; |
| local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP; |
| /* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ |
| #else |
| /* disable profiling for everyone else */ |
| local_cpu_data->pfm_syst_wide = 1; |
| local_cpu_data->pfm_dcr_pp = 0; |
| #endif |
| #endif |
| ia64_set_pmc(0, 0); |
| ia64_srlz_d(); |
| } |
| |
| |
| static int |
| pmu_setup_all(void) |
| { |
| |
| /* This would be a great place to reserve all cpus with |
| * some sort of call to perfmonctl (something like the |
| * CREATE_CONTEXT command). The current interface to |
| * perfmonctl wants to be called from a different task id |
| * for each CPU to be set up (and doesn't allow calls from |
| * modules. |
| */ |
| |
| /* disable profiling with the psr.pp bit */ |
| if ((smp_call_function(disable_psr, NULL, 0, 1))) |
| return -EFAULT; |
| |
| disable_psr(NULL); |
| |
| /* now I've reserved the PMUs and they should be quiet */ |
| |
| if ((smp_call_function(pmu_setup, NULL, 0, 1))) |
| return -EFAULT; |
| |
| pmu_setup(NULL); |
| return 0; |
| } |
| |
| |
| #ifndef CONFIG_SMP |
| /* from linux/arch/ia64/kernel/perfmon.c */ |
| /* |
| * Originaly Written by Ganesh Venkitachalam, IBM Corp. |
| * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com> |
| * |
| * Modifications by Stephane Eranian, Hewlett-Packard Co. |
| * Modifications by David Mosberger-Tang, Hewlett-Packard Co. |
| * |
| * Copyright (C) 1999-2002 Hewlett Packard Co |
| * Stephane Eranian <eranian@hpl.hp.com> |
| * David Mosberger-Tang <davidm@hpl.hp.com> |
| */ |
| |
| /* |
| * On UP kernels, we do not need to constantly set the psr.pp bit |
| * when a task is scheduled. The psr.pp bit can only be changed in |
| * the kernel because of a user request. Given we are on a UP non preeemptive |
| * kernel we know that no other task is running, so we cna simply update their |
| * psr.pp from their saved state. There is this no impact on the context switch |
| * code compared to the SMP case. |
| */ |
| static void |
| op_tasklist_toggle_pp(unsigned int val) |
| { |
| struct task_struct * p; |
| struct pt_regs * regs; |
| |
| read_lock(&tasklist_lock); |
| |
| for_each_task(p) { |
| regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET); |
| |
| /* |
| * position on pt_regs saved on stack on 1st entry into the kernel |
| */ |
| regs--; |
| |
| /* |
| * update psr.pp |
| */ |
| ia64_psr(regs)->pp = val; |
| } |
| read_unlock(&tasklist_lock); |
| } |
| #endif |
| |
| |
| static void |
| pmu_start(void * info) |
| { |
| struct pt_regs * regs; |
| |
| if (info && (*((uint *)info) != op_cpu_id())) |
| return; |
| |
| /* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n", |
| op_cpu_id()); */ |
| /* The default control register pp value is copied into psr.pp |
| * on an interrupt. This allows interrupt service routines to |
| * be monitored. |
| */ |
| ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP); |
| |
| #ifdef CONFIG_PERFMON |
| #ifdef CONFIG_SMP |
| #if V_AT_LEAST(2, 4, 21) |
| local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE; |
| local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP; |
| /* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ |
| #else |
| local_cpu_data->pfm_syst_wide = 1; |
| local_cpu_data->pfm_dcr_pp = 1; |
| #endif |
| #else |
| op_tasklist_toggle_pp(1); |
| #endif |
| #endif |
| /* set it in my saved state */ |
| regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); |
| regs--; |
| ia64_psr(regs)->pp = 1; |
| |
| /* set it in my current state */ |
| __asm__ __volatile__ ("ssm psr.pp;;"::: "memory"); |
| ia64_srlz_d(); |
| } |
| |
| |
| static void |
| pmu_stop(void * info) |
| { |
| struct pt_regs * regs; |
| |
| if (info && (*((uint *)info) != op_cpu_id())) |
| return; |
| |
| /* stop in my current state */ |
| __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); |
| |
| /* disable the dcr pp */ |
| ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP); |
| |
| #ifdef CONFIG_PERFMON |
| #ifdef CONFIG_SMP |
| #if V_AT_LEAST(2, 4, 21) |
| local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE; |
| local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP; |
| /* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ |
| #else |
| local_cpu_data->pfm_syst_wide = 0; |
| local_cpu_data->pfm_dcr_pp = 0; |
| #endif |
| #else |
| pfm_tasklist_toggle_pp(0); |
| #endif |
| #endif |
| |
| /* disable in my saved state */ |
| regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); |
| regs--; |
| ia64_psr(regs)->pp = 0; |
| } |
| |
| |
| static void |
| pmu_select_start(uint cpu) |
| { |
| if (cpu == op_cpu_id()) |
| pmu_start(NULL); |
| else |
| smp_call_function(pmu_start, &cpu, 0, 1); |
| } |
| |
| |
| static void |
| pmu_select_stop(uint cpu) |
| { |
| if (cpu == op_cpu_id()) |
| pmu_stop(NULL); |
| else |
| smp_call_function(pmu_stop, &cpu, 0, 1); |
| } |
| |
| |
| static void |
| pmu_start_all(void) |
| { |
| int cpu, i; |
| |
| for (cpu=0; cpu < smp_num_cpus; cpu++) { |
| struct _oprof_data * data = &oprof_data[cpu]; |
| |
| for (i = 0 ; i < op_nr_counters ; ++i) { |
| if (sysctl.ctr[i].enabled) { |
| data->ctr_count[i] = sysctl.ctr[i].count; |
| } else { |
| data->ctr_count[i] = 0; |
| } |
| } |
| } |
| |
| if (!install_handler()) { |
| smp_call_function(pmu_start, NULL, 0, 1); |
| pmu_start(NULL); |
| } |
| /* FIXME need some way to fail here */; |
| } |
| |
| |
| static void |
| pmu_stop_all(void) |
| { |
| smp_call_function(pmu_stop, NULL, 0, 1); |
| pmu_stop(NULL); |
| restore_handler(); |
| } |
| |
| |
| static int |
| pmu_check_params(void) |
| { |
| int i; |
| int enabled = 0; |
| |
| for (i = 0; i < op_nr_counters ; i++) { |
| if (!sysctl.ctr[i].enabled) |
| continue; |
| |
| enabled = 1; |
| |
| if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) { |
| printk(KERN_ERR "oprofile: neither kernel nor user " |
| "set for counter %d\n", i); |
| return -EINVAL; |
| } |
| |
| if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT, |
| "ctr count value %d not in range (%d %ld)\n")) |
| return -EINVAL; |
| } |
| |
| if (!enabled) { |
| printk(KERN_ERR "oprofile: no counters have been enabled.\n"); |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| |
| static struct op_msrs cpu_msrs[NR_CPUS]; |
| |
| |
| static void free_msr_group(struct op_msr_group * group) |
| { |
| if (group->addrs) |
| kfree(group->addrs); |
| if (group->saved) |
| kfree(group->saved); |
| group->addrs = NULL; |
| group->saved = NULL; |
| } |
| |
| |
| static void pmu_save_registers(void * dummy) |
| { |
| uint i; |
| uint const cpu = op_cpu_id(); |
| struct op_msr_group * counters = &cpu_msrs[cpu].counters; |
| struct op_msr_group * controls = &cpu_msrs[cpu].controls; |
| |
| counters->addrs = NULL; |
| counters->saved = NULL; |
| controls->addrs = NULL; |
| controls->saved = NULL; |
| |
| counters->saved = kmalloc( |
| op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL); |
| if (!counters->saved) |
| goto fault; |
| |
| controls->saved = kmalloc( |
| op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL); |
| if (!controls->saved) |
| goto fault; |
| |
| for (i = 0; i < op_nr_counters; ++i) { |
| controls->saved[i].low = get_pmc(i); |
| counters->saved[i].low = get_pmd(i); |
| } |
| return; |
| |
| fault: |
| free_msr_group(counters); |
| free_msr_group(controls); |
| } |
| |
| |
| static void pmu_restore_registers(void * dummy) |
| { |
| uint i; |
| uint const cpu = op_cpu_id(); |
| struct op_msr_group * counters = &cpu_msrs[cpu].counters; |
| struct op_msr_group * controls = &cpu_msrs[cpu].controls; |
| |
| for (i = 0; i < op_nr_counters; ++i) { |
| set_pmc(controls->saved[i].low, i); |
| set_pmd(counters->saved[i].low, i); |
| } |
| |
| free_msr_group(counters); |
| free_msr_group(controls); |
| } |
| |
| |
| |
| static int |
| pmu_init(void) |
| { |
| int err = 0; |
| |
| /* figure out processor type configure number of bits in pmd |
| and number of counters */ |
| switch (get_cpu_type()) { |
| case CPU_IA64_1: |
| pmd_mask = IA64_1_PMD_MASK_VAL; break; |
| case CPU_IA64_2: |
| case CPU_IA64: |
| pmd_mask = IA64_2_PMD_MASK_VAL; break; |
| default: |
| err = -EIO; break; |
| } |
| |
| op_nr_counters = 4; |
| |
| if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1))) |
| goto out; |
| |
| pmu_save_registers(NULL); |
| |
| out: |
| return err; |
| } |
| |
| |
| static void |
| pmu_deinit(void) |
| { |
| smp_call_function(pmu_restore_registers, NULL, 0, 1); |
| pmu_restore_registers(NULL); |
| } |
| |
| |
| static char * names[] = { "0", "1", "2", "3", }; |
| |
| |
| static int |
| pmu_add_sysctls(ctl_table * next) |
| { |
| ctl_table * start = next; |
| ctl_table * tab; |
| int i, j; |
| |
| for (i=0; i < op_nr_counters; i++) { |
| next->ctl_name = 1; |
| next->procname = names[i]; |
| next->mode = 0700; |
| |
| if (!(tab = kmalloc(sizeof(ctl_table)*7, GFP_KERNEL))) |
| goto cleanup; |
| |
| next->child = tab; |
| |
| memset(tab, 0, sizeof(ctl_table)*7); |
| tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); |
| next++; |
| } |
| |
| return 0; |
| |
| cleanup: |
| next = start; |
| for (j = 0; j < i; j++) { |
| kfree(next->child); |
| next++; |
| } |
| return -EFAULT; |
| } |
| |
| |
| static void pmu_remove_sysctls(ctl_table * next) |
| { |
| int ii; |
| |
| for (ii=0; ii < op_nr_counters; ii++) { |
| kfree(next->child); |
| next++; |
| } |
| } |
| |
| |
| struct op_int_operations op_nmi_ops = { |
| init: pmu_init, |
| deinit: pmu_deinit, |
| add_sysctls: pmu_add_sysctls, |
| remove_sysctls: pmu_remove_sysctls, |
| check_params: pmu_check_params, |
| setup: pmu_setup_all, |
| start: pmu_start_all, |
| stop: pmu_stop_all, |
| start_cpu: pmu_select_start, |
| stop_cpu: pmu_select_stop, |
| }; |
| |
| |
| struct op_int_operations const * op_int_interface() |
| { |
| return &op_nmi_ops; |
| } |
| |
| /* Need this dummy so module/oprofile.c links */ |
| struct op_int_operations op_rtc_ops = { |
| init: NULL, |
| deinit: NULL, |
| add_sysctls: NULL, |
| remove_sysctls: NULL, |
| check_params: NULL, |
| setup: NULL, |
| start: NULL, |
| stop: NULL, |
| start_cpu: NULL, |
| stop_cpu: NULL, |
| }; |