| /** |
| * @file op_syscalls.c |
| * Tracing of system calls |
| * |
| * @remark Copyright 2002 OProfile authors |
| * @remark Read the file COPYING |
| * |
| * @author Bob Montgomery |
| * @author Will Cohen |
| * @author John Levon |
| * @author Philippe Elie |
| */ |
| |
| #include <linux/sched.h> |
| #include <linux/unistd.h> |
| #include <linux/mman.h> |
| #include <linux/file.h> |
| |
| #include "oprofile.h" |
| #include "op_dcache.h" |
| #include "op_util.h" |
| |
| uint dname_top; |
| struct qstr **dname_stack; |
| char * pool_pos; |
| char * pool_start; |
| char * pool_end; |
| |
| void oprof_put_note(struct op_note * samp); |
| |
/* ------------ system calls --------------- */

/*
 * Six unsigned long values describing an mmap request.
 * NOTE(review): not referenced by the code visible in this file;
 * presumably kept for parity with other architectures -- confirm.
 */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};
| |
| /* --------- IA64 versions of system calls ------ */ |
| asmlinkage static int (*old_sys_clone)(long, long); |
| asmlinkage static int (*old_sys_clone2)(long, long, long); |
| asmlinkage static int (*old_sys_execve)(char *, char **, char **); |
| asmlinkage static unsigned long (*old_sys_mmap)(unsigned long, |
| unsigned long, int, int, int, long); |
| asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long, |
| unsigned long, int, int, int, long); |
| asmlinkage static long (*old_sys_init_module)(char const *, struct module *); |
| asmlinkage static long (*old_sys_exit)(int); |
| |
| /* --------- declarations of interception stubs for IA64 ------ */ |
| asmlinkage long post_stub_clone(long, long); |
| asmlinkage long post_stub_clone2(long, long, long); |
| asmlinkage long my_ia64_execve(char *, char **, char **); |
| asmlinkage unsigned long post_stub_mmap(unsigned long, |
| unsigned long, int, int, int, long); |
| asmlinkage unsigned long post_stub_mmap2(unsigned long, |
| unsigned long, int, int, int, long); |
| asmlinkage long post_stub_init_module(char const *, struct module *); |
| asmlinkage long pre_stub_exit(int); |
| |
/*
 * The IA64 system call table does not hold function pointers; it holds
 * pointers to code (not the same thing).  It can violate the procedure
 * calling rules because these "procedure calls" are made by the
 * assembly language BREAK handler in ivt.S.
 */

/* Two-word function descriptor; the save/intercept/restore code only uses ip. */
struct fdesc {
	void *ip;
	void *gp;
};

/* One descriptor per intercepted system call, filled in by op_save_syscalls(). */
struct fdesc fdesc_clone;
struct fdesc fdesc_clone2;
struct fdesc fdesc_execve;
struct fdesc fdesc_mmap;
struct fdesc fdesc_mmap2;
struct fdesc fdesc_init_module;
struct fdesc fdesc_exit;
/* ----------- End of IA64 weirdness for now -------------- */
| |
| spinlock_t map_lock = SPIN_LOCK_UNLOCKED; |
| |
| /* called with map_lock held */ |
| static void oprof_output_map(ulong addr, ulong len, |
| ulong offset, struct file * file, int is_execve) |
| { |
| struct op_note note; |
| |
| /* don't bother with /dev/zero mappings etc. */ |
| if (!len) |
| return; |
| |
| note.pid = current->pid; |
| note.tgid = op_get_tgid(); |
| note.addr = addr; |
| note.len = len; |
| note.offset = offset; |
| note.type = is_execve ? OP_EXEC : OP_MAP; |
| note.hash = hash_path(file); |
| if (note.hash == -1) |
| return; |
| oprof_put_note(¬e); |
| } |
| |
| static int oprof_output_maps(struct task_struct * task) |
| { |
| int size=0; |
| struct mm_struct * mm; |
| struct vm_area_struct * map; |
| |
| /* we don't need to worry about mm_users here, since there is at |
| least one user (current), and if there's other code using this |
| mm, then mm_users must be at least 2; we should never have to |
| mmput() here. */ |
| |
| if (!(mm = task->mm)) |
| goto out; |
| |
| lock_mmap(mm); |
| spin_lock(&map_lock); |
| |
| /* We need two pass, daemon assume than the first mmap notification |
| * is for the executable but some process doesn't follow this model. |
| */ |
| for (map = mm->mmap; map; map = map->vm_next) { |
| if (!(map->vm_flags & VM_EXEC) || !map->vm_file) |
| continue; |
| if (!(map->vm_flags & VM_EXECUTABLE)) |
| continue; |
| |
| oprof_output_map(map->vm_start, map->vm_end-map->vm_start, |
| GET_VM_OFFSET(map), map->vm_file, 1); |
| } |
| for (map = mm->mmap; map; map = map->vm_next) { |
| if (!(map->vm_flags & VM_EXEC) || !map->vm_file) |
| continue; |
| if (map->vm_flags & VM_EXECUTABLE) |
| continue; |
| |
| oprof_output_map(map->vm_start, map->vm_end-map->vm_start, |
| GET_VM_OFFSET(map), map->vm_file, 0); |
| } |
| spin_unlock(&map_lock); |
| unlock_mmap(mm); |
| |
| out: |
| return size; |
| } |
| |
| |
| /* execve is a special case on IA64. The others get the result and |
| * arguments after the system call has been made from the ASM stub. */ |
| |
| asmlinkage long |
| my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs) |
| { |
| int error; |
| |
| MOD_INC_USE_COUNT; |
| |
| filename = getname(filename); |
| error = PTR_ERR(filename); |
| if (IS_ERR(filename)) |
| goto out; |
| error = do_execve(filename, argv, envp, regs); |
| |
| if (!error) { |
| PTRACE_OFF(current); |
| oprof_output_maps(current); |
| } |
| putname(filename); |
| out: |
| unlock_execve(); |
| MOD_DEC_USE_COUNT; |
| return error; |
| } |
| |
| |
| static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, |
| ulong fd, ulong offset) |
| { |
| struct file * file; |
| |
| lock_out_mmap(); |
| |
| file = fget(fd); |
| if (!file) |
| goto out; |
| |
| spin_lock(&map_lock); |
| oprof_output_map(addr, len, offset, file, 0); |
| spin_unlock(&map_lock); |
| |
| fput(file); |
| |
| out: |
| unlock_out_mmap(); |
| } |
| |
| |
| /* |
| * IA64 mmap routines: |
| * The post_sys_* routines are called after the syscall has been made. |
| * The first argument is the return value from the system call. |
| */ |
| asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len, |
| ulong prot, ulong flags, ulong fd, ulong pgoff) |
| { |
| /* FIXME: This should be done in the ASM stub. */ |
| MOD_INC_USE_COUNT; |
| |
| if ((prot & PROT_EXEC) && ret >= 0) |
| out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT); |
| goto out; |
| out: |
| MOD_DEC_USE_COUNT; |
| } |
| |
| asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len, |
| ulong prot, ulong flags, ulong fd, ulong off) |
| { |
| /* FIXME: This should be done in the ASM stub. */ |
| MOD_INC_USE_COUNT; |
| |
| if ((prot & PROT_EXEC) && ret >= 0) |
| out_mmap(ret, len, prot, flags, fd, off); |
| goto out; |
| out: |
| MOD_DEC_USE_COUNT; |
| } |
| |
| |
| inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid) |
| { |
| struct op_note note; |
| |
| note.type = OP_FORK; |
| note.pid = old_pid; |
| note.tgid = old_tgid; |
| note.addr = new_pid; |
| note.len = new_tgid; |
| oprof_put_note(¬e); |
| } |
| |
| |
| asmlinkage void post_sys_clone(long ret, long arg0, long arg1) |
| { |
| u32 pid = current->pid; |
| u32 tgid = op_get_tgid(); |
| |
| /* FIXME: This should be done in the ASM stub. */ |
| MOD_INC_USE_COUNT; |
| |
| if (ret) |
| /* FIXME: my libc show clone() is not implemented in ia64 |
| * but used only by fork() with a SIGCHILD first parameter |
| * so we assume it's a fork */ |
| oprof_report_fork(pid, ret, pid, tgid); |
| MOD_DEC_USE_COUNT; |
| } |
| |
| asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2) |
| { |
| u32 pid = current->pid; |
| u32 tgid = op_get_tgid(); |
| long clone_flags = arg0; |
| |
| /* FIXME: This should be done in the ASM stub. */ |
| MOD_INC_USE_COUNT; |
| |
| if (ret) { |
| if (clone_flags & CLONE_THREAD) |
| oprof_report_fork(pid, ret, tgid, tgid); |
| else |
| oprof_report_fork(pid, ret, tgid, ret); |
| } |
| MOD_DEC_USE_COUNT; |
| } |
| |
| asmlinkage void |
| post_sys_init_module(long ret, char const * name_user, |
| struct module * mod_user) |
| { |
| /* FIXME: This should be done in the ASM stub. */ |
| MOD_INC_USE_COUNT; |
| |
| if (ret >= 0) { |
| struct op_note note; |
| |
| note.type = OP_DROP_MODULES; |
| oprof_put_note(¬e); |
| } |
| MOD_DEC_USE_COUNT; |
| } |
| |
| /* Exit must use a pre-call intercept stub. There is no post exit. */ |
| asmlinkage void pre_sys_exit(int error_code) |
| { |
| struct op_note note; |
| |
| MOD_INC_USE_COUNT; |
| |
| note.addr = current->times.tms_utime; |
| note.len = current->times.tms_stime; |
| note.offset = current->start_time; |
| note.type = OP_EXIT; |
| note.pid = current->pid; |
| note.tgid = op_get_tgid(); |
| oprof_put_note(¬e); |
| |
| /* this looks UP-dangerous, as the exit sleeps and we don't |
| * have a use count, but in fact its ok as sys_exit is noreturn, |
| * so we can never come back to this non-existent exec page |
| */ |
| MOD_DEC_USE_COUNT; |
| } |
| |
/* System call table; this file indexes it with (__NR_xxx - __NR_ni_syscall). */
extern void *sys_call_table[];
| |
| /* FIXME: Now that I'm never trying to do a C-level call through these |
| * pointers, I should just save, intercept, and restore with void * |
| * instead of the void * part of the function descriptor, I think. |
| */ |
| |
| void op_save_syscalls(void) |
| { |
| fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall]; |
| old_sys_clone = (void *)&fdesc_clone; |
| fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall]; |
| old_sys_clone2 = (void *)&fdesc_clone2; |
| fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall]; |
| old_sys_execve = (void *)&fdesc_execve; |
| fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall]; |
| old_sys_mmap = (void *)&fdesc_mmap; |
| fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall]; |
| old_sys_mmap2 = (void *)&fdesc_mmap2; |
| fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall]; |
| old_sys_init_module = (void *)&fdesc_init_module; |
| fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall]; |
| old_sys_exit = (void *)&fdesc_exit; |
| } |
| |
| void op_intercept_syscalls(void) |
| { |
| /* Must extract the function address from the stub function |
| * descriptors. |
| */ |
| sys_call_table[__NR_clone - __NR_ni_syscall] = |
| ((struct fdesc *)post_stub_clone)->ip; |
| sys_call_table[__NR_clone2 - __NR_ni_syscall] = |
| ((struct fdesc *)post_stub_clone2)->ip; |
| sys_call_table[__NR_execve - __NR_ni_syscall] = |
| ((struct fdesc *)my_ia64_execve)->ip; |
| sys_call_table[__NR_mmap - __NR_ni_syscall] = |
| ((struct fdesc *)post_stub_mmap)->ip; |
| sys_call_table[__NR_mmap2 - __NR_ni_syscall] = |
| ((struct fdesc *)post_stub_mmap2)->ip; |
| sys_call_table[__NR_init_module - __NR_ni_syscall] = |
| ((struct fdesc *)post_stub_init_module)->ip; |
| sys_call_table[__NR_exit - __NR_ni_syscall] = |
| ((struct fdesc *)pre_stub_exit)->ip; |
| } |
| |
| void op_restore_syscalls(void) |
| { |
| sys_call_table[__NR_clone - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_clone)->ip; |
| sys_call_table[__NR_clone2 - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_clone2)->ip; |
| sys_call_table[__NR_execve - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_execve)->ip; |
| sys_call_table[__NR_mmap - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_mmap)->ip; |
| sys_call_table[__NR_mmap2 - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_mmap2)->ip; |
| sys_call_table[__NR_init_module - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_init_module)->ip; |
| sys_call_table[__NR_exit - __NR_ni_syscall] = |
| ((struct fdesc *)old_sys_exit)->ip; |
| } |