/* "what"-style identification string for this source file */
char netcpu_looper_id[] =
  "@(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3";
| |
| /* netcpu_looper.c |
| |
| Implement the soaker process specific portions of netperf CPU |
| utilization measurements. These are broken-out into a separate file |
| to make life much nicer over in netlib.c which had become a maze of |
| twisty, CPU-util-related, #ifdefs, all different. raj 2005-01-26 |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #include <stdio.h> |
| |
| #ifdef HAVE_FCNTL_H |
| # include <fcntl.h> |
| #endif |
| #if HAVE_UNISTD_H |
| # include <unistd.h> |
| #endif |
| #if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H) |
| # include <sys/mman.h> |
| #else |
| # error netcpu_looper requires mmap |
| #endif |
| |
| #if TIME_WITH_SYS_TIME |
| # include <sys/time.h> |
| # include <time.h> |
| #else |
| # if HAVE_SYS_TIME_H |
| # include <sys/time.h> |
| # else |
| # include <time.h> |
| # endif |
| #endif |
| |
| #if HAVE_SYS_TYPES_H |
| # include <sys/types.h> |
| #endif |
| |
| #if HAVE_SYS_WAIT_H |
| # include <sys/wait.h> |
| #endif |
| |
| #ifdef HAVE_SIGNAL_H |
| #include <signal.h> |
| #endif |
| |
| #ifdef HAVE_ERRNO_H |
| #include <errno.h> |
| #endif |
| |
| #include "netsh.h" |
| #include "netlib.h" |
| |
| #define PAGES_PER_CHILD 2 |
| |
| /* the lib_start_count and lib_end_count arrays hold the starting |
| and ending values of whatever is counting when the system is |
| idle. The rate at which this increments during a test is compared |
| with a previous calibration to arrive at a CPU utilization |
| percentage. raj 2005-01-26 */ |
| static uint64_t lib_start_count[MAXCPUS]; |
| static uint64_t lib_end_count[MAXCPUS]; |
| |
| static int *cpu_mappings; |
| |
| static int lib_idle_fd; |
| static uint64_t *lib_idle_address[MAXCPUS]; |
| static long *lib_base_pointer; |
| static pid_t lib_idle_pids[MAXCPUS]; |
| static int lib_loopers_running=0; |
| |
| /* we used to use this code to bind the loopers, but since we have |
| decided to enable processor affinity for the actual |
| netperf/netserver processes we will use that affinity routine, |
| which happens to know about more systems than this */ |
| |
#ifdef NOTDEF
/* Bind the calling looper to CPU (child_num % lib_num_loc_cpus) with
   whatever native interface the platform has; on platforms not
   listed below this does nothing.  The whole routine is compiled out
   (NOTDEF) - sit_and_spin() calls bind_to_specific_processor()
   instead.  It was only ever meant for the looper processes; the
   processes doing the actual measurement are left to float, or not,
   as the operating system sees fit. */
static void
bind_to_processor(int child_num)
{
#if defined(__hpux)
#include <sys/syscall.h>
#include <sys/mp.h>

  int old_cpu = -2;
  int target_cpu = child_num % lib_num_loc_cpus;

  if (debug) {
    fprintf(where,
            "child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  SETPROCESS(target_cpu, getpid());

#elif defined(__sun) && defined(__SVR4)
  /* should only be Solaris */
#include <sys/processor.h>
#include <sys/procset.h>

  int old_binding;
  int target_cpu = child_num % lib_num_loc_cpus;

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  if (processor_bind(P_PID, getpid(), target_cpu, &old_binding) != 0) {
    fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    fprintf(where," errno %d\n",errno);
    fflush(where);
  }

#else
#ifdef WIN32
  int target_cpu = child_num % lib_num_loc_cpus;

  if (!SetThreadAffinityMask(GetCurrentThread(),
                             (ULONG_PTR)1 << target_cpu)) {
    perror("SetThreadAffinityMask failed");
    fflush(stderr);
  }

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
            child_num,
            target_cpu,
            lib_num_loc_cpus);
    fflush(where);
  }
#endif /* WIN32 */
#endif /* __hpux, __sun && __SVR4 */
}
#endif /* NOTDEF */
| |
| /* sit_and_spin will just spin about incrementing a value */ |
| /* this value will either be in a memory mapped region on Unix shared */ |
| /* by each looper process, or something appropriate on Windows/NT */ |
| /* (malloc'd or such). This routine is reasonably ugly in that it has */ |
| /* priority manipulating code for lots of different operating */ |
| /* systems. This routine never returns. raj 1/96 */ |
| |
| static void |
| sit_and_spin(int child_index) |
| |
| { |
| uint64_t *my_counter_ptr; |
| |
| /* only use C stuff if we are not WIN32 unless and until we */ |
| /* switch from CreateThread to _beginthread. raj 1/96 */ |
| #ifndef WIN32 |
| /* we are the child. we could decide to exec some separate */ |
| /* program, but that doesn't really seem worthwhile - raj 4/95 */ |
| if (debug > 1) { |
| fprintf(where, |
| "Looper child %d is born, pid %d\n", |
| child_index, |
| getpid()); |
| fflush(where); |
| } |
| |
| #endif /* WIN32 */ |
| |
| /* reset our base pointer to be at the appropriate offset */ |
| my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer + |
| (netlib_get_page_size() * |
| PAGES_PER_CHILD * child_index)); |
| |
| /* in the event we are running on an MP system, it would */ |
| /* probably be good to bind the soaker processes to specific */ |
| /* processors. I *think* this is the most reasonable thing to */ |
| /* do, and would be closes to simulating the information we get */ |
| /* on HP-UX with pstat. I could put all the system-specific code */ |
| /* here, but will "abstract it into another routine to keep this */ |
| /* area more readable. I'll probably do the same thine with the */ |
| /* "low pri code" raj 10/95 */ |
| |
| /* since we are "flying blind" wrt where we should bind the looper |
| processes, we want to use the cpu_map that was prepared by netlib |
| rather than assume that the CPU ids on the system start at zero |
| and are contiguous. raj 2006-04-03 */ |
| bind_to_specific_processor(child_index % lib_num_loc_cpus,1); |
| |
| for (*my_counter_ptr = 0L; |
| ; |
| (*my_counter_ptr)++) { |
| if (!(*lib_base_pointer % 1)) { |
| /* every once and again, make sure that our process priority is */ |
| /* nice and low. also, by making system calls, it may be easier */ |
| /* for us to be pre-empted by something that needs to do useful */ |
| /* work - like the thread of execution actually sending and */ |
| /* receiving data across the network :) */ |
| #ifdef _AIX |
| int pid,prio; |
| |
| prio = PRIORITY; |
| pid = getpid(); |
| /* if you are not root, this call will return EPERM - why one */ |
| /* cannot change one's own priority to lower value is beyond */ |
| /* me. raj 2/26/96 */ |
| setpri(pid, prio); |
| #else /* _AIX */ |
| #ifdef __sgi |
| int pid,prio; |
| |
| prio = PRIORITY; |
| pid = getpid(); |
| schedctl(NDPRI, pid, prio); |
| sginap(0); |
| #else /* __sgi */ |
| #ifdef WIN32 |
| SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE); |
| #else /* WIN32 */ |
| #if defined(__sun) && defined(__SVR4) |
| #include <sys/types.h> |
| #include <sys/priocntl.h> |
| #include <sys/rtpriocntl.h> |
| #include <sys/tspriocntl.h> |
| /* I would *really* like to know how to use priocntl to make the */ |
| /* priority low for this looper process. however, either my mind */ |
| /* is addled, or the manpage in section two for priocntl is not */ |
| /* terribly helpful - for one, it has no examples :( so, if you */ |
| /* can help, I'd love to hear from you. in the meantime, we will */ |
| /* rely on nice(39). raj 2/26/96 */ |
| nice(39); |
| #else /* __sun && __SVR4 */ |
| nice(39); |
| #endif /* __sun && _SVR4 */ |
| #endif /* WIN32 */ |
| #endif /* __sgi */ |
| #endif /* _AIX */ |
| } |
| } |
| } |
| |
| |
| |
| /* this routine will start all the looper processes or threads for */ |
| /* measuring CPU utilization. */ |
| |
| static void |
| start_looper_processes() |
| { |
| |
| unsigned int i, file_size; |
| |
| /* we want at least two pages for each processor. the */ |
| /* child for any one processor will write to the first of his two */ |
| /* pages, and the second page will be a buffer in case there is page */ |
| /* prefetching. if your system pre-fetches more than a single page, */ |
| /* well, you'll have to modify this or live with it :( raj 4/95 */ |
| |
| file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) * |
| lib_num_loc_cpus); |
| |
| #ifndef WIN32 |
| |
| /* we we are not using WINDOWS NT (or 95 actually :), then we want */ |
| /* to create a memory mapped region so we can see all the counting */ |
| /* rates of the loopers */ |
| |
| /* could we just use an anonymous memory region for this? it is */ |
| /* possible that using a mmap()'ed "real" file, while convenient for */ |
| /* debugging, could result in some filesystem activity - like */ |
| /* metadata updates? raj 4/96 */ |
| lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL); |
| |
| if (lib_idle_fd == -1) { |
| fprintf(where,"create_looper: file creation; errno %d\n",errno); |
| fflush(where); |
| exit(1); |
| } |
| |
| if (chmod("/tmp/netperf_cpu",0644) == -1) { |
| fprintf(where,"create_looper: chmod; errno %d\n",errno); |
| fflush(where); |
| exit(1); |
| } |
| |
| /* with the file descriptor in place, lets be sure that the file is */ |
| /* large enough. */ |
| |
| if (truncate("/tmp/netperf_cpu",file_size) == -1) { |
| fprintf(where,"create_looper: truncate: errno %d\n",errno); |
| fflush(where); |
| exit(1); |
| } |
| |
| /* the file should be large enough now, so we can mmap it */ |
| |
| /* if the system does not have MAP_VARIABLE, just define it to */ |
| /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */ |
| #ifndef MAP_VARIABLE |
| #define MAP_VARIABLE 0x0000 |
| #endif /* MAP_VARIABLE */ |
| #ifndef MAP_FILE |
| #define MAP_FILE 0x0000 |
| #endif /* MAP_FILE */ |
| if ((lib_base_pointer = (long *)mmap(NULL, |
| file_size, |
| PROT_READ | PROT_WRITE, |
| MAP_FILE | MAP_SHARED | MAP_VARIABLE, |
| lib_idle_fd, |
| 0)) == (long *)-1) { |
| fprintf(where,"create_looper: mmap: errno %d\n",errno); |
| fflush(where); |
| exit(1); |
| } |
| |
| |
| if (debug > 1) { |
| fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n", |
| lib_num_loc_cpus, |
| file_size, |
| lib_base_pointer); |
| fflush(where); |
| } |
| |
| /* we should have a valid base pointer. lets fork */ |
| |
| for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) { |
| switch (lib_idle_pids[i] = fork()) { |
| case -1: |
| perror("netperf: fork"); |
| exit(1); |
| case 0: |
| /* we are the child. we could decide to exec some separate */ |
| /* program, but that doesn't really seem worthwhile - raj 4/95 */ |
| |
| signal(SIGTERM, SIG_DFL); |
| sit_and_spin(i); |
| |
| /* we should never really get here, but if we do, just exit(0) */ |
| exit(0); |
| break; |
| default: |
| /* we must be the parent */ |
| lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer + |
| (netlib_get_page_size() * |
| PAGES_PER_CHILD * i)); |
| if (debug) { |
| fprintf(where,"lib_idle_address[%d] is %p\n", |
| i, |
| lib_idle_address[i]); |
| fflush(where); |
| } |
| } |
| } |
| #else |
| /* we are compiled -DWIN32 */ |
| if ((lib_base_pointer = malloc(file_size)) == NULL) { |
| fprintf(where, |
| "create_looper_process could not malloc %d bytes\n", |
| file_size); |
| fflush(where); |
| exit(1); |
| } |
| |
| /* now, create all the threads */ |
| for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) { |
| long place_holder; |
| if ((lib_idle_pids[i] = CreateThread(0, |
| 0, |
| (LPTHREAD_START_ROUTINE)sit_and_spin, |
| (LPVOID)(ULONG_PTR)i, |
| 0, |
| &place_holder)) == NULL ) { |
| fprintf(where, |
| "create_looper_process: CreateThread failed\n"); |
| fflush(where); |
| /* I wonder if I need to look for other threads to kill? */ |
| exit(1); |
| } |
| lib_idle_address[i] = (long *) ((char *)lib_base_pointer + |
| (netlib_get_page_size() * |
| PAGES_PER_CHILD * i)); |
| if (debug) { |
| fprintf(where,"lib_idle_address[%d] is %p\n", |
| i, |
| lib_idle_address[i]); |
| fflush(where); |
| } |
| } |
| #endif /* WIN32 */ |
| |
| /* we need to have the looper processes settled-in before we do */ |
| /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */ |
| |
| sleep(30); |
| } |
| |
| void |
| cpu_util_init(void) |
| { |
| cpu_method = LOOPER; |
| |
| /* we want to get the looper processes going */ |
| if (!lib_loopers_running) { |
| start_looper_processes(); |
| lib_loopers_running = 1; |
| } |
| |
| return; |
| } |
| |
| /* clean-up any left-over CPU util resources - looper processes, |
| files, whatever. raj 2005-01-26 */ |
| void |
| cpu_util_terminate() { |
| |
| #ifdef WIN32 |
| /* it would seem that if/when the process exits, all the threads */ |
| /* will go away too, so I don't think I need any explicit thread */ |
| /* killing calls here. raj 1/96 */ |
| #else |
| |
| int i; |
| |
| /* now go through and kill-off all the child processes */ |
| for (i = 0; i < lib_num_loc_cpus; i++){ |
| /* SIGKILL can leave core files behind - thanks to Steinar Haug */ |
| /* for pointing that out. */ |
| kill(lib_idle_pids[i],SIGTERM); |
| } |
| lib_loopers_running = 0; |
| /* reap the children */ |
| while(waitpid(-1, NULL, WNOHANG) > 0) { } |
| |
| /* finally, unlink the mmaped file */ |
| munmap((caddr_t)lib_base_pointer, |
| ((netlib_get_page_size() * PAGES_PER_CHILD) * |
| lib_num_loc_cpus)); |
| unlink("/tmp/netperf_cpu"); |
| #endif |
| return; |
| } |
| |
| int |
| get_cpu_method(void) |
| { |
| return LOOPER; |
| } |
| |
| /* calibrate_looper */ |
| |
| /* Loop a number of iterations, sleeping interval seconds each and */ |
| /* count how high the idle counter gets each time. Return the */ |
| /* measured cpu rate to the calling routine. raj 4/95 */ |
| |
| float |
| calibrate_idle_rate (int iterations, int interval) |
| { |
| |
| uint64_t |
| firstcnt[MAXCPUS], |
| secondcnt[MAXCPUS]; |
| |
| float |
| elapsed, |
| temp_rate, |
| rate[MAXTIMES], |
| local_maxrate; |
| |
| long |
| sec, |
| usec; |
| |
| int |
| i, |
| j; |
| |
| struct timeval time1, time2 ; |
| struct timezone tz; |
| |
| if (iterations > MAXTIMES) { |
| iterations = MAXTIMES; |
| } |
| |
| local_maxrate = (float)-1.0; |
| |
| for(i = 0; i < iterations; i++) { |
| rate[i] = (float)0.0; |
| for (j = 0; j < lib_num_loc_cpus; j++) { |
| firstcnt[j] = *(lib_idle_address[j]); |
| } |
| gettimeofday (&time1, &tz); |
| sleep(interval); |
| gettimeofday (&time2, &tz); |
| |
| if (time2.tv_usec < time1.tv_usec) |
| { |
| time2.tv_usec += 1000000; |
| time2.tv_sec -=1; |
| } |
| sec = time2.tv_sec - time1.tv_sec; |
| usec = time2.tv_usec - time1.tv_usec; |
| elapsed = (float)sec + ((float)usec/(float)1000000.0); |
| |
| if(debug) { |
| fprintf(where, "Calibration for counter run: %d\n",i); |
| fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec); |
| fprintf(where,"\telapsed time = %g\n",elapsed); |
| } |
| |
| for (j = 0; j < lib_num_loc_cpus; j++) { |
| secondcnt[j] = *(lib_idle_address[j]); |
| if(debug) { |
| /* I know that there are situations where compilers know about */ |
| /* long long, but the library fucntions do not... raj 4/95 */ |
| fprintf(where, |
| "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n", |
| j, |
| (uint32_t)(firstcnt[j]>>32), |
| (uint32_t)(firstcnt[j]&0xffffffff), |
| j, |
| (uint32_t)(secondcnt[j]>>32), |
| (uint32_t)(secondcnt[j]&0xffffffff)); |
| } |
| /* we assume that it would wrap no more than once. we also */ |
| /* assume that the result of subtracting will "fit" raj 4/95 */ |
| temp_rate = (secondcnt[j] >= firstcnt[j]) ? |
| (float)(secondcnt[j] - firstcnt[j])/elapsed : |
| (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed; |
| if (temp_rate > rate[i]) rate[i] = temp_rate; |
| if(debug) { |
| fprintf(where,"\trate[%d] = %g\n",i,rate[i]); |
| fflush(where); |
| } |
| if (local_maxrate < rate[i]) local_maxrate = rate[i]; |
| } |
| } |
| if(debug) { |
| fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate); |
| fflush(where); |
| } |
| return local_maxrate; |
| } |
| |
| |
| void |
| get_cpu_idle (uint64_t *res) |
| { |
| int i; |
| |
| for (i = 0; i < lib_num_loc_cpus; i++){ |
| res[i] = *lib_idle_address[i]; |
| } |
| |
| } |
| |
| float |
| calc_cpu_util_internal(float elapsed_time) |
| { |
| int i; |
| float correction_factor; |
| float actual_rate; |
| |
| lib_local_cpu_util = (float)0.0; |
| /* It is possible that the library measured a time other than */ |
| /* the one that the user want for the cpu utilization */ |
| /* calculations - for example, tests that were ended by */ |
| /* watchdog timers such as the udp stream test. We let these */ |
| /* tests tell up what the elapsed time should be. */ |
| |
| if (elapsed_time != 0.0) { |
| correction_factor = (float) 1.0 + |
| ((lib_elapsed - elapsed_time) / elapsed_time); |
| } |
| else { |
| correction_factor = (float) 1.0; |
| } |
| |
| for (i = 0; i < lib_num_loc_cpus; i++) { |
| |
| /* it would appear that on some systems, in loopback, nice is |
| *very* effective, causing the looper process to stop dead in its |
| tracks. if this happens, we need to ensure that the calculation |
| does not go south. raj 6/95 and if we run completely out of idle, |
| the same thing could in theory happen to the USE_KSTAT path. raj |
| 8/2000 */ |
| |
| if (lib_end_count[i] == lib_start_count[i]) { |
| lib_end_count[i]++; |
| } |
| |
| actual_rate = (lib_end_count[i] > lib_start_count[i]) ? |
| (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed : |
| (float)(lib_end_count[i] - lib_start_count[i] + |
| MAXLONG)/ lib_elapsed; |
| if (debug) { |
| fprintf(where, |
| "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n", |
| i, |
| actual_rate, |
| (uint32_t)(lib_start_count[i]>>32), |
| (uint32_t)(lib_start_count[i]&0xffffffff), |
| (uint32_t)(lib_end_count[i]>>32), |
| (uint32_t)(lib_end_count[i]&0xffffffff)); |
| } |
| lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) / |
| lib_local_maxrate * 100; |
| lib_local_cpu_util += lib_local_per_cpu_util[i]; |
| } |
| /* we want the average across all n processors */ |
| lib_local_cpu_util /= (float)lib_num_loc_cpus; |
| |
| lib_local_cpu_util *= correction_factor; |
| return lib_local_cpu_util; |
| |
| |
| } |
| void |
| cpu_start_internal(void) |
| { |
| get_cpu_idle(lib_start_count); |
| return; |
| } |
| |
| void |
| cpu_stop_internal(void) |
| { |
| get_cpu_idle(lib_end_count); |
| } |