blob: 3d98f299f9ae6914252a496590a34a891f11c5f3 [file] [log] [blame]
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/****************************************************************************************/
/* Dynamic detection and loading of IPP modules */
/****************************************************************************************/
#include "_cxcore.h"
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4115 ) /* type definition in () */
#endif
#if defined _MSC_VER && defined WIN64 && !defined EM64T
#pragma optimize( "", off )
#endif
#if defined WIN32 || defined WIN64
#include <windows.h>
#else
#include <dlfcn.h>
#include <sys/time.h>
#endif
#include <string.h>
#include <stdio.h>
#include <ctype.h>
/* Processor model codes. The low CV_PROC_SHIFT bits of a model value hold the
   architecture id; for IA32 the bits above them hold a feature level
   (2 - MMX, 3 - SSE, 4 - SSE2). */
#define CV_PROC_GENERIC 0
#define CV_PROC_SHIFT 10
/* mask that selects the architecture id from a model value */
#define CV_PROC_ARCH_MASK ((1 << CV_PROC_SHIFT) - 1)
#define CV_PROC_IA32_GENERIC 1
#define CV_PROC_IA32_WITH_MMX (CV_PROC_IA32_GENERIC|(2 << CV_PROC_SHIFT))
#define CV_PROC_IA32_WITH_SSE (CV_PROC_IA32_GENERIC|(3 << CV_PROC_SHIFT))
#define CV_PROC_IA32_WITH_SSE2 (CV_PROC_IA32_GENERIC|(4 << CV_PROC_SHIFT))
#define CV_PROC_IA64 2
#define CV_PROC_EM64T 3
/* extracts the architecture id (CV_PROC_GENERIC, CV_PROC_IA32_GENERIC,
   CV_PROC_IA64 or CV_PROC_EM64T) from a model value */
#define CV_GET_PROC_ARCH(model) ((model) & CV_PROC_ARCH_MASK)
/* Description of the host processor, filled in by icvInitProcessorInfo(). */
typedef struct CvProcessorInfo
{
/* one of the CV_PROC_* model codes */
int model;
/* number of processors; only assigned on the Windows/Intel detection path,
   otherwise left at zero */
int count;
/* value returned by cvGetTickFrequency() */
double frequency; // clocks per microsecond
}
CvProcessorInfo;
#undef MASM_INLINE_ASSEMBLY
#if defined WIN32 && !defined WIN64
#if defined _MSC_VER
#define MASM_INLINE_ASSEMBLY 1
#elif defined __BORLANDC__
#if __BORLANDC__ >= 0x560
#define MASM_INLINE_ASSEMBLY 1
#endif
#endif
#endif
/*
   Determines the processor type and the tick frequency that goes with
   cvGetTickCount().

   cpu_info - output structure. It is zero-filled first and then receives:
       model     - one of the CV_PROC_* codes,
       count     - number of processors (assigned only on the Windows path
                   taken for Intel P6+ processors, otherwise left at 0),
       frequency - timer ticks per microsecond.

   Windows, Intel P6+ : the clock rate is read from the registry ("~MHz") and
                        the feature level (MMX/SSE/SSE2) is obtained with CPUID.
   Windows, otherwise : the performance counter frequency is used.
   Other systems      : the frequency is 1 (gettimeofday() counts microseconds),
                        except for 32-bit x86 where /proc/cpuinfo is parsed for
                        the feature flags and the clock rate.
*/
static void
icvInitProcessorInfo( CvProcessorInfo* cpu_info )
{
    memset( cpu_info, 0, sizeof(*cpu_info) );
    cpu_info->model = CV_PROC_GENERIC;

#if defined WIN32 || defined WIN64

#ifndef PROCESSOR_ARCHITECTURE_AMD64
#define PROCESSOR_ARCHITECTURE_AMD64 9
#endif

#ifndef PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
#define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10
#endif

    SYSTEM_INFO sys;
    LARGE_INTEGER freq;

    GetSystemInfo( &sys );

    if( sys.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL &&
        sys.dwProcessorType == PROCESSOR_INTEL_PENTIUM && sys.wProcessorLevel >= 6 )
    {
        int version = 0, features = 0, family = 0;
        int id = 0;
        HKEY key = 0;

        cpu_info->count = (int)sys.dwNumberOfProcessors;
        unsigned long val = 0, sz = sizeof(val);

        /* The registry functions return ERROR_SUCCESS (0) on success and a
           positive error code on failure, so ">= 0" would accept failures too. */
        if( RegOpenKeyEx( HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\SYSTEM\\CentralProcessor\\0\\",
            0, KEY_QUERY_VALUE, &key ) == ERROR_SUCCESS )
        {
            if( RegQueryValueEx( key, "~MHz", 0, 0, (uchar*)&val, &sz ) == ERROR_SUCCESS )
                cpu_info->frequency = (double)val;
            RegCloseKey( key );
        }

#ifdef MASM_INLINE_ASSEMBLY
        __asm
        {
            /* use CPUID to determine the features supported */
            pushfd
            mov   eax, 1
            push  ebx
            push  esi
            push  edi
#ifdef __BORLANDC__
            db 0fh
            db 0a2h
#else
            _emit 0x0f
            _emit 0xa2
#endif
            pop   edi
            pop   esi
            pop   ebx
            mov   version, eax
            mov   features, edx
            popfd
        }
#elif defined WIN32 && __GNUC__ > 2
        /* CPUID(1): EAX receives the version, EDX the feature bits; EBX and ECX
           are overwritten as well. Binding the outputs to EAX/EDX and listing
           EBX/ECX as clobbered keeps the compiler from placing an operand in a
           register that the instruction destroys. */
        asm volatile
        (
            ".byte 0x0f; .byte 0xa2\n\t"
            : "=a"(version), "=d"(features)
            : "0"(1)
            : "%ebx", "%ecx"
        );
#else
        {
            /* NOTE(review): this fallback calls machine code stored in a data
               array (push ebx/esi/edi; mov eax,1; cpuid; pop ...; ret); it
               requires the array to reside in executable memory. */
            static const char cpuid_code[] =
                "\x53\x56\x57\xb8\x01\x00\x00\x00\x0f\xa2\x5f\x5e\x5b\xc3";
            typedef int64 (CV_CDECL * func_ptr)(void);
            func_ptr cpuid = (func_ptr)(void*)cpuid_code;
            int64 cpuid_val = cpuid();
            version = (int)cpuid_val;
            features = (int)(cpuid_val >> 32);
        }
#endif

        #define ICV_CPUID_M6     ((1<<15)|(1<<23))  /* cmov + MMX */
        #define ICV_CPUID_A6     ((1<<25)|ICV_CPUID_M6) /* <all above> + SSE */
        #define ICV_CPUID_W7     ((1<<26)|ICV_CPUID_A6) /* <all above> + SSE2 */

        family = (version >> 8) & 15;
        if( family >= 6 && (features & ICV_CPUID_M6) != 0 ) /* Pentium II or higher */
            id = features & ICV_CPUID_W7;

        cpu_info->model = id == ICV_CPUID_W7 ? CV_PROC_IA32_WITH_SSE2 :
                          id == ICV_CPUID_A6 ? CV_PROC_IA32_WITH_SSE :
                          id == ICV_CPUID_M6 ? CV_PROC_IA32_WITH_MMX :
                          CV_PROC_IA32_GENERIC;
    }
    else
    {
#if defined EM64T
        if( sys.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64 )
            cpu_info->model = CV_PROC_EM64T;
#elif defined WIN64
        if( sys.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_IA64 )
            cpu_info->model = CV_PROC_IA64;
#endif
        /* QueryPerformanceFrequency() reports ticks per second, while
           CvProcessorInfo::frequency is expressed in ticks per microsecond. */
        if( QueryPerformanceFrequency( &freq ) )
            cpu_info->frequency = (double)freq.QuadPart*1e-6;
    }
#else
    /* gettimeofday()-based tick counter runs at 1 tick per microsecond */
    cpu_info->frequency = 1;

#ifdef __x86_64__
    cpu_info->model = CV_PROC_EM64T;
#elif defined __ia64__
    cpu_info->model = CV_PROC_IA64;
#elif !defined __i386__
    cpu_info->model = CV_PROC_GENERIC;
#else
    cpu_info->model = CV_PROC_IA32_GENERIC;

    // reading /proc/cpuinfo file (proc file system must be supported)
    FILE *file = fopen( "/proc/cpuinfo", "r" );

    if( file )
    {
        char buffer[1024];
        int max_size = sizeof(buffer)-1;

        for(;;)
        {
            const char* ptr = fgets( buffer, max_size, file );
            if( !ptr )
                break;
            if( strncmp( buffer, "flags", 5 ) == 0 )
            {
                if( strstr( buffer, "mmx" ) && strstr( buffer, "cmov" ))
                {
                    cpu_info->model = CV_PROC_IA32_WITH_MMX;
                    if( strstr( buffer, "xmm" ) || strstr( buffer, "sse" ))
                    {
                        cpu_info->model = CV_PROC_IA32_WITH_SSE;
                        /* "sse2" is the flag name printed by current kernels;
                           "emm" is kept for compatibility with the old check */
                        if( strstr( buffer, "emm" ) || strstr( buffer, "sse2" ))
                            cpu_info->model = CV_PROC_IA32_WITH_SSE2;
                    }
                }
            }
            else if( strncmp( buffer, "cpu MHz", 7 ) == 0 )
            {
                char* pos = strchr( buffer, ':' );
                if( pos )
                    cpu_info->frequency = strtod( pos + 1, &pos );
            }
        }

        fclose( file );

        /* cvGetTickCount() reads the time-stamp counter only when the
           frequency is > 1; if no usable "cpu MHz" value was found, fall back
           to the microsecond clock instead of aborting. */
        if( CV_GET_PROC_ARCH(cpu_info->model) != CV_PROC_IA32_GENERIC ||
            !(cpu_info->frequency > 1) )
            cpu_info->frequency = 1;
    }
#endif
#endif
}
/* Returns the processor description; detection runs on the first call only
   and the result is kept in a function-local static afterwards. */
CV_INLINE const CvProcessorInfo*
icvGetProcessorInfo()
{
    static CvProcessorInfo cached_info;
    static int is_cached = 0;

    if( is_cached )
        return &cached_info;

    icvInitProcessorInfo( &cached_info );
    is_cached = 1;
    return &cached_info;
}
/****************************************************************************************/
/* Make functions descriptions */
/****************************************************************************************/
/* Redefine the declaration macros so that every IPCVAPI_EX / IPCVAPI_C_EX
   entry of the included header expands to one initializer row of the table
   below: the address of the func_name##_p pointer, the (size_t)-1 marker,
   the name list, the module list and 0. The `type` and `arg` parameters are
   not used by the expansion. */
#undef IPCVAPI_EX
#define IPCVAPI_EX(type,func_name,names,modules,arg) \
{ (void**)&func_name##_p, (void*)(size_t)-1, names, modules, 0 },
#undef IPCVAPI_C_EX
#define IPCVAPI_C_EX(type,func_name,names,modules,arg) \
{ (void**)&func_name##_p, (void*)(size_t)-1, names, modules, 0 },
/* Table of the cxcore functions that may be replaced by plugin
   implementations; it is terminated by an all-zero row. */
static CvPluginFuncInfo cxcore_ipp_tab[] =
{
/* presumably _CXCORE_IPP_H_ is the include guard of _cxipp.h and is undefined
   here so that the header can be expanded again - TODO confirm */
#undef _CXCORE_IPP_H_
#include "_cxipp.h"
#undef _CXCORE_IPP_H_
{0, 0, 0, 0, 0}
};
/*
determine processor type, load appropriate dll and
initialize all function pointers
*/
/* Platform-dependent pieces of a plugin file name. On non-Windows systems the
   Win32 loader names used by the code below are mapped to their dlfcn
   counterparts. */
#if defined WIN32 || defined WIN64
#define DLL_PREFIX ""
#define DLL_SUFFIX ".dll"
#else
#define DLL_PREFIX "lib"
#define DLL_SUFFIX ".so"
#define LoadLibrary(name) dlopen(name, RTLD_LAZY)
#define FreeLibrary(name) dlclose(name)
#define GetProcAddress dlsym
typedef void* HMODULE;
#endif
/* The debug-suffix variant is disabled: DLL_DEBUG_FLAG is always empty. */
#if 0 /*def _DEBUG*/
#define DLL_DEBUG_FLAG "d"
#else
#define DLL_DEBUG_FLAG ""
#endif
/* Set VERBOSE_LOADING to 1 to print the loading progress to stdout.
   ICV_PRINTF takes its printf arguments in an extra pair of parentheses. */
#define VERBOSE_LOADING 0
#if VERBOSE_LOADING
#define ICV_PRINTF(args) printf args; fflush(stdout)
#else
#define ICV_PRINTF(args)
#endif
/* State of one optional plugin library slot. */
typedef struct CvPluginInfo
{
/* library name without prefix, version suffix and extension; 0 if the slot is unused */
const char* basename;
/* handle returned by LoadLibrary(); 0 while the library is not loaded */
HMODULE handle;
/* full file name that was last passed to LoadLibrary() for this slot */
char name[100];
}
CvPluginInfo;
/* one slot per CV_PLUGIN_* index */
static CvPluginInfo plugins[CV_PLUGIN_MAX];
/* registration record of cxcore itself */
static CvModuleInfo cxcore_info = { 0, "cxcore", CV_VERSION, cxcore_ipp_tab };
/* head and tail of the singly-linked list of registered modules */
CvModuleInfo *CvModule::first = 0, *CvModule::last = 0;
/* Registers the module described by _info and remembers the list node that
   cvRegisterModule() created for it.
   If the registration fails, `info` is set to 0 so that the destructor does
   not unlink and free a node that belongs to a different module. */
CvModule::CvModule( CvModuleInfo* _info )
{
    /* cvRegisterModule() returns 0 on success and appends the copy to the
       end of the list, i.e. the new node is `last` */
    info = cvRegisterModule( _info ) == 0 ? last : 0;
}
/* Removes this module's node from the list of registered modules and
   releases it. Does nothing if there is no node. */
CvModule::~CvModule()
{
    if( !info )
        return;

    /* find the node that precedes `info`; stays 0 if `info` is the head
       (or is not in the list) */
    CvModuleInfo* prev = first;
    while( prev != 0 && prev->next != info )
        prev = prev->next;

    if( prev != 0 )
        prev->next = info->next;

    if( info == first )
        first = info->next;

    if( info == last )
        last = prev;

    cvFree( &info );
    info = 0;
}
/*
   Rebinds the function pointers listed in func_tab to the implementations
   exported by the currently loaded plugins.

   func_tab - table terminated by an entry with func_addr == 0. For every entry:
       func_names     - comma-separated list of candidate export names,
       search_modules - plugin indices packed 4 bits per candidate (the k-th
                        nibble selects the plugin searched for the k-th name).

   Each pointer is first reset to its default implementation (the default is
   captured on the first call, when default_func_addr still holds the
   (size_t)-1 marker). Then the candidates are tried in order and the first
   export found replaces the default.

   Returns the number of functions bound to a plugin implementation.
*/
static int
icvUpdatePluginFuncTab( CvPluginFuncInfo* func_tab )
{
    /* an int holds sizeof(int)*2 nibbles; going further would shift by the
       full width of the type or more */
    const int max_names = (int)(sizeof(int)*2);
    int i, loaded_functions = 0;

    // 1. reset pointers
    for( i = 0; func_tab[i].func_addr != 0; i++ )
    {
        if( func_tab[i].default_func_addr == (void*)(size_t)-1 )
            func_tab[i].default_func_addr = *func_tab[i].func_addr;
        else
            *func_tab[i].func_addr = func_tab[i].default_func_addr;
        func_tab[i].loaded_from = 0;
    }

    // ippopencv substitutes all the other IPP modules
    if( plugins[CV_PLUGIN_OPTCV].handle != 0 )
    {
        for( i = 2; i < CV_PLUGIN_MKL; i++ )
        {
            assert( plugins[i].handle == 0 );
            plugins[i].handle = plugins[CV_PLUGIN_OPTCV].handle;
        }
    }

    // 2. try to find corresponding functions in ipp* and reassign pointers to them
    for( i = 0; func_tab[i].func_addr != 0; i++ )
    {
    #if defined _MSC_VER && _MSC_VER >= 1200
        #pragma warning( disable: 4054 4055 )   /* converting pointers to code<->data */
    #endif
        char name[100];
        int j = 0, idx = 0;

        assert( func_tab[i].loaded_from == 0 );

        if( func_tab[i].search_modules )
        {
            uchar* addr = 0;
            const char* name_ptr = func_tab[i].func_names;

            for( ; j < max_names && name_ptr; j++ )
            {
                const char* name_start = name_ptr;
                const char* name_end;

                // skip separators and spaces in front of the name
                while( !isalpha((unsigned char)name_start[0]) && name_start[0] != '\0' )
                    name_start++;
                if( !name_start[0] )
                    name_start = 0;
                name_end = name_start ? strchr( name_start, ',' ) : 0;
                idx = (func_tab[i].search_modules / (1<<j*4)) % CV_PLUGIN_MAX;

                if( plugins[idx].handle != 0 && name_start )
                {
                    size_t name_len = name_end != 0 ?
                        (size_t)(name_end - name_start) : strlen( name_start );

                    // a name that does not fit into the buffer is skipped
                    // rather than copied past the end of it
                    if( name_len < sizeof(name) )
                    {
                        memcpy( name, name_start, name_len );
                        name[name_len] = '\0';

                        addr = (uchar*)GetProcAddress( plugins[idx].handle, name );
                        if( addr )
                            break;
                    }
                }
                name_ptr = name_end;
            }

            if( addr )
            {
            /*#ifdef WIN32
                while( *addr == 0xE9 )
                    addr += 5 + *((int*)(addr + 1));
            #endif*/
                *func_tab[i].func_addr = addr;
                func_tab[i].loaded_from = idx; // store index of the module
                                               // that contain the loaded function
                loaded_functions++;
                ICV_PRINTF(("%s: \t%s\n", name, plugins[idx].name ));
            }

            #if defined _MSC_VER && _MSC_VER >= 1200
                #pragma warning( default: 4054 4055 )
            #endif
        }
    }

#if VERBOSE_LOADING
    {
    int not_loaded = 0;
    ICV_PRINTF(("\nTotal loaded: %d\n\n", loaded_functions ));
    printf( "***************************************************\nNot loaded ...\n\n" );
    for( i = 0; func_tab[i].func_addr != 0; i++ )
        if( !func_tab[i].loaded_from )
        {
            ICV_PRINTF(( "%s\n", func_tab[i].func_names ));
            not_loaded++;
        }
    ICV_PRINTF(("\nTotal: %d\n", not_loaded ));
    }
#endif

    // undo the temporary handle substitution made above
    if( plugins[CV_PLUGIN_OPTCV].handle != 0 )
    {
        for( i = 2; i < CV_PLUGIN_MKL; i++ )
            plugins[i].handle = 0;
    }

    return loaded_functions;
}
/*
   Registers a module: stores a private copy of *module (the name and version
   strings are copied into the same allocation), appends it to the list of
   registered modules and binds its function table to the loaded plugins.
   When the very first module is registered the optimized libraries are
   loaded via cvUseOptimized(1).

   module - module description; module, module->name and module->version
            must be non-zero.

   Returns 0 on success and -1 on failure. On failure the copy is removed
   from the list again before it is released, so the list never refers to
   freed memory.
*/
CV_IMPL int
cvRegisterModule( const CvModuleInfo* module )
{
    CvModuleInfo* module_copy = 0;
    CvModuleInfo* prev_last = 0;
    int is_linked = 0;

    CV_FUNCNAME( "cvRegisterModule" );

    __BEGIN__;

    size_t name_len, version_len;

    CV_ASSERT( module != 0 && module->name != 0 && module->version != 0 );

    name_len = strlen(module->name);
    version_len = strlen(module->version);

    // one block: the structure, followed by the two zero-terminated strings
    CV_CALL( module_copy = (CvModuleInfo*)cvAlloc( sizeof(*module_copy) +
                                name_len + 1 + version_len + 1 ));

    *module_copy = *module;
    module_copy->name = (char*)(module_copy + 1);
    module_copy->version = (char*)(module_copy + 1) + name_len + 1;

    memcpy( (void*)module_copy->name, module->name, name_len + 1 );
    memcpy( (void*)module_copy->version, module->version, version_len + 1 );
    module_copy->next = 0;

    // append to the list, remembering the old tail for the failure path
    prev_last = CvModule::last;
    if( CvModule::first == 0 )
        CvModule::first = module_copy;
    else
        CvModule::last->next = module_copy;
    CvModule::last = module_copy;
    is_linked = 1;

    if( CvModule::first == CvModule::last )
    {
        // the first module: load the plugins, this also updates all the tables
        CV_CALL( cvUseOptimized(1));
    }
    else
    {
        CV_CALL( icvUpdatePluginFuncTab( module_copy->func_tab ));
    }

    __END__;

    if( cvGetErrStatus() < 0 && module_copy )
    {
        if( is_linked )
        {
            // the copy is the tail of the list; restore the previous tail
            if( prev_last )
                prev_last->next = 0;
            else
                CvModule::first = 0;
            CvModule::last = prev_last;
        }
        cvFree( &module_copy );
    }

    return module_copy ? 0 : -1;
}
/*
   Loads or unloads the optional optimized libraries (ippopencv, IPP, MKL)
   and rebinds the function tables of all the registered modules.

   load_flag - non-zero: unload whatever is loaded and try to load the
               libraries again; zero: only unload, which switches every
               function back to its default implementation.

   For each plugin the known version/architecture suffixes are tried in order
   until a library loads. Regular IPP libraries are not loaded when the merged
   "ippopencv" library has been found.

   Returns the total number of functions bound to plugin implementations.
*/
CV_IMPL int
cvUseOptimized( int load_flag )
{
    int i, loaded_modules = 0, loaded_functions = 0;
    CvModuleInfo* module;
    const CvProcessorInfo* cpu_info = icvGetProcessorInfo();
    int arch = CV_GET_PROC_ARCH(cpu_info->model);

    // TODO: implement some more elegant way
    // to find the latest and the greatest IPP/MKL libraries
    static const char* opencv_sfx[] = { "100", "099", "097", 0 };
    static const char* ipp_sfx_ia32[] = { "-6.1", "-6.0", "-5.3", "-5.2", "-5.1", "", 0 };
    static const char* ipp_sfx_ia64[] = { "64-6.1", "64-6.0", "64-5.3", "64-5.2", "64-5.1", "64", 0 };
    static const char* ipp_sfx_em64t[] = { "em64t-6.1", "em64t-6.0", "em64t-5.3", "em64t-5.2", "em64t-5.1", "em64t", 0 };
    static const char* mkl_sfx_ia32[] = { "p4", "p3", "def", 0 };
    static const char* mkl_sfx_ia64[] = { "i2p", "itp", 0 };
    static const char* mkl_sfx_em64t[] = { "def", 0 };
    const char** ipp_suffix = arch == CV_PROC_IA64 ? ipp_sfx_ia64 :
                              arch == CV_PROC_EM64T ? ipp_sfx_em64t : ipp_sfx_ia32;
    const char** mkl_suffix = arch == CV_PROC_IA64 ? mkl_sfx_ia64 :
                              arch == CV_PROC_EM64T ? mkl_sfx_em64t : mkl_sfx_ia32;

    for( i = 0; i < CV_PLUGIN_MAX; i++ )
        plugins[i].basename = 0;
    plugins[CV_PLUGIN_NONE].basename = 0;
    plugins[CV_PLUGIN_NONE].name[0] = '\0';
    plugins[CV_PLUGIN_OPTCV].basename = "ippopencv";
    plugins[CV_PLUGIN_IPPCV].basename = "ippcv";
    plugins[CV_PLUGIN_IPPI].basename = "ippi";
    plugins[CV_PLUGIN_IPPS].basename = "ipps";
    plugins[CV_PLUGIN_IPPVM].basename = "ippvm";
    plugins[CV_PLUGIN_IPPCC].basename = "ippcc";
    plugins[CV_PLUGIN_MKL].basename = "mkl_";

    // try to load optimized dlls
    for( i = 1; i < CV_PLUGIN_MAX; i++ )
    {
        // unload previously loaded optimized modules
        if( plugins[i].handle )
        {
            FreeLibrary( plugins[i].handle );
            plugins[i].handle = 0;
        }

        // do not load regular IPP modules if the custom merged IPP module is already found.
        if( i < CV_PLUGIN_MKL && load_flag && plugins[CV_PLUGIN_OPTCV].handle != 0 )
            continue;

        if( load_flag && plugins[i].basename &&
            (arch == CV_PROC_IA32_GENERIC || arch == CV_PROC_IA64 || arch == CV_PROC_EM64T) )
        {
            const char** suffix = i == CV_PLUGIN_OPTCV ? opencv_sfx :
                                  i < CV_PLUGIN_MKL ? ipp_suffix : mkl_suffix;

            if( suffix == mkl_sfx_ia32 )
            {
                // The feature level is stored above the architecture bits
                // (MMX < SSE < SSE2). It has to be compared as a number:
                // a bitwise AND with CV_PROC_IA32_WITH_* is non-zero for every
                // IA32 model, because bit 0 is the architecture id.
                int feature_level = cpu_info->model >> CV_PROC_SHIFT;

                if( feature_level < (CV_PROC_IA32_WITH_SSE2 >> CV_PROC_SHIFT) )
                    suffix++;   // no SSE2: skip "p4"
                if( feature_level < (CV_PROC_IA32_WITH_SSE >> CV_PROC_SHIFT) )
                    suffix++;   // no SSE: skip "p3" as well
            }

            for( ; *suffix != 0; suffix++ )
            {
                sprintf( plugins[i].name, DLL_PREFIX "%s%s" DLL_DEBUG_FLAG DLL_SUFFIX,
                    plugins[i].basename, *suffix );

                ICV_PRINTF(("loading %s...\n", plugins[i].name ));
                plugins[i].handle = LoadLibrary( plugins[i].name );
                if( plugins[i].handle != 0 )
                {
                    ICV_PRINTF(("%s loaded\n", plugins[i].name ));
                    loaded_modules++;
                    break;
                }
                #if !defined WIN32 && !defined WIN64
                // temporary workaround for MacOSX
                sprintf( plugins[i].name, DLL_PREFIX "%s%s" DLL_DEBUG_FLAG ".dylib",
                    plugins[i].basename, *suffix );

                ICV_PRINTF(("loading %s...\n", plugins[i].name ));
                plugins[i].handle = LoadLibrary( plugins[i].name );
                if( plugins[i].handle != 0 )
                {
                    ICV_PRINTF(("%s loaded\n", plugins[i].name ));
                    loaded_modules++;
                    break;
                }
                #endif
            }
        }
    }

    // rebind the functions of every registered module
    for( module = CvModule::first; module != 0; module = module->next )
        loaded_functions += icvUpdatePluginFuncTab( module->func_tab );

    return loaded_functions;
}
/* Global object whose constructor registers cxcore itself (via cvRegisterModule). */
CvModule cxcore_module( &cxcore_info );
/*
   Retrieves information about the registered modules and the loaded plugins.

   name        - module name (compared case-insensitively), or 0 to get the
                 information about all the registered modules.
   version     - optional output. Receives the version string of the module,
                 or, when name is 0, a list "name: version, name: version, ...".
                 It is set to 0 if the module is not found (an error
                 CV_StsObjectNotFound is raised in that case).
   plugin_list - optional output. Receives a comma-separated list of the file
                 names of the loaded plugins, or "" if none is loaded.

   The lists are built in static buffers: the returned pointers stay valid
   until the next call and must not be freed. Entries that do not fit into a
   buffer are left out.
*/
CV_IMPL void
cvGetModuleInfo( const char* name, const char **version, const char **plugin_list )
{
    static char joint_verinfo[1024] = "";
    static char plugin_list_buf[1024] = "";

    CV_FUNCNAME( "cvGetModuleInfo" );

    if( version )
        *version = 0;

    if( plugin_list )
        *plugin_list = 0;

    __BEGIN__;

    CvModuleInfo* module;

    if( version )
    {
        if( name )
        {
            size_t i, name_len = strlen(name);

            for( module = CvModule::first; module != 0; module = module->next )
            {
                if( strlen(module->name) == name_len )
                {
                    for( i = 0; i < name_len; i++ )
                    {
                        // <ctype.h> functions need values in the unsigned char range
                        int c0 = toupper((unsigned char)module->name[i]);
                        int c1 = toupper((unsigned char)name[i]);
                        if( c0 != c1 )
                            break;
                    }
                    if( i == name_len )
                        break;
                }
            }
            if( !module )
                CV_ERROR( CV_StsObjectNotFound, "The module is not found" );
            *version = module->version;
        }
        else
        {
            char* ptr = joint_verinfo;
            size_t space_left = sizeof(joint_verinfo);

            // start from an empty string, so that nothing from a previous
            // call is returned when no module is registered
            joint_verinfo[0] = '\0';

            for( module = CvModule::first; module != 0; module = module->next )
            {
                const char* separator = ptr > joint_verinfo ? ", " : "";
                size_t entry_len = strlen(separator) + strlen(module->name) +
                                   2 /* ": " */ + strlen(module->version);

                // stop before writing past the end of the buffer
                if( entry_len >= space_left )
                    break;

                sprintf( ptr, "%s%s: %s", separator, module->name, module->version );
                ptr += entry_len;
                space_left -= entry_len;
            }

            *version = joint_verinfo;
        }
    }

    if( plugin_list )
    {
        char* ptr = plugin_list_buf;
        size_t space_left = sizeof(plugin_list_buf);
        int i;

        for( i = 0; i < CV_PLUGIN_MAX; i++ )
            if( plugins[i].handle != 0 )
            {
                size_t entry_len = strlen(plugins[i].name) + 2 /* ", " */;

                if( entry_len >= space_left )
                    break;

                sprintf( ptr, "%s, ", plugins[i].name );
                ptr += entry_len;
                space_left -= entry_len;
            }

        if( ptr > plugin_list_buf )
        {
            // cut off the trailing ", "
            ptr[-2] = '\0';
            *plugin_list = plugin_list_buf;
        }
        else
            *plugin_list = "";
    }

    __END__;
}
/* signature of the raw machine-code routine used by the generic fallback in cvGetTickCount() */
typedef int64 (CV_CDECL * rdtsc_func)(void);
/* helper functions for RNG initialization and accurate time measurement */
/* Returns the current value of a tick counter. The counter source depends on
   the detected processor: bytes 0F 31 (marked "rdtsc" in the GNU branch) are
   executed when the processor is IA32 and its frequency is known (> 1);
   otherwise the OS timer is used (QueryPerformanceCounter() on Windows,
   gettimeofday() in microseconds elsewhere). */
CV_IMPL int64 cvGetTickCount( void )
{
const CvProcessorInfo* cpu_info = icvGetProcessorInfo();
if( cpu_info->frequency > 1 &&
CV_GET_PROC_ARCH(cpu_info->model) == CV_PROC_IA32_GENERIC )
{
#ifdef MASM_INLINE_ASSEMBLY
/* NOTE(review): this branch has no return statement; presumably it relies on
   the 64-bit result being left in EDX:EAX by the emitted instruction - confirm
   for the compilers in use */
#ifdef __BORLANDC__
__asm db 0fh
__asm db 31h
#else
__asm _emit 0x0f;
__asm _emit 0x31;
#endif
#elif (defined __GNUC__ || defined CV_ICC) && defined __i386__
int64 t;
asm volatile (".byte 0xf; .byte 0x31" /* "rdtsc" */ : "=A" (t));
return t;
#else
/* generic fallback: calls the bytes 0F 31 C3 stored in a data array through a
   function pointer; NOTE(review): requires the array to be in executable memory */
static const char code[] = "\x0f\x31\xc3";
rdtsc_func func = (rdtsc_func)(void*)code;
return func();
#endif
}
else
{
#if defined WIN32 || defined WIN64
LARGE_INTEGER counter;
QueryPerformanceCounter( &counter );
return (int64)counter.QuadPart;
#else
/* wall-clock time in microseconds */
struct timeval tv;
struct timezone tz;
gettimeofday( &tv, &tz );
return (int64)tv.tv_sec*1000000 + tv.tv_usec;
#endif
}
}
/* Returns the frequency stored in the cached processor description. */
CV_IMPL double cvGetTickFrequency()
{
    const CvProcessorInfo* cpu_info = icvGetProcessorInfo();
    return cpu_info->frequency;
}
/* number of threads selected by cvSetNumThreads() */
static int icvNumThreads = 0;
/* number of processors; 0 means that cvSetNumThreads() has not been called yet */
static int icvNumProcs = 0;
/* Returns the current number of threads. The threading settings are
   initialized with the default (cvSetNumThreads(0)) on first use. */
CV_IMPL int cvGetNumThreads(void)
{
    if( icvNumProcs == 0 )
        cvSetNumThreads( 0 );

    return icvNumThreads;
}
/* Sets the number of threads.
   threads - requested number of threads; a value <= 0 selects the number of
             processors (limited by CV_MAX_THREADS). A positive value is taken
             as is, i.e. it is not clamped to the number of processors.
   Without OpenMP the argument is ignored and the number of threads is 1. */
CV_IMPL void cvSetNumThreads( int threads )
{
#ifdef _OPENMP
    // query the number of processors once
    if( icvNumProcs == 0 )
    {
        icvNumProcs = omp_get_num_procs();
        icvNumProcs = MIN( icvNumProcs, CV_MAX_THREADS );
    }

    icvNumThreads = threads > 0 ? threads : icvNumProcs;
#else
    if( icvNumProcs == 0 )
        icvNumProcs = 1;

    icvNumThreads = 1;
#endif
}
/* Returns the index of the calling thread as reported by OpenMP,
   or 0 when the library is built without OpenMP. */
CV_IMPL int cvGetThreadNum(void)
{
    int thread_idx = 0;
#ifdef _OPENMP
    thread_idx = omp_get_thread_num();
#endif
    return thread_idx;
}
/* End of file. */