blob: 749117a1a20b82d278071ec71423def39f151099 [file] [log] [blame]
/*
**
** Copyright 2009, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkBlitRow_opts_SSE2.h"
#include "SkUtils_opts_SSE2.h"
#include "SkUtils.h"
/* This file must *not* be compiled with -msse or -msse2, otherwise
gcc may generate sse2 even for scalar ops (and thus give an invalid
instruction on Pentium3 on the code below). Only files named *_SSE2.cpp
in this directory should be compiled with -msse2. */
#if defined(__x86_64__) || defined(_WIN64)
/* SSE2 is part of the baseline x86_64 instruction set, so no runtime probe is
   needed on 64-bit builds: the answer is always yes. */
static inline bool hasSSE2() {
    const bool kSSE2AlwaysPresent = true;
    return kSSE2AlwaysPresent;
}
#else
#ifdef _MSC_VER
// Runs the CPUID instruction with EAX = info_type and stores the resulting
// EAX, EBX, ECX and EDX values into info[0], info[1], info[2] and info[3]
// respectively. This is the MSVC inline-assembly variant; it is only reached
// when neither __x86_64__ nor _WIN64 is defined.
static inline void getcpuid(int info_type, int info[4]) {
__asm {
mov eax, [info_type]
cpuid
// edi receives the info pointer; the four result registers are then written
// through it at byte offsets 0, 4, 8 and 12.
mov edi, [info]
mov [edi], eax
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], edx
}
}
#else
/* Runs the CPUID instruction with EAX = info_type and stores the resulting
   EAX, EBX, ECX and EDX values into info[0], info[1], info[2] and info[3]
   respectively. ECX is not initialised before the instruction executes. */
static inline void getcpuid(int info_type, int info[4]) {
#if defined(__i386__)
    // On 32-bit x86, ebx holds the GOT pointer under -fPIC and may not be
    // available as an asm operand, so it is preserved by hand: its old value
    // is parked in edi across cpuid and swapped back afterwards, which leaves
    // cpuid's ebx result in edi.
    //
    // The ebx result is pinned to edi ("=D") instead of a free-choice "=r"
    // operand: with "=r" the compiler may select ebx itself in non-PIC builds,
    // and restoring ebx would then overwrite the value just stored in info[1].
    // Saving through a register also keeps the stack pointer untouched.
    asm volatile (
        "movl %%ebx, %%edi  \n\t"
        "cpuid              \n\t"
        "xchgl %%ebx, %%edi \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
#else
    // Elsewhere ebx is an ordinary register, so let the compiler allocate and
    // preserve it.
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
#endif
}
#endif
/* Runtime SSE2 probe for 32-bit builds: queries CPUID leaf 1 and tests bit 26
   of the returned EDX value (stored by getcpuid() in slot 3). */
static inline bool hasSSE2() {
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int edx = regs[3];
    return 0 != (edx & kSSE2Mask);
}
#endif
/* Replaces the currently selected 32-bit sample proc with its SSE2 counterpart
   when the CPU supports SSE2 and such a counterpart exists. Any other proc is
   left as it is. */
void SkBitmapProcState::platformProcs() {
    if (!hasSSE2()) {
        return;
    }

    if (S32_opaque_D32_filter_DX == fSampleProc32) {
        fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
        return;
    }

    if (S32_alpha_D32_filter_DX == fSampleProc32) {
        fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    }
}
// SSE2 32-bit row blitters handed out by SkBlitRow::PlatformProcs32(), which
// indexes this table directly with its |flags| argument. The trailing comment
// on each entry names the slot it fills. A NULL entry means this file supplies
// no SSE2 routine for that slot (presumably the caller then falls back to the
// portable implementation -- confirm against the caller).
static SkBlitRow::Proc32 platform_32_procs[] = {
NULL, // S32_Opaque,
S32_Blend_BlitRow32_SSE2, // S32_Blend,
S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
};
/* No SSE2 row procs are provided for 4444 destinations: always returns NULL,
   whatever |flags| is. */
SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
    SkBlitRow::Proc noPlatformProc = NULL;
    return noPlatformProc;
}
/* No SSE2 row procs are provided for 565 destinations: always returns NULL,
   whatever |flags| is. */
SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    SkBlitRow::Proc noPlatformProc = NULL;
    return noPlatformProc;
}
/* Returns the SSE2 color proc when the CPU supports SSE2, otherwise NULL. */
SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    if (!hasSSE2()) {
        return NULL;
    }
    return Color32_SSE2;
}
/* Returns the SSE2 32-bit row proc stored at index |flags| of
   platform_32_procs, or NULL when no such proc is available.

   NULL is returned when the CPU lacks SSE2, when |flags| does not correspond
   to an entry of the table, or when the matching table entry is itself NULL. */
SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    // Guard the lookup: an out-of-range |flags| would otherwise read past the
    // end of the table.
    const unsigned kProcCount = static_cast<unsigned>(
            sizeof(platform_32_procs) / sizeof(platform_32_procs[0]));
    if (flags >= kProcCount) {
        return NULL;
    }

    if (!hasSSE2()) {
        return NULL;
    }
    return platform_32_procs[flags];
}
/* Returns the SSE2 mask blitter for |dstConfig| and |color|, or NULL when none
   applies. An SSE2 proc is returned only when the CPU supports SSE2, the
   destination is kARGB_8888_Config, and the color is neither black nor fully
   opaque. */
SkBlitMask::Proc SkBlitMask::PlatformProcs(SkBitmap::Config dstConfig,
                                           SkColor color)
{
    if (!hasSSE2()) {
        return NULL;
    }
    if (SkBitmap::kARGB_8888_Config != dstConfig) {
        return NULL;
    }

    // TODO: is our current SSE2 faster than the portable, even in
    // the case of black or opaque? If so, no need for this check.
    const bool isBlack = (SK_ColorBLACK == color);
    const bool isOpaque = (0xFF == SkColorGetA(color));
    if (isBlack || isOpaque) {
        return NULL;
    }
    return SkARGB32_BlitMask_SSE2;
}
/* Returns the SSE2 16-bit memset routine when the CPU supports SSE2,
   otherwise NULL. */
SkMemset16Proc SkMemset16GetPlatformProc() {
    SkMemset16Proc proc = NULL;
    if (hasSSE2()) {
        proc = sk_memset16_SSE2;
    }
    return proc;
}
/* Returns the SSE2 32-bit memset routine when the CPU supports SSE2,
   otherwise NULL. */
SkMemset32Proc SkMemset32GetPlatformProc() {
    SkMemset32Proc proc = NULL;
    if (hasSSE2()) {
        proc = sk_memset32_SSE2;
    }
    return proc;
}