blob: c31efdc01a74d9e2e0e1a150a3b070e83435f650 [file] [log] [blame]
#include "rs_core.rsh"
#include "rs_graphics.rsh"
#include "rs_structs.h"
/**
* Allocation sampling
*/
/**
 * Address of element x at mip level lod of a 1D allocation.
 *
 * The address is computed from the allocation's lod[0] buffer pointer plus
 * the type's byte offset for the requested level plus x element sizes.
 * No bounds checking is performed on x or lod.
 */
static const void * __attribute__((overloadable))
getElementAt(rs_allocation a, uint32_t x, uint32_t lod) {
    const Allocation_t *allocation = (const Allocation_t *)a.p;
    const Type_t *allocType = (const Type_t *)allocation->mHal.state.type;
    const uint8_t *base =
            (const uint8_t *)allocation->mHal.drvState.lod[0].mallocPtr;
    const uint32_t elemBytes = allocation->mHal.state.elementSizeBytes;
    const uint32_t levelStart = allocType->mHal.state.lodOffset[lod];
    const uint32_t byteOffset = levelStart + elemBytes * x;
    return base + byteOffset;
}
/**
 * Address of element (x, y) at mip level lod of a 2D allocation.
 *
 * Level 0 uses the stride stored in the allocation's driver state; every
 * other level uses a row pitch of lodDimX[lod] * element size.
 * No bounds checking is performed on x, y or lod.
 */
static const void * __attribute__((overloadable))
getElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t lod) {
    const Allocation_t *allocation = (const Allocation_t *)a.p;
    const Type_t *allocType = (const Type_t *)allocation->mHal.state.type;
    const uint8_t *base =
            (const uint8_t *)allocation->mHal.drvState.lod[0].mallocPtr;
    const uint32_t elemBytes = allocation->mHal.state.elementSizeBytes;
    const uint32_t levelStart = allocType->mHal.state.lodOffset[lod];

    const uint32_t rowPitch = (lod == 0)
            ? allocation->mHal.drvState.lod[0].stride
            : allocType->mHal.state.lodDimX[lod] * elemBytes;

    const uint32_t byteOffset = levelStart + (elemBytes * x) + (y * rowPitch);
    return base + byteOffset;
}
/**
 * Convenience overload: unpacks a uint2 coordinate and forwards to the
 * (x, y, lod) variant of getElementAt.
 */
static const void * __attribute__((overloadable))
getElementAt(rs_allocation a, uint2 uv, uint32_t lod) {
    const uint32_t col = uv.x;
    const uint32_t row = uv.y;
    return getElementAt(a, col, row, lod);
}
/**
 * Maps an integer texel coordinate into [0, size - 1] according to the
 * sampler wrap mode.
 *
 *  - RS_SAMPLER_WRAP:            coord is taken modulo size.
 *  - RS_SAMPLER_MIRRORED_REPEAT: coord is taken modulo 2 * size and the
 *                                upper half is reflected, so the sequence is
 *                                0 .. size-1, size-1 .. 0, 0 .. size-1, ...
 *  - any other mode:             coord is clamped to the edge.
 *
 * A non-positive size returns 0 (the value the clamp path already yields)
 * instead of evaluating a modulo by zero.
 */
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    if (size <= 0) {
        return 0;
    }
    if (wrap == RS_SAMPLER_WRAP) {
        coord = coord % size;
        if (coord < 0) {
            coord += size;
        }
    }
    if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
        const int32_t period = size * 2;
        coord = coord % period;
        if (coord < 0) {
            coord += period;
        }
        if (coord >= size) {
            // Reflect around the far edge: size maps to size - 1 and
            // period - 1 maps to 0. Using (period - coord) here would skip
            // texel 0 and repeat the edge texel.
            coord = (period - 1) - coord;
        }
    }
    // Final clamp implements the edge-clamp mode and is a no-op for the
    // already-wrapped modes above.
    return (uint32_t)max(0, min(coord, size - 1));
}
// RGB565 unpacking helpers (bit layout taken from SkBitmap).
// A packed 16-bit pixel is laid out as RRRRRGGG GGGBBBBB.
#define SK_R16_BITS 5
#define SK_G16_BITS 6
#define SK_B16_BITS 5

// Bit position of the least significant bit of each channel.
#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT (SK_B16_BITS)
#define SK_B16_SHIFT 0

// Channel masks, applied after shifting the channel down to bit 0.
#define SK_R16_MASK ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK ((1 << SK_B16_BITS) - 1)

// Extract the raw 5/6/5-bit channel value from a packed pixel.
#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Widen a 5-bit red value to 8 bits by replicating its top bits into the
// low bits, so 0 maps to 0 and 31 maps to 255.
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned upper = r << (8 - SK_R16_BITS);
    const unsigned lower = r >> (2 * SK_R16_BITS - 8);
    return upper | lower;
}

// Widen a 6-bit green value to 8 bits (0 -> 0, 63 -> 255).
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned upper = g << (8 - SK_G16_BITS);
    const unsigned lower = g >> (2 * SK_G16_BITS - 8);
    return upper | lower;
}

// Widen a 5-bit blue value to 8 bits (0 -> 0, 31 -> 255).
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned upper = b << (8 - SK_B16_BITS);
    const unsigned lower = b >> (2 * SK_B16_BITS - 8);
    return upper | lower;
}

// Packed 565 pixel -> single 8-bit channel.
#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c))
/**
 * Unpacks an RGB565 pixel into a float3 (x = red, y = green, z = blue).
 *
 * Each channel is first widened to 8 bits and then scaled by 1/255, the
 * same factor the 8-bit sampling paths in this file apply, so 565 samples
 * come back in the same range as every other format. The callers in this
 * file return the 565 result without further scaling.
 */
static float3 getFrom565(uint16_t color) {
    float3 result;
    result.x = (float)SkPacked16ToR32(color);
    result.y = (float)SkPacked16ToG32(color);
    result.z = (float)SkPacked16ToB32(color);
    // 0.003921569f == 1 / 255
    return result * 0.003921569f;
}
// Generates getSample<vecsize>() for 1D allocations.
// Reads the two elements at iPixel and next on mip level lod as `intype`,
// converts each with `convert` to `outtype`, and returns their sum weighted
// by weights.x (iPixel) and weights.y (next).
#define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert)                      \
static outtype __attribute__((overloadable))                                   \
getSample##vecsize(rs_allocation a, float2 weights,                            \
                   uint32_t iPixel, uint32_t next, uint32_t lod) {             \
    const intype *texel0 = (const intype *)getElementAt(a, iPixel, lod);       \
    const intype *texel1 = (const intype *)getElementAt(a, next, lod);         \
    const outtype value0 = convert(*texel0);                                   \
    const outtype value1 = convert(*texel1);                                   \
    return value0 * weights.x + value1 * weights.y;                            \
}
// Generates getSample<vecsize>() for 2D allocations.
// Reads the 2x2 neighbourhood spanned by iPixel and next on mip level lod
// and blends it with the four weights:
//   weights.x -> (iPixel.x, iPixel.y)    weights.y -> (next.x, iPixel.y)
//   weights.z -> (iPixel.x, next.y)      weights.w -> (next.x, next.y)
#define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert)                      \
static outtype __attribute__((overloadable))                                   \
getSample##vecsize(rs_allocation a, float4 weights,                            \
                   uint2 iPixel, uint2 next, uint32_t lod) {                   \
    const intype *t00 =                                                        \
            (const intype *)getElementAt(a, iPixel.x, iPixel.y, lod);          \
    const intype *t10 =                                                        \
            (const intype *)getElementAt(a, next.x, iPixel.y, lod);            \
    const intype *t01 =                                                        \
            (const intype *)getElementAt(a, iPixel.x, next.y, lod);            \
    const intype *t11 =                                                        \
            (const intype *)getElementAt(a, next.x, next.y, lod);              \
    const outtype v00 = convert(*t00);                                         \
    const outtype v10 = convert(*t10);                                         \
    const outtype v01 = convert(*t01);                                         \
    const outtype v11 = convert(*t11);                                         \
    return v00 * weights.x + v10 * weights.y                                   \
         + v01 * weights.z + v11 * weights.w;                                  \
}
// Instantiate the 1D weighted-sample helpers: getSample1 .. getSample4 read
// uchar / uchar2 / uchar3 / uchar4 elements, getSample565 reads packed
// 16-bit pixels and unpacks them with getFrom565.
SAMPLE_1D_FUNC(1, uchar, float, (float))
SAMPLE_1D_FUNC(2, uchar2, float2, convert_float2)
SAMPLE_1D_FUNC(3, uchar3, float3, convert_float3)
SAMPLE_1D_FUNC(4, uchar4, float4, convert_float4)
SAMPLE_1D_FUNC(565, uint16_t, float3, getFrom565)
// Same set of helpers for 2D allocations (float4 weights, uint2 coordinates).
SAMPLE_2D_FUNC(1, uchar, float, (float))
SAMPLE_2D_FUNC(2, uchar2, float2, convert_float2)
SAMPLE_2D_FUNC(3, uchar3, float3, convert_float3)
SAMPLE_2D_FUNC(4, uchar4, float4, convert_float4)
SAMPLE_2D_FUNC(565, uint16_t, float3, getFrom565)
// Sampler function body is the same for all dimensions.
//
// Expects the enclosing function to provide `a` (rs_allocation),
// `s` (rs_sampler), `uv` (float or float2) and `lod` (float).
//
//  - Elements of kind RS_KIND_USER, or of a data type other than
//    RS_TYPE_UNSIGNED_8 / RS_TYPE_UNSIGNED_5_6_5, yield a zero float4.
//  - lod <= 0 selects the magnification filter on level 0.
//  - lod > 0 selects the minification filter. NEAREST samples level 0 with
//    nearest filtering; the two mip modes sample from the mip chain; every
//    other value (i.e. plain linear) samples level 0 with linear filtering,
//    mirroring how the magnification branch treats "not NEAREST".
//  - lod is clamped to the last mip level before it is converted to an
//    integer level, so the float -> uint32_t conversion stays in range and
//    the blend fraction stays within [0, 1].
#define SAMPLE_FUNC_BODY()                                                     \
{                                                                              \
    rs_element elem = rsAllocationGetElement(a);                               \
    rs_data_kind dk = rsElementGetDataKind(elem);                              \
    rs_data_type dt = rsElementGetDataType(elem);                              \
                                                                               \
    if (dk == RS_KIND_USER ||                                                  \
        (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) {          \
        float4 zero = {0.0f, 0.0f, 0.0f, 0.0f};                                \
        return zero;                                                           \
    }                                                                          \
                                                                               \
    uint32_t vecSize = rsElementGetVectorSize(elem);                           \
    Allocation_t *alloc = (Allocation_t *)a.p;                                 \
    const Type_t *type = (const Type_t*)alloc->mHal.state.type;                \
                                                                               \
    rs_sampler_value sampleMin = rsSamplerGetMinification(s);                  \
    rs_sampler_value sampleMag = rsSamplerGetMagnification(s);                 \
                                                                               \
    /* Magnification: always level 0. */                                       \
    if (lod <= 0.0f) {                                                         \
        if (sampleMag == RS_SAMPLER_NEAREST) {                                 \
            return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0);    \
        }                                                                      \
        return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0);         \
    }                                                                          \
                                                                               \
    /* Minification without mip mapping, nearest filter. */                    \
    if (sampleMin == RS_SAMPLER_NEAREST) {                                     \
        return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0);        \
    }                                                                          \
                                                                               \
    uint32_t maxLOD = type->mHal.state.lodCount - 1;                           \
    lod = min(lod, (float)maxLOD);                                             \
                                                                               \
    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {                          \
        uint32_t nearestLOD = (uint32_t)round(lod);                            \
        return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv,             \
                                      nearestLOD);                             \
    }                                                                          \
                                                                               \
    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {                           \
        uint32_t lod0 = (uint32_t)floor(lod);                                  \
        uint32_t lod1 = (uint32_t)ceil(lod);                                   \
        float4 sample0 =                                                       \
            sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0);         \
        float4 sample1 =                                                       \
            sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1);         \
        float frac = lod - (float)lod0;                                        \
        return sample0 * (1.0f - frac) + sample1 * frac;                       \
    }                                                                          \
                                                                               \
    /* Minification without mip mapping, linear filter. */                     \
    return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0);             \
} // End of sampler function body is the same for all dimensions
// Body of the bilinear sampling function.
//
// Expects `a`, `weights`, `iPixel`, `next`, `vecSize`, `dt` and `lod` in the
// enclosing function. Dispatches to the getSample<N> helper matching the
// element layout and returns a float4.
//
//  - RS_TYPE_UNSIGNED_5_6_5: xyz come from getSample565 and are returned
//    without further scaling here.
//  - otherwise: the first vecSize components come from getSample<vecSize>
//    and the whole vector is scaled by 1/255.
//
// `result` starts zeroed so that components the selected format does not
// provide (and an unexpected vecSize) yield 0 rather than an indeterminate
// value.
// NOTE(review): formats without an alpha channel therefore report w == 0;
// confirm whether callers expect 1.0 instead.
#define BILINEAR_SAMPLE_BODY()                                                 \
{                                                                              \
    float4 result = {0.0f, 0.0f, 0.0f, 0.0f};                                  \
    if (dt == RS_TYPE_UNSIGNED_5_6_5) {                                        \
        result.xyz = getSample565(a, weights, iPixel, next, lod);              \
        return result;                                                         \
    }                                                                          \
                                                                               \
    switch (vecSize) {                                                         \
    case 1:                                                                    \
        result.x = getSample1(a, weights, iPixel, next, lod);                  \
        break;                                                                 \
    case 2:                                                                    \
        result.xy = getSample2(a, weights, iPixel, next, lod);                 \
        break;                                                                 \
    case 3:                                                                    \
        result.xyz = getSample3(a, weights, iPixel, next, lod);                \
        break;                                                                 \
    case 4:                                                                    \
        result = getSample4(a, weights, iPixel, next, lod);                    \
        break;                                                                 \
    default:                                                                   \
        break;                                                                 \
    }                                                                          \
                                                                               \
    /* 0.003921569f == 1 / 255 */                                              \
    return result * 0.003921569f;                                              \
} // End of body of the bilinear sampling function
// Body of the nearest sampling function.
//
// Expects `a`, `iPixel`, `vecSize`, `dt` and `lod` in the enclosing
// function. Reads the single element at iPixel on mip level lod and returns
// it as a float4.
//
//  - RS_TYPE_UNSIGNED_5_6_5: xyz come from getFrom565 and are returned
//    without further scaling here.
//  - otherwise: the first vecSize components are converted from uchar and
//    the whole vector is scaled by 1/255.
//
// `result` starts zeroed so that components the selected format does not
// provide (and an unexpected vecSize) yield 0 rather than an indeterminate
// value.
// NOTE(review): formats without an alpha channel therefore report w == 0;
// confirm whether callers expect 1.0 instead.
#define NEAREST_SAMPLE_BODY()                                                  \
{                                                                              \
    float4 result = {0.0f, 0.0f, 0.0f, 0.0f};                                  \
    if (dt == RS_TYPE_UNSIGNED_5_6_5) {                                        \
        result.xyz = getFrom565(                                               \
                *(const uint16_t *)getElementAt(a, iPixel, lod));              \
        return result;                                                         \
    }                                                                          \
                                                                               \
    switch (vecSize) {                                                         \
    case 1:                                                                    \
        result.x = (float)(*((const uchar *)getElementAt(a, iPixel, lod)));    \
        break;                                                                 \
    case 2:                                                                    \
        result.xy = convert_float2(                                            \
                *((const uchar2 *)getElementAt(a, iPixel, lod)));              \
        break;                                                                 \
    case 3:                                                                    \
        result.xyz = convert_float3(                                           \
                *((const uchar3 *)getElementAt(a, iPixel, lod)));              \
        break;                                                                 \
    case 4:                                                                    \
        result = convert_float4(                                               \
                *((const uchar4 *)getElementAt(a, iPixel, lod)));              \
        break;                                                                 \
    default:                                                                   \
        break;                                                                 \
    }                                                                          \
                                                                               \
    /* 0.003921569f == 1 / 255 */                                              \
    return result * 0.003921569f;                                              \
} // End of body of the nearest sampling function
/**
 * 1D weighted sample: blends the elements at iPixel and next on mip level
 * lod using the two weights, choosing the element reader from dt / vecSize.
 * The body is the shared BILINEAR_SAMPLE_BODY macro, which refers to the
 * parameters by name.
 */
static float4 __attribute__((overloadable))
getBilinearSample(rs_allocation a, float2 weights,
uint32_t iPixel, uint32_t next,
uint32_t vecSize, rs_data_type dt, uint32_t lod) {
BILINEAR_SAMPLE_BODY()
}
/**
 * 2D weighted sample: blends the 2x2 neighbourhood spanned by iPixel and
 * next on mip level lod using the four weights, choosing the element reader
 * from dt / vecSize. The body is the shared BILINEAR_SAMPLE_BODY macro,
 * which refers to the parameters by name.
 */
static float4 __attribute__((overloadable))
getBilinearSample(rs_allocation a, float4 weights,
uint2 iPixel, uint2 next,
uint32_t vecSize, rs_data_type dt, uint32_t lod) {
BILINEAR_SAMPLE_BODY()
}
/**
 * 1D point sample: reads the single element at iPixel on mip level lod and
 * returns it as a float4, choosing the element reader from dt / vecSize.
 * The body is the shared NEAREST_SAMPLE_BODY macro, which refers to the
 * parameters by name.
 */
static float4 __attribute__((overloadable))
getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize,
rs_data_type dt, uint32_t lod) {
NEAREST_SAMPLE_BODY()
}
/**
 * 2D point sample: reads the single element at (iPixel.x, iPixel.y) on mip
 * level lod and returns it as a float4, choosing the element reader from
 * dt / vecSize. The body is the shared NEAREST_SAMPLE_BODY macro, which
 * refers to the parameters by name.
 */
static float4 __attribute__((overloadable))
getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize,
rs_data_type dt, uint32_t lod) {
NEAREST_SAMPLE_BODY()
}
/**
 * Linearly filtered 1D sample on mip level lod.
 *
 * uv is scaled by the level width and shifted by half a texel so that the
 * integer part names the texel whose centre lies at or before the sample
 * position; the fractional part is the blend factor toward the following
 * texel. floor() is used rather than a cast so that negative coordinates
 * (meaningful with the wrapping modes) select the correct texel pair and
 * keep both weights within [0, 1].
 */
static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
                       uint32_t vecSize, rs_data_type dt,
                       rs_sampler s,
                       float uv, uint32_t lod) {
    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    int32_t sourceW = type->mHal.state.lodDimX[lod];

    float centered = uv * (float)(sourceW) - 0.5f;
    float base = floor(centered);
    int32_t iPixel = (int32_t)base;
    float frac = centered - base;

    float2 weights;
    weights.x = 1.0f - frac;
    weights.y = frac;

    uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
    uint32_t location = wrapI(wrapS, iPixel, sourceW);
    return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
}
/**
 * Point-sampled 1D fetch on mip level lod.
 *
 * uv is scaled by the level width and floored to a texel index, which is
 * then mapped into range by the sampler's S wrap mode. floor() is used
 * rather than a cast so that coordinates in (-1, 0) select texel -1 (and
 * wrap accordingly) instead of collapsing onto texel 0.
 */
static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
                        uint32_t vecSize, rs_data_type dt,
                        rs_sampler s,
                        float uv, uint32_t lod) {
    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    int32_t sourceW = type->mHal.state.lodDimX[lod];
    int32_t iPixel = (int32_t)floor(uv * (float)(sourceW));
    uint32_t location = wrapI(wrapS, iPixel, sourceW);
    return getNearestSample(a, location, vecSize, dt, lod);
}
/**
 * Bilinearly filtered 2D sample on mip level lod.
 *
 * Each axis of uv is scaled by the level dimension and shifted by half a
 * texel; the floored value names the upper-left texel of the 2x2
 * neighbourhood and the fractional part is the blend factor along that
 * axis. floor() is used rather than a cast so that negative coordinates
 * (meaningful with the wrapping modes) select the correct neighbourhood and
 * keep all four weights within [0, 1].
 *
 * Weight layout handed to getBilinearSample:
 *   x -> (location.x, location.y)   y -> (next.x, location.y)
 *   z -> (location.x, next.y)       w -> (next.x, next.y)
 */
static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
                       uint32_t vecSize, rs_data_type dt,
                       rs_sampler s,
                       float2 uv, uint32_t lod) {
    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    rs_sampler_value wrapT = rsSamplerGetWrapT(s);
    int32_t sourceW = type->mHal.state.lodDimX[lod];
    int32_t sourceH = type->mHal.state.lodDimY[lod];

    float2 centered;
    centered.x = uv.x * (float)(sourceW) - 0.5f;
    centered.y = uv.y * (float)(sourceH) - 0.5f;

    // Scalar floor per component keeps this independent of vector overloads.
    float2 base;
    base.x = floor(centered.x);
    base.y = floor(centered.y);

    int2 iPixel;
    iPixel.x = (int32_t)base.x;
    iPixel.y = (int32_t)base.y;

    float2 frac = centered - base;
    float2 oneMinusFrac = 1.0f - frac;

    float4 weights;
    weights.x = oneMinusFrac.x * oneMinusFrac.y;
    weights.y = frac.x * oneMinusFrac.y;
    weights.z = oneMinusFrac.x * frac.y;
    weights.w = frac.x * frac.y;

    uint2 next;
    next.x = wrapI(wrapS, iPixel.x + 1, sourceW);
    next.y = wrapI(wrapT, iPixel.y + 1, sourceH);
    uint2 location;
    location.x = wrapI(wrapS, iPixel.x, sourceW);
    location.y = wrapI(wrapT, iPixel.y, sourceH);
    return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
}
/**
 * Point-sampled 2D fetch on mip level lod.
 *
 * Each axis of uv is scaled by the level dimension and floored to a texel
 * index, which is then mapped into range by the sampler's S / T wrap mode.
 * floor() is used rather than a cast so that coordinates in (-1, 0) select
 * texel -1 (and wrap accordingly) instead of collapsing onto texel 0.
 */
static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
                        uint32_t vecSize, rs_data_type dt,
                        rs_sampler s,
                        float2 uv, uint32_t lod) {
    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    rs_sampler_value wrapT = rsSamplerGetWrapT(s);
    int32_t sourceW = type->mHal.state.lodDimX[lod];
    int32_t sourceH = type->mHal.state.lodDimY[lod];

    int2 iPixel;
    iPixel.x = (int32_t)floor(uv.x * (float)(sourceW));
    iPixel.y = (int32_t)floor(uv.y * (float)(sourceH));

    uint2 location;
    location.x = wrapI(wrapS, iPixel.x, sourceW);
    location.y = wrapI(wrapT, iPixel.y, sourceH);
    return getNearestSample(a, location, vecSize, dt, lod);
}
/**
 * Samples a 1D allocation at `location` with a level of detail of zero by
 * forwarding to the (float location, float lod) overload.
 */
extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a, rs_sampler s, float location) {
    const float baseLod = 0.0f;
    return rsSample(a, s, location, baseLod);
}
/**
 * Samples allocation `a` at the scalar coordinate `uv` with an explicit
 * level of detail. The body is the shared SAMPLE_FUNC_BODY macro, which
 * refers to the parameters a, s, uv and lod by name and picks the filter
 * from the sampler's magnification / minification settings.
 */
extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
SAMPLE_FUNC_BODY()
}
/**
 * Samples a 2D allocation at `location` with a level of detail of zero by
 * forwarding to the (float2 location, float lod) overload.
 */
extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a, rs_sampler s, float2 location) {
    const float baseLod = 0.0f;
    return rsSample(a, s, location, baseLod);
}
/**
 * Samples allocation `a` at the float2 coordinate `uv` with an explicit
 * level of detail. The body is the shared SAMPLE_FUNC_BODY macro, which
 * refers to the parameters a, s, uv and lod by name and picks the filter
 * from the sampler's magnification / minification settings.
 */
extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
SAMPLE_FUNC_BODY()
}