Merge "overhaul rs_sampler" into jb-mr2-dev
diff --git a/lib/Renderscript/runtime/rs_sample.c b/lib/Renderscript/runtime/rs_sample.c
index c31efdc..8bc6966 100644
--- a/lib/Renderscript/runtime/rs_sample.c
+++ b/lib/Renderscript/runtime/rs_sample.c
@@ -2,62 +2,6 @@
#include "rs_graphics.rsh"
#include "rs_structs.h"
-/**
-* Allocation sampling
-*/
-static const void * __attribute__((overloadable))
- getElementAt(rs_allocation a, uint32_t x, uint32_t lod) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const Type_t *type = (const Type_t*)alloc->mHal.state.type;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-
- const uint32_t offset = type->mHal.state.lodOffset[lod];
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-
- return &p[offset + eSize * x];
-}
-
-static const void * __attribute__((overloadable))
- getElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t lod) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const Type_t *type = (const Type_t*)alloc->mHal.state.type;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- const uint32_t offset = type->mHal.state.lodOffset[lod];
- uint32_t stride;
- if(lod == 0) {
- stride = alloc->mHal.drvState.lod[0].stride;
- } else {
- stride = type->mHal.state.lodDimX[lod] * eSize;
- }
-
- return &p[offset + (eSize * x) + (y * stride)];
-}
-
-static const void * __attribute__((overloadable))
- getElementAt(rs_allocation a, uint2 uv, uint32_t lod) {
- return getElementAt(a, uv.x, uv.y, lod);
-}
-
-static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
- if (wrap == RS_SAMPLER_WRAP) {
- coord = coord % size;
- if (coord < 0) {
- coord += size;
- }
- }
- if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
- coord = coord % (size * 2);
- if (coord < 0) {
- coord = (size * 2) + coord;
- }
- if (coord >= size) {
- coord = (size * 2) - coord;
- }
- }
- return (uint32_t)max(0, min(coord, size - 1));
-}
// 565 Conversion bits taken from SkBitmap
#define SK_R16_BITS 5
@@ -100,178 +44,392 @@
return result;
}
-#define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert) \
- static outtype __attribute__((overloadable)) \
- getSample##vecsize(rs_allocation a, float2 weights, \
- uint32_t iPixel, uint32_t next, uint32_t lod) { \
- intype *p0c = (intype*)getElementAt(a, iPixel, lod); \
- intype *p1c = (intype*)getElementAt(a, next, lod); \
- outtype p0 = convert(*p0c); \
- outtype p1 = convert(*p1c); \
- return p0 * weights.x + p1 * weights.y; \
- }
-#define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert) \
- static outtype __attribute__((overloadable)) \
- getSample##vecsize(rs_allocation a, float4 weights, \
- uint2 iPixel, uint2 next, uint32_t lod) { \
- intype *p0c = (intype*)getElementAt(a, iPixel.x, iPixel.y, lod); \
- intype *p1c = (intype*)getElementAt(a, next.x, iPixel.y, lod); \
- intype *p2c = (intype*)getElementAt(a, iPixel.x, next.y, lod); \
- intype *p3c = (intype*)getElementAt(a, next.x, next.y, lod); \
- outtype p0 = convert(*p0c); \
- outtype p1 = convert(*p1c); \
- outtype p2 = convert(*p2c); \
- outtype p3 = convert(*p3c); \
- return p0 * weights.x + p1 * weights.y + p2 * weights.z + p3 * weights.w; \
- }
+/**
+* Allocation sampling
+*/
+// Fetches the single byte at index x of a 1D row and widens it to float.
+static inline float __attribute__((overloadable))
+ getElementAt1(const uint8_t *p, int32_t x) {
+ return (float)p[x];
+}
-SAMPLE_1D_FUNC(1, uchar, float, (float))
-SAMPLE_1D_FUNC(2, uchar2, float2, convert_float2)
-SAMPLE_1D_FUNC(3, uchar3, float3, convert_float3)
-SAMPLE_1D_FUNC(4, uchar4, float4, convert_float4)
-SAMPLE_1D_FUNC(565, uint16_t, float3, getFrom565)
+// Fetches the two-byte element at index x of a 1D row as a float2,
+// one byte per lane.
+static inline float2 __attribute__((overloadable))
+ getElementAt2(const uint8_t *p, int32_t x) {
+ const int32_t byteOffset = x * 2;
+ float2 r = {p[byteOffset], p[byteOffset + 1]};
+ return r;
+}
-SAMPLE_2D_FUNC(1, uchar, float, (float))
-SAMPLE_2D_FUNC(2, uchar2, float2, convert_float2)
-SAMPLE_2D_FUNC(3, uchar3, float3, convert_float3)
-SAMPLE_2D_FUNC(4, uchar4, float4, convert_float4)
-SAMPLE_2D_FUNC(565, uint16_t, float3, getFrom565)
+// Fetches the first three bytes of the element at index x of a 1D row as a
+// float3. Elements are addressed with a 4-byte pitch; the fourth byte is
+// not read.
+static inline float3 __attribute__((overloadable))
+ getElementAt3(const uint8_t *p, int32_t x) {
+ const int32_t byteOffset = x * 4;
+ float3 r = {p[byteOffset], p[byteOffset + 1], p[byteOffset + 2]};
+ return r;
+}
-// Sampler function body is the same for all dimensions
-#define SAMPLE_FUNC_BODY() \
-{ \
- rs_element elem = rsAllocationGetElement(a); \
- rs_data_kind dk = rsElementGetDataKind(elem); \
- rs_data_type dt = rsElementGetDataType(elem); \
- \
- if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) { \
- float4 zero = {0.0f, 0.0f, 0.0f, 0.0f}; \
- return zero; \
- } \
- \
- uint32_t vecSize = rsElementGetVectorSize(elem); \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- const Type_t *type = (const Type_t*)alloc->mHal.state.type; \
- \
- rs_sampler_value sampleMin = rsSamplerGetMinification(s); \
- rs_sampler_value sampleMag = rsSamplerGetMagnification(s); \
- \
- if (lod <= 0.0f) { \
- if (sampleMag == RS_SAMPLER_NEAREST) { \
- return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \
- } \
- return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0); \
- } \
- \
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) { \
- uint32_t maxLOD = type->mHal.state.lodCount - 1; \
- lod = min(lod, (float)maxLOD); \
- uint32_t nearestLOD = (uint32_t)round(lod); \
- return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, nearestLOD); \
- } \
- \
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) { \
- uint32_t lod0 = (uint32_t)floor(lod); \
- uint32_t lod1 = (uint32_t)ceil(lod); \
- uint32_t maxLOD = type->mHal.state.lodCount - 1; \
- lod0 = min(lod0, maxLOD); \
- lod1 = min(lod1, maxLOD); \
- float4 sample0 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0); \
- float4 sample1 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1); \
- float frac = lod - (float)lod0; \
- return sample0 * (1.0f - frac) + sample1 * frac; \
- } \
- \
- return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \
-} // End of sampler function body is the same for all dimensions
+// Fetches the four-byte element at index x of a 1D row and converts all
+// four lanes to float in one step.
+static inline float4 __attribute__((overloadable))
+ getElementAt4(const uint8_t *p, int32_t x) {
+ const int32_t byteOffset = x * 4;
+ const uchar4 *texel = (const uchar4 *)&p[byteOffset];
+ return convert_float4(*texel);
+}
-// Body of the bilinear sampling function
-#define BILINEAR_SAMPLE_BODY() \
-{ \
- float4 result; \
- if (dt == RS_TYPE_UNSIGNED_5_6_5) { \
- result.xyz = getSample565(a, weights, iPixel, next, lod); \
- return result; \
- } \
- \
- switch(vecSize) { \
- case 1: \
- result.x = getSample1(a, weights, iPixel, next, lod); \
- break; \
- case 2: \
- result.xy = getSample2(a, weights, iPixel, next, lod); \
- break; \
- case 3: \
- result.xyz = getSample3(a, weights, iPixel, next, lod); \
- break; \
- case 4: \
- result = getSample4(a, weights, iPixel, next, lod); \
- break; \
- } \
- \
- return result * 0.003921569f; \
-} // End of body of the bilinear sampling function
+// Fetches the 16-bit 565 pixel at index x of a 1D row and expands it to a
+// float3 with getFrom565().
+static inline float3 __attribute__((overloadable))
+ getElementAt565(const uint8_t *p, int32_t x) {
+ // Each 565 pixel occupies one uint16_t, so x becomes a byte offset.
+ x *= 2;
+ // Read the pixel at that offset; the previous code always read pixel 0.
+ float3 r = getFrom565(*(const uint16_t *)&p[x]);
+ return r;
+}
-// Body of the nearest sampling function
-#define NEAREST_SAMPLE_BODY() \
-{ \
- float4 result; \
- if (dt == RS_TYPE_UNSIGNED_5_6_5) { \
- result.xyz = getFrom565(*(uint16_t*)getElementAt(a, iPixel, lod)); \
- return result; \
- } \
- \
- switch(vecSize) { \
- case 1: \
- result.x = (float)(*((uchar*)getElementAt(a, iPixel, lod))); \
- break; \
- case 2: \
- result.xy = convert_float2(*((uchar2*)getElementAt(a, iPixel, lod))); \
- break; \
- case 3: \
- result.xyz = convert_float3(*((uchar3*)getElementAt(a, iPixel, lod))); \
- break; \
- case 4: \
- result = convert_float4(*((uchar4*)getElementAt(a, iPixel, lod))); \
- break; \
- } \
- \
- return result * 0.003921569f; \
-} // End of body of the nearest sampling function
+// Fetches the single byte at column x of row y (rows are stride bytes apart)
+// and widens it to float.
+static inline float __attribute__((overloadable))
+ getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
+ const uint8_t *row = p + y * stride;
+ return (float)row[x];
+}
+
+// Fetches the two-byte element at column x of row y as a float2,
+// one byte per lane.
+static inline float2 __attribute__((overloadable))
+ getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
+ const uint8_t *row = p + y * stride;
+ const int32_t byteOffset = x * 2;
+ float2 r = {row[byteOffset], row[byteOffset + 1]};
+ return r;
+}
+
+// Fetches the first three bytes of the element at column x of row y as a
+// float3. Elements are addressed with a 4-byte pitch; the fourth byte is
+// not read.
+static inline float3 __attribute__((overloadable))
+ getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
+ const uint8_t *row = p + y * stride;
+ const int32_t byteOffset = x * 4;
+ float3 r = {row[byteOffset], row[byteOffset + 1], row[byteOffset + 2]};
+ return r;
+}
+
+// Fetches the four-byte element at column x of row y as a float4,
+// one byte per lane.
+static inline float4 __attribute__((overloadable))
+ getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
+ const uint8_t *row = p + y * stride;
+ const int32_t byteOffset = x * 4;
+ float4 r = {row[byteOffset], row[byteOffset + 1],
+ row[byteOffset + 2], row[byteOffset + 3]};
+ return r;
+}
+
+// Fetches the 16-bit 565 pixel at column x of row y (rows are stride bytes
+// apart) and expands it to a float3 with getFrom565().
+static inline float3 __attribute__((overloadable))
+ getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
+ p += y * stride;
+ // Each 565 pixel occupies one uint16_t, so x becomes a byte offset.
+ x *= 2;
+ // Read the pixel at that offset; the previous code always read the first
+ // pixel of the row.
+ float3 r = getFrom565(*(const uint16_t *)&p[x]);
+ return r;
+}
+
+
+
+
+// Blends the 8-bit texels at iPixel and next with weights w0/w1, scales the
+// result by 1/255 and returns it in the alpha lane; colour lanes are 0.
static float4 __attribute__((overloadable))
- getBilinearSample(rs_allocation a, float2 weights,
+ getSample_A(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float t0 = getElementAt1(p, iPixel);
+ float t1 = getElementAt1(p, next);
+ float alpha = t0 * w0 + t1 * w1;
+ alpha *= (1.f / 255.f);
+ float4 ret = {0.f, 0.f, 0.f, alpha};
+ return ret;
+}
+// Blends the 8-bit texels at iPixel and next with weights w0/w1, scales the
+// result by 1/255 and replicates it into x, y and z; alpha is 1.
+static float4 __attribute__((overloadable))
+ getSample_L(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float t0 = getElementAt1(p, iPixel);
+ float t1 = getElementAt1(p, next);
+ float lum = t0 * w0 + t1 * w1;
+ lum *= (1.f / 255.f);
+ float4 ret = {lum, lum, lum, 1.f};
+ return ret;
+}
+// Blends the two-byte texels at iPixel and next with weights w0/w1 and scales
+// by 1/255. The first lane is replicated into x, y and z; the second lane
+// becomes alpha.
+static float4 __attribute__((overloadable))
+ getSample_LA(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float2 t0 = getElementAt2(p, iPixel);
+ float2 t1 = getElementAt2(p, next);
+ float2 la = t0 * w0 + t1 * w1;
+ la *= (1.f / 255.f);
+ float4 ret = {la.x, la.x, la.x, la.y};
+ return ret;
+}
+// Blends the three-channel texels at iPixel and next with weights w0/w1,
+// scales by 1/255 and returns them as RGB with alpha set to 1.
+static float4 __attribute__((overloadable))
+ getSample_RGB(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float3 p0 = getElementAt3(p, iPixel);
+ float3 p1 = getElementAt3(p, next);
+ float3 r = p0 * w0 + p1 * w1;
+ r *= (1.f / 255.f);
+ // The second lane must carry r.y; it previously duplicated r.x.
+ float4 ret = {r.x, r.y, r.z, 1.f};
+ return ret;
+}
+// Blends the 565 texels at iPixel and next with weights w0/w1, scales by
+// 1/255 and returns them as RGB with alpha set to 1.
+static float4 __attribute__((overloadable))
+ getSample_565(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float3 p0 = getElementAt565(p, iPixel);
+ float3 p1 = getElementAt565(p, next);
+ float3 r = p0 * w0 + p1 * w1;
+ r *= (1.f / 255.f);
+ // The second lane must carry r.y; it previously duplicated r.x.
+ float4 ret = {r.x, r.y, r.z, 1.f};
+ return ret;
+}
+// Blends the four-channel texels at iPixel and next with weights w0/w1 and
+// scales every lane by 1/255.
+static float4 __attribute__((overloadable))
+ getSample_RGBA(const uint8_t *p, int32_t iPixel,
+ int32_t next, float w0, float w1) {
+ float4 t0 = getElementAt4(p, iPixel);
+ float4 t1 = getElementAt4(p, next);
+ float4 rgba = t0 * w0 + t1 * w1;
+ rgba *= (1.f / 255.f);
+ return rgba;
+}
+
+
+// Blends the four 8-bit texels at (locX|nextX, locY|nextY) with weights
+// w0..w3, scales by 1/255 and returns the result in the alpha lane; colour
+// lanes are 0.
+static float4 __attribute__((overloadable))
+ getSample_A(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float t00 = getElementAt1(p, stride, locX, locY);
+ float t10 = getElementAt1(p, stride, nextX, locY);
+ float t01 = getElementAt1(p, stride, locX, nextY);
+ float t11 = getElementAt1(p, stride, nextX, nextY);
+ float alpha = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ alpha *= (1.f / 255.f);
+ float4 ret = {0.f, 0.f, 0.f, alpha};
+ return ret;
+}
+// Blends the four 8-bit texels at (locX|nextX, locY|nextY) with weights
+// w0..w3, scales by 1/255 and replicates the result into x, y and z;
+// alpha is 1.
+static float4 __attribute__((overloadable))
+ getSample_L(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float t00 = getElementAt1(p, stride, locX, locY);
+ float t10 = getElementAt1(p, stride, nextX, locY);
+ float t01 = getElementAt1(p, stride, locX, nextY);
+ float t11 = getElementAt1(p, stride, nextX, nextY);
+ float lum = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ lum *= (1.f / 255.f);
+ float4 ret = {lum, lum, lum, 1.f};
+ return ret;
+}
+// Blends the four two-byte texels at (locX|nextX, locY|nextY) with weights
+// w0..w3 and scales by 1/255. The first lane is replicated into x, y and z;
+// the second lane becomes alpha.
+static float4 __attribute__((overloadable))
+ getSample_LA(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float2 t00 = getElementAt2(p, stride, locX, locY);
+ float2 t10 = getElementAt2(p, stride, nextX, locY);
+ float2 t01 = getElementAt2(p, stride, locX, nextY);
+ float2 t11 = getElementAt2(p, stride, nextX, nextY);
+ float2 la = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ la *= (1.f / 255.f);
+ float4 ret = {la.x, la.x, la.x, la.y};
+ return ret;
+}
+// Blends the four texels at (locX|nextX, locY|nextY) with weights w0..w3 and
+// scales by 1/255. Texels are fetched as four bytes; the fourth lane is
+// discarded and alpha is forced to 1.
+static float4 __attribute__((overloadable))
+ getSample_RGB(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float4 t00 = getElementAt4(p, stride, locX, locY);
+ float4 t10 = getElementAt4(p, stride, nextX, locY);
+ float4 t01 = getElementAt4(p, stride, locX, nextY);
+ float4 t11 = getElementAt4(p, stride, nextX, nextY);
+ float4 blended = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ blended *= (1.f / 255.f);
+ float4 ret = {blended.x, blended.y, blended.z, 1.f};
+ return ret;
+}
+// Blends the four four-channel texels at (locX|nextX, locY|nextY) with
+// weights w0..w3 and scales every lane by 1/255.
+static float4 __attribute__((overloadable))
+ getSample_RGBA(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float4 t00 = getElementAt4(p, stride, locX, locY);
+ float4 t10 = getElementAt4(p, stride, nextX, locY);
+ float4 t01 = getElementAt4(p, stride, locX, nextY);
+ float4 t11 = getElementAt4(p, stride, nextX, nextY);
+ float4 rgba = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ rgba *= (1.f / 255.f);
+ return rgba;
+}
+// Blends the four 565 texels at (locX|nextX, locY|nextY) with weights w0..w3,
+// scales by 1/255 and returns them as RGB with alpha set to 1.
+static float4 __attribute__((overloadable))
+ getSample_565(const uint8_t *p, size_t stride,
+ int locX, int locY, int nextX, int nextY,
+ float w0, float w1, float w2, float w3) {
+ float3 t00 = getElementAt565(p, stride, locX, locY);
+ float3 t10 = getElementAt565(p, stride, nextX, locY);
+ float3 t01 = getElementAt565(p, stride, locX, nextY);
+ float3 t11 = getElementAt565(p, stride, nextX, nextY);
+ float3 rgb = t00 * w0 + t10 * w1 + t01 * w2 + t11 * w3;
+ rgb *= (1.f / 255.f);
+ float4 ret = {rgb.x, rgb.y, rgb.z, 1.f};
+ return ret;
+}
+
+// Dispatches a two-texel 1D blend to the getSample_* helper that matches the
+// element's data kind. iPixel and next are the two texel indices, weights.x
+// and weights.y their respective weights, and lod selects which level's
+// buffer is read. Unhandled kinds return 0.
+static float4 __attribute__((overloadable))
+ getBilinearSample1D(const Allocation_t *alloc, float2 weights,
uint32_t iPixel, uint32_t next,
- uint32_t vecSize, rs_data_type dt, uint32_t lod) {
- BILINEAR_SAMPLE_BODY()
+ rs_data_kind dk, rs_data_type dt, uint32_t lod) {
+
+ const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+
+ switch(dk) {
+ case RS_KIND_PIXEL_RGBA:
+ return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
+ case RS_KIND_PIXEL_A:
+ return getSample_A(p, iPixel, next, weights.x, weights.y);
+ case RS_KIND_PIXEL_RGB:
+ // RGB covers two storage layouts; the data type tells them apart.
+ if (dt == RS_TYPE_UNSIGNED_5_6_5) {
+ return getSample_565(p, iPixel, next, weights.x, weights.y);
+ }
+ return getSample_RGB(p, iPixel, next, weights.x, weights.y);
+ case RS_KIND_PIXEL_L:
+ return getSample_L(p, iPixel, next, weights.x, weights.y);
+ case RS_KIND_PIXEL_LA:
+ return getSample_LA(p, iPixel, next, weights.x, weights.y);
+
+ default:
+ //__builtin_unreachable();
+ break;
+ }
+
+ //__builtin_unreachable();
+ return 0.f;
+}
+
+// Maps a texel coordinate onto [0, size - 1] according to the wrap mode.
+// RS_SAMPLER_WRAP repeats the range, RS_SAMPLER_MIRRORED_REPEAT reflects every
+// other repetition, and any other mode is handled by the final clamp.
+// size is used as a modulus, so callers must pass size > 0.
+static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
+ if (wrap == RS_SAMPLER_WRAP) {
+ coord = coord % size;
+ // C's % keeps the sign of the dividend; shift negatives back in range.
+ if (coord < 0) {
+ coord += size;
+ }
+ }
+ if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
+ coord = coord % (size * 2);
+ if (coord < 0) {
+ coord = (size * 2) + coord;
+ }
+ // Reflect the second half of the 2 * size period: size maps to
+ // size - 1 and 2 * size - 1 maps to 0. The former "2 * size - coord"
+ // was off by one texel and never reached texel 0 on the mirrored side.
+ if (coord >= size) {
+ coord = (size * 2 - 1) - coord;
+ }
+ }
+ return (uint32_t)max(0, min(coord, size - 1));
}
+// Dispatches a four-texel 2D blend to the getSample_* helper that matches the
+// element's data kind. (lx, ly) and (nx, ny) are the two texel columns/rows
+// to combine, w0..w3 the weights passed straight through to the helper, and
+// lod selects which level's buffer and stride are read. Unhandled kinds
+// return 0.
static float4 __attribute__((overloadable))
- getBilinearSample(rs_allocation a, float4 weights,
- uint2 iPixel, uint2 next,
- uint32_t vecSize, rs_data_type dt, uint32_t lod) {
- BILINEAR_SAMPLE_BODY()
+ getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
+ int lx, int ly, int nx, int ny,
+ rs_data_kind dk, rs_data_type dt, uint32_t lod) {
+
+ const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+ size_t stride = alloc->mHal.drvState.lod[lod].stride;
+
+ switch(dk) {
+ case RS_KIND_PIXEL_RGBA:
+ return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+ case RS_KIND_PIXEL_A:
+ return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+ case RS_KIND_PIXEL_LA:
+ return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+ case RS_KIND_PIXEL_RGB:
+ // RGB covers two storage layouts; the data type tells them apart.
+ if (dt == RS_TYPE_UNSIGNED_5_6_5) {
+ return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+ }
+ return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+ case RS_KIND_PIXEL_L:
+ return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
+
+ default:
+ //__builtin_unreachable();
+ break;
+ }
+
+ //__builtin_unreachable();
+ return 0.f;
}
+// Reads the texel at iPixel from the given LOD of a 1D allocation and returns
+// it as a float4 scaled by 0.003921569f. Lanes the data kind does not supply
+// keep the defaults below (0 for x/y/z, 255 for w before scaling).
static float4 __attribute__((overloadable))
- getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize,
+ getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
rs_data_type dt, uint32_t lod) {
- NEAREST_SAMPLE_BODY()
+
+ const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+
+ float4 result = {0.f, 0.f, 0.f, 255.f};
+
+ switch(dk) {
+ case RS_KIND_PIXEL_RGBA:
+ result = getElementAt4(p, iPixel);
+ break;
+ case RS_KIND_PIXEL_A:
+ result.w = getElementAt1(p, iPixel);
+ break;
+ case RS_KIND_PIXEL_LA:
+ // Load into z/w, then copy z into x and y.
+ result.zw = getElementAt2(p, iPixel);
+ result.xy = result.z;
+ break;
+ case RS_KIND_PIXEL_RGB:
+ if (dt == RS_TYPE_UNSIGNED_5_6_5) {
+ result.xyz = getElementAt565(p, iPixel);
+ } else {
+ result.xyz = getElementAt3(p, iPixel);
+ }
+ break;
+ case RS_KIND_PIXEL_L:
+ result.xyz = getElementAt1(p, iPixel);
+ // Explicit break: this case used to fall through into default.
+ break;
+
+ default:
+ //__builtin_unreachable();
+ break;
+ }
+
+ return result * 0.003921569f;
}
+// Reads the texel at (iPixel.x, iPixel.y) from the given LOD of a 2D
+// allocation and returns it as a float4 scaled by 0.003921569f. Lanes the
+// data kind does not supply keep the defaults below (0 for x/y/z, 255 for w
+// before scaling).
static float4 __attribute__((overloadable))
- getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize,
+ getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
rs_data_type dt, uint32_t lod) {
- NEAREST_SAMPLE_BODY()
+
+ const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+ size_t stride = alloc->mHal.drvState.lod[lod].stride;
+
+ float4 result = {0.f, 0.f, 0.f, 255.f};
+
+ switch(dk) {
+ case RS_KIND_PIXEL_RGBA:
+ result = getElementAt4(p, stride, iPixel.x, iPixel.y);
+ break;
+ case RS_KIND_PIXEL_A:
+ result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
+ break;
+ case RS_KIND_PIXEL_LA:
+ // Load into z/w, then copy z into x and y.
+ result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
+ result.xy = result.z;
+ break;
+ case RS_KIND_PIXEL_RGB:
+ if (dt == RS_TYPE_UNSIGNED_5_6_5) {
+ result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
+ } else {
+ result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
+ }
+ break;
+ case RS_KIND_PIXEL_L:
+ // Luminance was missing here, so such textures returned only the
+ // defaults; handle it the same way as the 1D overload.
+ result.xyz = getElementAt1(p, stride, iPixel.x, iPixel.y);
+ break;
+
+ default:
+ //__builtin_unreachable();
+ break;
+ }
+
+ return result * 0.003921569f;
}
static float4 __attribute__((overloadable))
- sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
- uint32_t vecSize, rs_data_type dt,
+ sample_LOD_LinearPixel(const Allocation_t *alloc, const Type_t *type,
+ rs_data_kind dk, rs_data_type dt,
rs_sampler s,
float uv, uint32_t lod) {
+
+ const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+
rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- int32_t sourceW = type->mHal.state.lodDimX[lod];
+ int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
float pixelUV = uv * (float)(sourceW);
int32_t iPixel = (int32_t)(pixelUV);
float frac = pixelUV - (float)iPixel;
@@ -292,81 +450,83 @@
uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
uint32_t location = wrapI(wrapS, iPixel, sourceW);
- return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
+ return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
}
+// Nearest-texel lookup along one dimension: scales uv by the width of the
+// requested LOD, truncates to an integer texel index, applies the sampler's
+// S wrap mode through wrapI() and fetches that texel.
static float4 __attribute__((overloadable))
- sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
- uint32_t vecSize, rs_data_type dt,
+ sample_LOD_NearestPixel(const Allocation_t *alloc,
+ rs_data_kind dk, rs_data_type dt,
rs_sampler s,
float uv, uint32_t lod) {
+
rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- int32_t sourceW = type->mHal.state.lodDimX[lod];
+ int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
int32_t iPixel = (int32_t)(uv * (float)(sourceW));
uint32_t location = wrapI(wrapS, iPixel, sourceW);
- return getNearestSample(a, location, vecSize, dt, lod);
+ return getNearestSample(alloc, location, dk, dt, lod);
}
+// Bilinear lookup in two dimensions at the requested LOD. uv is scaled by the
+// level's width/height, split into an integer texel and a fraction, and the
+// four weights derived from the fractions are handed to getBilinearSample2D()
+// together with the wrapped texel coordinates. The unused local buffer
+// pointer was dropped; getBilinearSample2D() fetches it itself.
static float4 __attribute__((overloadable))
- sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
- uint32_t vecSize, rs_data_type dt,
+ sample_LOD_LinearPixel(const Allocation_t *alloc,
+ rs_data_kind dk, rs_data_type dt,
rs_sampler s,
float2 uv, uint32_t lod) {
+
rs_sampler_value wrapS = rsSamplerGetWrapS(s);
rs_sampler_value wrapT = rsSamplerGetWrapT(s);
- int32_t sourceW = type->mHal.state.lodDimX[lod];
- int32_t sourceH = type->mHal.state.lodDimY[lod];
+ int sourceW = alloc->mHal.drvState.lod[lod].dimX;
+ int sourceH = alloc->mHal.drvState.lod[lod].dimY;
- float2 dimF;
- dimF.x = (float)(sourceW);
- dimF.y = (float)(sourceH);
- float2 pixelUV = uv * dimF;
- int2 iPixel = convert_int2(pixelUV);
+ float pixelU = uv.x * sourceW;
+ float pixelV = uv.y * sourceH;
+ int iPixelU = pixelU;
+ int iPixelV = pixelV;
+ float fracU = pixelU - iPixelU;
+ float fracV = pixelV - iPixelV;
- float2 frac = pixelUV - convert_float2(iPixel);
-
- if (frac.x < 0.5f) {
- iPixel.x -= 1;
- frac.x += 0.5f;
+ // A fraction below 0.5 moves the base texel back by one and shifts the
+ // fraction up by 0.5; otherwise the fraction is shifted down by 0.5.
+ if (fracU < 0.5f) {
+ iPixelU -= 1;
+ fracU += 0.5f;
} else {
- frac.x -= 0.5f;
+ fracU -= 0.5f;
}
- if (frac.y < 0.5f) {
- iPixel.y -= 1;
- frac.y += 0.5f;
+ if (fracV < 0.5f) {
+ iPixelV -= 1;
+ fracV += 0.5f;
} else {
- frac.y -= 0.5f;
+ fracV -= 0.5f;
}
- float2 oneMinusFrac = 1.0f - frac;
+ float oneMinusFracU = 1.0f - fracU;
+ float oneMinusFracV = 1.0f - fracV;
- float4 weights;
- weights.x = oneMinusFrac.x * oneMinusFrac.y;
- weights.y = frac.x * oneMinusFrac.y;
- weights.z = oneMinusFrac.x * frac.y;
- weights.w = frac.x * frac.y;
+ float w0 = oneMinusFracU * oneMinusFracV;
+ float w1 = fracU * oneMinusFracV;
+ float w2 = oneMinusFracU * fracV;
+ float w3 = fracU * fracV;
- uint2 next;
- next.x = wrapI(wrapS, iPixel.x + 1, sourceW);
- next.y = wrapI(wrapT, iPixel.y + 1, sourceH);
- uint2 location;
- location.x = wrapI(wrapS, iPixel.x, sourceW);
- location.y = wrapI(wrapT, iPixel.y, sourceH);
+ int nx = wrapI(wrapS, iPixelU + 1, sourceW);
+ int ny = wrapI(wrapT, iPixelV + 1, sourceH);
+ int lx = wrapI(wrapS, iPixelU, sourceW);
+ int ly = wrapI(wrapT, iPixelV, sourceH);
- return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
+ return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
+
}
static float4 __attribute__((overloadable))
- sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
- uint32_t vecSize, rs_data_type dt,
+ sample_LOD_NearestPixel(const Allocation_t *alloc,
+ rs_data_kind dk, rs_data_type dt,
rs_sampler s,
float2 uv, uint32_t lod) {
rs_sampler_value wrapS = rsSamplerGetWrapS(s);
rs_sampler_value wrapT = rsSamplerGetWrapT(s);
- int32_t sourceW = type->mHal.state.lodDimX[lod];
- int32_t sourceH = type->mHal.state.lodDimY[lod];
+ int sourceW = alloc->mHal.drvState.lod[lod].dimX;
+ int sourceH = alloc->mHal.drvState.lod[lod].dimY;
float2 dimF;
dimF.x = (float)(sourceW);
@@ -376,7 +536,52 @@
uint2 location;
location.x = wrapI(wrapS, iPixel.x, sourceW);
location.y = wrapI(wrapT, iPixel.y, sourceH);
- return getNearestSample(a, location, vecSize, dt, lod);
+ return getNearestSample(alloc, location, dk, dt, lod);
+}
+
+// Samples a 1D allocation at coordinate uv with an explicit level of detail.
+// Returns 0 for user-kind elements and for data types other than unsigned 8
+// or 5_6_5. lod <= 0 uses the magnification filter on level 0; otherwise the
+// minification filter picks one mip level (LINEAR_MIP_NEAREST), a blend of
+// two adjacent levels (LINEAR_MIP_LINEAR), or a nearest lookup on level 0.
+extern const float4 __attribute__((overloadable))
+ rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
+ rs_element elem = rsAllocationGetElement(a);
+ rs_data_kind dk = rsElementGetDataKind(elem);
+ rs_data_type dt = rsElementGetDataType(elem);
+
+ if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) {
+ return 0.f;
+ }
+
+ const Allocation_t *alloc = (const Allocation_t *)a.p;
+ const Type_t *type = (const Type_t*)alloc->mHal.state.type;
+
+ rs_sampler_value sampleMin = rsSamplerGetMinification(s);
+ rs_sampler_value sampleMag = rsSamplerGetMagnification(s);
+
+ // The float-uv sample_LOD_LinearPixel overload takes the Type_t as its
+ // second parameter, so type is forwarded in every call below to match it.
+ if (lod <= 0.0f) {
+ if (sampleMag == RS_SAMPLER_NEAREST) {
+ return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
+ }
+ return sample_LOD_LinearPixel(alloc, type, dk, dt, s, uv, 0);
+ }
+
+ if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
+ uint32_t maxLOD = type->mHal.state.lodCount - 1;
+ lod = min(lod, (float)maxLOD);
+ uint32_t nearestLOD = (uint32_t)round(lod);
+ return sample_LOD_LinearPixel(alloc, type, dk, dt, s, uv, nearestLOD);
+ }
+
+ if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
+ uint32_t lod0 = (uint32_t)floor(lod);
+ uint32_t lod1 = (uint32_t)ceil(lod);
+ uint32_t maxLOD = type->mHal.state.lodCount - 1;
+ // Clamp both levels to the last available mip.
+ lod0 = min(lod0, maxLOD);
+ lod1 = min(lod1, maxLOD);
+ float4 sample0 = sample_LOD_LinearPixel(alloc, type, dk, dt, s, uv, lod0);
+ float4 sample1 = sample_LOD_LinearPixel(alloc, type, dk, dt, s, uv, lod1);
+ float frac = lod - (float)lod0;
+ return sample0 * (1.0f - frac) + sample1 * frac;
+ }
+
+ return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
}
extern const float4 __attribute__((overloadable))
@@ -384,17 +589,74 @@
return rsSample(a, s, location, 0);
}
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
- SAMPLE_FUNC_BODY()
-}
-
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float2 location) {
- return rsSample(a, s, location, 0.0f);
-}
+// Samples a 2D allocation at coordinate uv with an explicit level of detail.
+// Returns 0 for user-kind elements, for data types other than unsigned 8 or
+// 5_6_5, and for allocations without the GRAPHICS_TEXTURE usage flag.
+// lod <= 0 uses the magnification filter on level 0; otherwise the
+// minification filter picks one mip level (LINEAR_MIP_NEAREST), a blend of
+// two adjacent levels (LINEAR_MIP_LINEAR), or a nearest lookup on level 0.
extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
- SAMPLE_FUNC_BODY()
+
+ const Allocation_t *alloc = (const Allocation_t *)a.p;
+
+ rs_element elem = rsAllocationGetElement(a);
+ rs_data_kind dk = rsElementGetDataKind(elem);
+ rs_data_type dt = rsElementGetDataType(elem);
+
+ if (dk == RS_KIND_USER ||
+ (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5) ||
+ !(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
+ return 0.f;
+ }
+
+ rs_sampler_value sampleMin = rsSamplerGetMinification(s);
+ rs_sampler_value sampleMag = rsSamplerGetMagnification(s);
+
+ if (lod <= 0.0f) {
+ if (sampleMag == RS_SAMPLER_NEAREST) {
+ return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
+ }
+ return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, 0);
+ }
+
+ if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
+ const Type_t *type = (const Type_t*)alloc->mHal.state.type;
+ uint32_t maxLOD = type->mHal.state.lodCount - 1;
+ lod = min(lod, (float)maxLOD);
+ uint32_t nearestLOD = (uint32_t)round(lod);
+ return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, nearestLOD);
+ }
+
+ if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
+ const Type_t *type = (const Type_t*)alloc->mHal.state.type;
+ uint32_t lod0 = (uint32_t)floor(lod);
+ uint32_t lod1 = (uint32_t)ceil(lod);
+ uint32_t maxLOD = type->mHal.state.lodCount - 1;
+ // Clamp both levels to the last available mip.
+ lod0 = min(lod0, maxLOD);
+ lod1 = min(lod1, maxLOD);
+ float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod0);
+ float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod1);
+ float frac = lod - (float)lod0;
+ return sample0 * (1.0f - frac) + sample1 * frac;
+ }
+
+ return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
}
+
+// Samples level 0 of a 2D allocation at coordinate uv. Returns 0 for
+// user-kind elements, for data types other than unsigned 8 or 5_6_5, and for
+// allocations without the GRAPHICS_TEXTURE usage flag. The sampler's
+// magnification setting chooses between nearest and linear filtering.
+extern const float4 __attribute__((overloadable))
+ rsSample(rs_allocation a, rs_sampler s, float2 uv) {
+
+ const Allocation_t *alloc = (const Allocation_t *)a.p;
+
+ rs_element elem = rsAllocationGetElement(a);
+ rs_data_kind dk = rsElementGetDataKind(elem);
+ rs_data_type dt = rsElementGetDataType(elem);
+
+ // Reject anything that is not a sampleable texture, one test at a time.
+ if (dk == RS_KIND_USER) {
+ return 0.f;
+ }
+ if (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5) {
+ return 0.f;
+ }
+ if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
+ return 0.f;
+ }
+
+ if (rsSamplerGetMagnification(s) != RS_SAMPLER_NEAREST) {
+ return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, 0);
+ }
+ return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
+}
+