Merge "Switch to frameworks/rs/driver/linkloader." into jb-mr2-dev
diff --git a/lib/Renderscript/runtime/rs_allocation.c b/lib/Renderscript/runtime/rs_allocation.c
index de7ddbd..903a350 100644
--- a/lib/Renderscript/runtime/rs_allocation.c
+++ b/lib/Renderscript/runtime/rs_allocation.c
@@ -2,6 +2,8 @@
 #include "rs_graphics.rsh"
 #include "rs_structs.h"
 
+#define RS_DEBUG_RUNTIME 0  // nonzero: ELEMENT_AT forwards to extern by-pointer accessors
+
 // Opaque Allocation type operations
 extern uint32_t __attribute__((overloadable))
     rsAllocationGetDimX(rs_allocation a) {
@@ -33,32 +35,6 @@
     return alloc->mHal.state.hasFaces;
 }
 
-extern const void * __attribute__((overloadable))
-        rsGetElementAt(rs_allocation a, uint32_t x) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    return &p[eSize * x];
-}
-
-extern const void * __attribute__((overloadable))
-        rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
-    return &p[(eSize * x) + (y * stride)];
-}
-
-extern const void * __attribute__((overloadable))
-        rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
-    const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
-    return &p[(eSize * x) + (y * stride) + (z * stride * dimY)];
-}
 
 extern rs_element __attribute__((overloadable))
         rsAllocationGetElement(rs_allocation a) {
@@ -80,33 +56,53 @@
     }
 }
 
-extern void __attribute__((overloadable))
-        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    memcpy((void*)&p[eSize * x], ptr, eSize);
-}
+#if RS_DEBUG_RUNTIME
+#define ELEMENT_AT(T) /* debug build: typed accessors forward to extern by-pointer forms */ \
+    extern void __attribute__((overloadable))                           \
+        rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x);  \
+    extern void __attribute__((overloadable))                           \
+        rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x, uint32_t y); \
+    extern void __attribute__((overloadable))                           \
+        rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x, uint32_t y, uint32_t z); \
+    extern void __attribute__((overloadable))                           \
+        rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x);  \
+    extern void __attribute__((overloadable))                           \
+        rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x, uint32_t y); \
+    extern void __attribute__((overloadable))                           \
+        rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x, uint32_t y, uint32_t z); \
+    /* By-value wrappers: pass &val / &tmp to the by-pointer forms above. */ \
+    extern void __attribute__((overloadable))                           \
+    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x) {            \
+        rsSetElementAt_##T(a, &val, x);                                 \
+    }                                                                   \
+    extern void __attribute__((overloadable))                           \
+    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y) { \
+        rsSetElementAt_##T(a, &val, x, y);                              \
+    }                                                                   \
+    extern void __attribute__((overloadable))                           \
+    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y, uint32_t z) { \
+        rsSetElementAt_##T(a, &val, x, y, z);                           \
+    }                                                                   \
+    extern T __attribute__((overloadable))                              \
+    rsGetElementAt_##T(rs_allocation a, uint32_t x) {                   \
+        T tmp;                                                          \
+        rsGetElementAt_##T(a, &tmp, x);                                 \
+        return tmp;                                                     \
+    }                                                                   \
+    extern T __attribute__((overloadable))                              \
+    rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y) {       \
+        T tmp;                                                          \
+        rsGetElementAt_##T(a, &tmp, x, y);                              \
+        return tmp;                                                     \
+    }                                                                   \
+    extern T __attribute__((overloadable))                              \
+    rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) { \
+        T tmp;                                                          \
+        rsGetElementAt_##T(a, &tmp, x, y, z);                           \
+        return tmp;                                                     \
+    }
 
-extern void __attribute__((overloadable))
-        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
-    memcpy((void*)&p[(eSize * x) + (y * stride)], ptr, eSize);
-}
-
-extern void __attribute__((overloadable))
-        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y, uint32_t z) {
-    Allocation_t *alloc = (Allocation_t *)a.p;
-    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
-    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
-    const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
-    memcpy((void*)&p[(eSize * x) + (y * stride) + (z * stride * dimY)], ptr, eSize);
-}
-
+#else
 #define ELEMENT_AT(T)                                                   \
     extern void __attribute__((overloadable))                           \
     rsSetElementAt_##T(rs_allocation a, T val, uint32_t x) {            \
@@ -155,6 +151,62 @@
         return ((const T*)dp)[0];                                       \
     }
 
+
+// Returns the address of element x in LOD 0: base pointer + x * element size.
+extern const void * __attribute__((overloadable))
+        rsGetElementAt(rs_allocation a, uint32_t x) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    return &p[eSize * x];  // x is not range-checked here
+}
+// 2D form: returns the address at byte offset x * eSize + y * lod[0].stride.
+extern const void * __attribute__((overloadable))
+        rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
+    return &p[(eSize * x) + (y * stride)];  // x and y are not range-checked here
+}
+// 3D form: adds z * stride * dimY bytes to the 2D offset (one step per z slice).
+extern const void * __attribute__((overloadable))
+        rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
+    const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
+    return &p[(eSize * x) + (y * stride) + (z * stride * dimY)];  // offset computed in uint32_t
+}
+extern void __attribute__((overloadable))  // 1D: copies one element from ptr into LOD 0 at x
+        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    memcpy((void*)&p[eSize * x], ptr, eSize);  // cast drops the const on p so it can be written
+}
+// 2D form: copies eSize bytes from ptr to byte offset x * eSize + y * lod[0].stride.
+extern void __attribute__((overloadable))
+        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
+    memcpy((void*)&p[(eSize * x) + (y * stride)], ptr, eSize);  // cast drops the const on p
+}
+// 3D form: destination offset is the 2D offset plus z * stride * dimY bytes.
+extern void __attribute__((overloadable))
+        rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y, uint32_t z) {
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
+    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
+    const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
+    memcpy((void*)&p[(eSize * x) + (y * stride) + (z * stride * dimY)], ptr, eSize);  // cast drops const
+}
+#endif
+
 ELEMENT_AT(char)
 ELEMENT_AT(char2)
 ELEMENT_AT(char3)
@@ -197,3 +249,64 @@
 ELEMENT_AT(double4)
 
 #undef ELEMENT_AT
+
+// Y sample at (x, y): delegates to the 2D uchar getter on the same allocation.
+extern const uchar __attribute__((overloadable))
+        rsGetElementAtYuv_uchar_Y(rs_allocation a, uint32_t x, uint32_t y) {
+    return rsGetElementAt_uchar(a, x, y);
+}
+// Returns the U sample for pixel (x, y), or 0 when mHal.state.yuv is not handled.
+extern const uchar __attribute__((overloadable))
+        rsGetElementAtYuv_uchar_U(rs_allocation a, uint32_t x, uint32_t y) {
+    // lod[1] is fetched before the format check; both handled formats read from it.
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint32_t yuvID = alloc->mHal.state.yuv;
+    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
+    const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
+    // Both cases halve x and y, so a 2x2 block of pixels maps to one sample.
+    switch(yuvID) {
+    case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
+        x >>= 1;
+        y >>= 1;
+        return p[x + (y * stride)];
+    case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP:  // NV21
+        x >>= 1;
+        y >>= 1;
+        return p[(x<<1) + (y * stride)];  // NOTE(review): even byte read as U; confirm vs Cr-first order
+    default:
+        break;
+    }
+
+    return 0;
+}
+// Returns the V sample for pixel (x, y), or 0 when mHal.state.yuv is not handled.
+extern const uchar __attribute__((overloadable))
+        rsGetElementAtYuv_uchar_V(rs_allocation a, uint32_t x, uint32_t y) {
+    // YV12 reads from lod[2]; NV21 reads the odd byte of each pair in lod[1].
+    Allocation_t *alloc = (Allocation_t *)a.p;
+    const uint32_t yuvID = alloc->mHal.state.yuv;
+
+    switch(yuvID) {
+    case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
+        {
+        const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[2].mallocPtr;
+        const uint32_t stride = alloc->mHal.drvState.lod[2].stride;
+        x >>= 1;
+        y >>= 1;
+        return p[x + (y * stride)];
+        }
+    case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP:  // NV21
+        {
+        const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
+        const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
+        x >>= 1;
+        y >>= 1;
+        return p[(x<<1) + (y * stride) + 1];  // NOTE(review): odd byte read as V; confirm vs Cr-first order
+        }
+    default:
+            break;
+    }
+
+    return 0;
+}
+
diff --git a/lib/Renderscript/runtime/rs_core.c b/lib/Renderscript/runtime/rs_core.c
index f0c9490..54fcccb 100644
--- a/lib/Renderscript/runtime/rs_core.c
+++ b/lib/Renderscript/runtime/rs_core.c
@@ -163,3 +163,42 @@
     return (int)rsRand((float)min, (float)max);
 }
 
+#define PRIM_DEBUG(T) /* by-value rsDebug; forwards &val to the pointer overload */ \
+extern void __attribute__((overloadable)) rsDebug(const char *, const T *);     \
+void __attribute__((overloadable)) rsDebug(const char *txt, T val) {            \
+    rsDebug(txt, &val);                                                         \
+}
+// One rsDebug overload per 2-, 3- and 4-component vector of each listed base type.
+PRIM_DEBUG(char2)
+PRIM_DEBUG(char3)
+PRIM_DEBUG(char4)
+PRIM_DEBUG(uchar2)
+PRIM_DEBUG(uchar3)
+PRIM_DEBUG(uchar4)
+PRIM_DEBUG(short2)
+PRIM_DEBUG(short3)
+PRIM_DEBUG(short4)
+PRIM_DEBUG(ushort2)
+PRIM_DEBUG(ushort3)
+PRIM_DEBUG(ushort4)
+PRIM_DEBUG(int2)
+PRIM_DEBUG(int3)
+PRIM_DEBUG(int4)
+PRIM_DEBUG(uint2)
+PRIM_DEBUG(uint3)
+PRIM_DEBUG(uint4)
+PRIM_DEBUG(long2)
+PRIM_DEBUG(long3)
+PRIM_DEBUG(long4)
+PRIM_DEBUG(ulong2)
+PRIM_DEBUG(ulong3)
+PRIM_DEBUG(ulong4)
+PRIM_DEBUG(float2)
+PRIM_DEBUG(float3)
+PRIM_DEBUG(float4)
+PRIM_DEBUG(double2)
+PRIM_DEBUG(double3)
+PRIM_DEBUG(double4)
+
+#undef PRIM_DEBUG
+