Merge "Disable sampler optimization until bugs are fixed." into jb-mr2-dev
diff --git a/cpp/Allocation.cpp b/cpp/Allocation.cpp
index 4368225..7755f74 100644
--- a/cpp/Allocation.cpp
+++ b/cpp/Allocation.cpp
@@ -317,7 +317,7 @@
                                                 RsAllocationMipmapControl mips, uint32_t usage,
                                                 void *pointer) {
     void *id = rsAllocationCreateTyped(rs->getContext(), type->getID(), mips, usage,
-                                       (uint32_t)pointer);
+                                       (uintptr_t)pointer);
     if (id == 0) {
         ALOGE("Allocation creation failed.");
     }
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 58565f1..d6f8898 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -189,7 +189,7 @@
 
     int len = x2 - x1;
 
-    while((x2 > x1) && (((int)ptrIn) & 0x3)) {
+    while((x2 > x1) && (((uintptr_t)ptrIn) & 0x3)) {
         const uchar *pi = ptrIn;
         float blurredPixel = 0;
         const float* gp = gPtr;
@@ -359,7 +359,7 @@
 
     x1 = xstart;
     while ((x1 < x2) &&
-           ((x1 < (uint32_t)cp->mIradius) || (((int)out) & 0x3))) {
+           ((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) {
         OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius);
         out++;
         x1++;
diff --git a/cpu_ref/rsCpuRuntimeMath.cpp b/cpu_ref/rsCpuRuntimeMath.cpp
index 6c02303..ee82328 100644
--- a/cpu_ref/rsCpuRuntimeMath.cpp
+++ b/cpu_ref/rsCpuRuntimeMath.cpp
@@ -27,15 +27,29 @@
 #include "rsCpuCore.h"
 #include "rsCpuScript.h"
 
-
 using namespace android;
 using namespace android::renderscript;
 
+#define EXPORT_F32_FN_F32(fn)       /* SC_<fn>(float): forwards to fn */    \
+    float __attribute__((overloadable)) SC_##fn(float x) {      \
+        return fn(x);                                           \
+    }
+
+#define EXPORT_F32_FN_F32_F32(fn)   /* SC_<fn>(float, float): forwards to fn */ \
+    float __attribute__((overloadable)) SC_##fn(float a, float b) {     \
+        return fn(a, b);                                                \
+    }
 
 //////////////////////////////////////////////////////////////////////////////
 // Float util
 //////////////////////////////////////////////////////////////////////////////
 
+// tgammaf() is among the functions missing on Gingerbread, so this wrapper goes through tgamma().
+float SC_tgammaf(float x) {
+    return tgamma(x);
+}
+
+uint32_t SC_abs_i32(int32_t v) {return v < 0 ? 0u - (uint32_t)v : (uint32_t)v;}  // unsigned negate: defined for INT32_MIN, unlike abs()
 
 static void SC_MatrixLoadRotate(Matrix4x4 *m, float rot, float x, float y, float z) {
     m->loadRotate(rot, x, y, z);
@@ -94,6 +108,54 @@
     return fmin(v - i, 0x1.fffffep-1f);
 }
 
+#ifdef RS_COMPATIBILITY_LIB
+EXPORT_F32_FN_F32(acosf)
+EXPORT_F32_FN_F32(acoshf)
+EXPORT_F32_FN_F32(asinf)
+EXPORT_F32_FN_F32(asinhf)
+EXPORT_F32_FN_F32(atanf)
+EXPORT_F32_FN_F32_F32(atan2f)
+EXPORT_F32_FN_F32(atanhf)
+EXPORT_F32_FN_F32(cbrtf)
+EXPORT_F32_FN_F32(ceilf)
+EXPORT_F32_FN_F32_F32(copysignf)
+EXPORT_F32_FN_F32(cosf)
+EXPORT_F32_FN_F32(coshf)
+EXPORT_F32_FN_F32(erfcf)
+EXPORT_F32_FN_F32(erff)
+EXPORT_F32_FN_F32(expf)
+EXPORT_F32_FN_F32(exp2f)
+EXPORT_F32_FN_F32(expm1f)
+EXPORT_F32_FN_F32_F32(fdimf)
+EXPORT_F32_FN_F32(floorf)
+float SC_fmaf(float u, float t, float v) {return fmaf(u, t, v);}
+EXPORT_F32_FN_F32_F32(fmaxf)
+EXPORT_F32_FN_F32_F32(fminf)
+EXPORT_F32_FN_F32_F32(fmodf)
+float SC_frexpf(float v, int* ptr) {return frexpf(v, ptr);}
+EXPORT_F32_FN_F32_F32(hypotf)
+int SC_ilogbf(float v) {return ilogbf(v);}  // ilogbf() yields an int exponent; do not wrap it as float
+float SC_ldexpf(float v, int i) {return ldexpf(v, i);}
+EXPORT_F32_FN_F32(lgammaf)
+float SC_lgammaf_r(float v, int* ptr) {return lgammaf_r(v, ptr);}
+EXPORT_F32_FN_F32(logf)
+EXPORT_F32_FN_F32(log10f)
+EXPORT_F32_FN_F32(log1pf)
+EXPORT_F32_FN_F32(logbf)
+float SC_modff(float v, float* ptr) {return modff(v, ptr);}
+EXPORT_F32_FN_F32_F32(nextafterf)
+EXPORT_F32_FN_F32_F32(powf)
+EXPORT_F32_FN_F32_F32(remainderf)
+float SC_remquof(float t, float v, int* ptr) {return remquof(t, v, ptr);}
+EXPORT_F32_FN_F32(rintf)
+EXPORT_F32_FN_F32(roundf)
+EXPORT_F32_FN_F32(sinf)
+EXPORT_F32_FN_F32(sinhf)
+EXPORT_F32_FN_F32(sqrtf)
+EXPORT_F32_FN_F32(tanf)
+EXPORT_F32_FN_F32(tanhf)
+EXPORT_F32_FN_F32(truncf)
+#endif
 
 //////////////////////////////////////////////////////////////////////////////
 // Class implementation
diff --git a/cpu_ref/rsCpuRuntimeMathFuncs.cpp b/cpu_ref/rsCpuRuntimeMathFuncs.cpp
new file mode 100644
index 0000000..a1d7aff
--- /dev/null
+++ b/cpu_ref/rsCpuRuntimeMathFuncs.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2011-2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// exports unavailable mathlib functions to compat lib
+
+#ifdef RS_COMPATIBILITY_LIB
+
+typedef unsigned int uint32_t;
+typedef int int32_t;
+
+extern uint32_t SC_abs_i32(int32_t v);
+uint32_t __attribute__((overloadable)) abs(int32_t v) {return SC_abs_i32(v);}
+
+#define IMPORT_F32_FN_F32(func)                                         \
+    extern float SC_##func##f(float v);                                 \
+    float __attribute__((overloadable)) func(float v) {return SC_##func##f(v);}
+
+#define IMPORT_F32_FN_F32_F32(func)                                     \
+    extern float SC_##func##f(float t, float v);                        \
+    float __attribute__((overloadable)) func(float t, float v) {return SC_##func##f(t, v);}
+
+IMPORT_F32_FN_F32(acos)
+IMPORT_F32_FN_F32(acosh)
+IMPORT_F32_FN_F32(asin)
+IMPORT_F32_FN_F32(asinh)
+IMPORT_F32_FN_F32(atan)
+IMPORT_F32_FN_F32_F32(atan2)
+IMPORT_F32_FN_F32(atanh)
+IMPORT_F32_FN_F32(cbrt)
+IMPORT_F32_FN_F32(ceil)
+IMPORT_F32_FN_F32_F32(copysign)
+IMPORT_F32_FN_F32(cos)
+IMPORT_F32_FN_F32(cosh)
+IMPORT_F32_FN_F32(erfc)
+IMPORT_F32_FN_F32(erf)
+IMPORT_F32_FN_F32(exp)
+IMPORT_F32_FN_F32(exp2)
+IMPORT_F32_FN_F32(expm1)
+IMPORT_F32_FN_F32_F32(fdim)
+IMPORT_F32_FN_F32(floor)
+extern float SC_fmaf(float u, float t, float v);
+float __attribute__((overloadable)) fma(float u, float t, float v) {return SC_fmaf(u, t, v);}
+IMPORT_F32_FN_F32_F32(fmax)
+IMPORT_F32_FN_F32_F32(fmin)
+IMPORT_F32_FN_F32_F32(fmod)
+extern float SC_frexpf(float v, int* ptr);
+float __attribute__((overloadable)) frexp(float v, int* ptr) {return SC_frexpf(v, ptr);}
+IMPORT_F32_FN_F32_F32(hypot)
+extern int SC_ilogbf(float v); int __attribute__((overloadable)) ilogb(float v) {return SC_ilogbf(v);}  // int result, matching ilogbf()
+extern float SC_ldexpf(float v, int i);
+float __attribute__((overloadable)) ldexp(float v, int i) {return SC_ldexpf(v, i);}
+IMPORT_F32_FN_F32(lgamma)
+extern float SC_lgammaf_r(float v, int* ptr);
+float __attribute__((overloadable)) lgamma(float v, int* ptr) {return SC_lgammaf_r(v, ptr);}
+IMPORT_F32_FN_F32(log)
+IMPORT_F32_FN_F32(log10)
+IMPORT_F32_FN_F32(log1p)
+IMPORT_F32_FN_F32(logb)
+extern float SC_modff(float v, float* ptr);
+float modf(float v, float* ptr) {return SC_modff(v, ptr);}
+IMPORT_F32_FN_F32_F32(nextafter)
+IMPORT_F32_FN_F32_F32(pow)
+IMPORT_F32_FN_F32_F32(remainder)
+extern float SC_remquof(float t, float v, int* ptr);
+float remquo(float t, float v, int* ptr) {return SC_remquof(t, v, ptr);}
+IMPORT_F32_FN_F32(rint)
+IMPORT_F32_FN_F32(round)
+IMPORT_F32_FN_F32(sin)
+IMPORT_F32_FN_F32(sinh)
+IMPORT_F32_FN_F32(sqrt)
+IMPORT_F32_FN_F32(tan)
+IMPORT_F32_FN_F32(tanh)
+IMPORT_F32_FN_F32(tgamma)
+IMPORT_F32_FN_F32(trunc)
+
+// !!! DANGER !!!
+// These functions are potentially missing on older Android versions.
+// Work around the issue by supplying our own variants.
+// !!! DANGER !!!
+
+// logbl() as in the latest bionic/ (double == long double on Android); call the C
+// double logb() explicitly -- a bare logb(x) here would pick the float overload above.
+extern "C" double logb(double); extern "C" long double logbl(long double x) { return logb((double)x); }
+
+// __aeabi_idiv0 is a missing function in libcompiler_rt.so, so we just
+// pick the simplest implementation based on the ARM EABI doc.
+extern "C" int __aeabi_idiv0(int v) { return v; }
+
+#endif // compatibility lib
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index cdcf48f..620c47c 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -526,7 +526,7 @@
 
 
     if (dimX > oldDimX) {
-        uint32_t stride = alloc->mHal.state.elementSizeBytes;
+        size_t stride = alloc->mHal.state.elementSizeBytes;
         memset(((uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr) + stride * oldDimX,
                  0, stride * (dimX - oldDimX));
     }
@@ -797,13 +797,13 @@
 
 
 void rsdAllocationData1D(const Context *rsc, const Allocation *alloc,
-                         uint32_t xoff, uint32_t lod, uint32_t count,
+                         uint32_t xoff, uint32_t lod, size_t count,
                          const void *data, size_t sizeBytes) {
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
-    const uint32_t eSize = alloc->mHal.state.type->getElementSizeBytes();
+    const size_t eSize = alloc->mHal.state.type->getElementSizeBytes();
     uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
-    uint32_t size = count * eSize;
+    size_t size = count * eSize;
 
     if (ptr != data) {
         // Skip the copy if we are the same allocation. This can arise from
@@ -822,8 +822,8 @@
                          uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride) {
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
-    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    uint32_t lineSize = eSize * w;
+    size_t eSize = alloc->mHal.state.elementSizeBytes;
+    size_t lineSize = eSize * w;
     if (!stride) {
         stride = lineSize;
     }
@@ -850,7 +850,7 @@
         if (alloc->mHal.state.yuv) {
             int lod = 1;
             while (alloc->mHal.drvState.lod[lod].mallocPtr) {
-                uint32_t lineSize = alloc->mHal.drvState.lod[lod].dimX;
+                size_t lineSize = alloc->mHal.drvState.lod[lod].dimX;
                 uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face);
 
                 for (uint32_t line=(yoff >> 1); line < ((yoff+h)>>1); line++) {
@@ -871,14 +871,14 @@
 void rsdAllocationData3D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
                          uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes) {
+                         uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes) {
 
 }
 
 void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc,
-                         uint32_t xoff, uint32_t lod, uint32_t count,
+                         uint32_t xoff, uint32_t lod, size_t count,
                          void *data, size_t sizeBytes) {
-    const uint32_t eSize = alloc->mHal.state.type->getElementSizeBytes();
+    const size_t eSize = alloc->mHal.state.type->getElementSizeBytes();
     const uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
     if (data != ptr) {
         // Skip the copy if we are the same allocation. This can arise from
@@ -890,8 +890,8 @@
 void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc,
                                 uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
                                 uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride) {
-    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
-    uint32_t lineSize = eSize * w;
+    size_t eSize = alloc->mHal.state.elementSizeBytes;
+    size_t lineSize = eSize * w;
     if (!stride) {
         stride = lineSize;
     }
@@ -919,7 +919,7 @@
 void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
                          uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, void *data, uint32_t sizeBytes) {
+                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes) {
 
 }
 
@@ -935,7 +935,7 @@
 
 void rsdAllocationData1D_alloc(const android::renderscript::Context *rsc,
                                const android::renderscript::Allocation *dstAlloc,
-                               uint32_t dstXoff, uint32_t dstLod, uint32_t count,
+                               uint32_t dstXoff, uint32_t dstLod, size_t count,
                                const android::renderscript::Allocation *srcAlloc,
                                uint32_t srcXoff, uint32_t srcLod) {
 }
@@ -948,7 +948,7 @@
                                       const android::renderscript::Allocation *srcAlloc,
                                       uint32_t srcXoff, uint32_t srcYoff, uint32_t srcLod,
                                       RsAllocationCubemapFace srcFace) {
-    uint32_t elementSize = dstAlloc->getType()->getElementSizeBytes();
+    size_t elementSize = dstAlloc->getType()->getElementSizeBytes();
     for (uint32_t i = 0; i < h; i ++) {
         uint8_t *dstPtr = GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstLod, dstFace);
         uint8_t *srcPtr = GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcLod, srcFace);
@@ -988,10 +988,10 @@
 
 void rsdAllocationElementData1D(const Context *rsc, const Allocation *alloc,
                                 uint32_t x,
-                                const void *data, uint32_t cIdx, uint32_t sizeBytes) {
+                                const void *data, uint32_t cIdx, size_t sizeBytes) {
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
-    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    size_t eSize = alloc->mHal.state.elementSizeBytes;
     uint8_t * ptr = GetOffsetPtr(alloc, x, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
 
     const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
@@ -1008,10 +1008,10 @@
 
 void rsdAllocationElementData2D(const Context *rsc, const Allocation *alloc,
                                 uint32_t x, uint32_t y,
-                                const void *data, uint32_t cIdx, uint32_t sizeBytes) {
+                                const void *data, uint32_t cIdx, size_t sizeBytes) {
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
-    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    size_t eSize = alloc->mHal.state.elementSizeBytes;
     uint8_t * ptr = GetOffsetPtr(alloc, x, y, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
 
     const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index c209203..b64c999 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -104,33 +104,33 @@
 
 void rsdAllocationData1D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
-                         uint32_t xoff, uint32_t lod, uint32_t count,
-                         const void *data, uint32_t sizeBytes);
+                         uint32_t xoff, uint32_t lod, size_t count,
+                         const void *data, size_t sizeBytes);
 void rsdAllocationData2D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
                          uint32_t w, uint32_t h,
-                         const void *data, uint32_t sizeBytes, size_t stride);
+                         const void *data, size_t sizeBytes, size_t stride);
 void rsdAllocationData3D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
                          uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes);
+                         uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
 
 void rsdAllocationRead1D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
-                         uint32_t xoff, uint32_t lod, uint32_t count,
-                         void *data, uint32_t sizeBytes);
+                         uint32_t xoff, uint32_t lod, size_t count,
+                         void *data, size_t sizeBytes);
 void rsdAllocationRead2D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
                          uint32_t w, uint32_t h,
-                         void *data, uint32_t sizeBytes, size_t stride);
+                         void *data, size_t sizeBytes, size_t stride);
 void rsdAllocationRead3D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
                          uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, void *data, uint32_t sizeBytes);
+                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
 
 void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
                           const android::renderscript::Allocation *alloc);
@@ -140,7 +140,7 @@
 
 void rsdAllocationData1D_alloc(const android::renderscript::Context *rsc,
                                const android::renderscript::Allocation *dstAlloc,
-                               uint32_t dstXoff, uint32_t dstLod, uint32_t count,
+                               uint32_t dstXoff, uint32_t dstLod, size_t count,
                                const android::renderscript::Allocation *srcAlloc,
                                uint32_t srcXoff, uint32_t srcLod);
 void rsdAllocationData2D_alloc(const android::renderscript::Context *rsc,
@@ -162,11 +162,11 @@
 void rsdAllocationElementData1D(const android::renderscript::Context *rsc,
                                 const android::renderscript::Allocation *alloc,
                                 uint32_t x,
-                                const void *data, uint32_t elementOff, uint32_t sizeBytes);
+                                const void *data, uint32_t elementOff, size_t sizeBytes);
 void rsdAllocationElementData2D(const android::renderscript::Context *rsc,
                                 const android::renderscript::Allocation *alloc,
                                 uint32_t x, uint32_t y,
-                                const void *data, uint32_t elementOff, uint32_t sizeBytes);
+                                const void *data, uint32_t elementOff, size_t sizeBytes);
 
 void rsdAllocationGenerateMipmaps(const android::renderscript::Context *rsc,
                                   const android::renderscript::Allocation *alloc);
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 77e0576..709d181 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -70,7 +70,7 @@
                             const Allocation * ain,
                             Allocation * aout,
                             const void * usr,
-                            uint32_t usrLen,
+                            size_t usrLen,
                             const RsScriptCall *sc) {
 
     RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index 4c65c2a..7b1b06c 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -40,7 +40,7 @@
                             const android::renderscript::Allocation * ain,
                             android::renderscript::Allocation * aout,
                             const void * usr,
-                            uint32_t usrLen,
+                            size_t usrLen,
                             const RsScriptCall *sc);
 
 int rsdScriptInvokeRoot(const android::renderscript::Context *dc,
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index 8e7d9ee..76c3654 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -72,6 +72,31 @@
 typedef uint64_t ulong;
 #endif
 
+#ifdef RS_COMPATIBILITY_LIB
+#define OPAQUETYPE(t) /* declare t as a packed, 4-byte-aligned struct holding one pointer p */ \
+    typedef struct { const int* const p; } __attribute__((packed, aligned(4))) t;
+
+OPAQUETYPE(rs_element)
+OPAQUETYPE(rs_type)
+OPAQUETYPE(rs_allocation)
+OPAQUETYPE(rs_sampler)
+OPAQUETYPE(rs_script)
+OPAQUETYPE(rs_script_call)
+#undef OPAQUETYPE
+
+typedef struct {    // broken-down time for the compat lib; rsLocaltime() casts it to tm
+    int tm_sec;     ///< seconds
+    int tm_min;     ///< minutes
+    int tm_hour;    ///< hours
+    int tm_mday;    ///< day of the month
+    int tm_mon;     ///< month
+    int tm_year;    ///< year
+    int tm_wday;    ///< day of the week
+    int tm_yday;    ///< day of the year
+    int tm_isdst;   ///< daylight savings time
+} rs_tm;  // NOTE(review): presumably must mirror the leading fields of struct tm -- confirm
+#endif
+
 //////////////////////////////////////////////////////////////////////////////
 // Allocation
 //////////////////////////////////////////////////////////////////////////////
@@ -480,10 +505,17 @@
     return rsrGetDt(rsc, sc);
 }
 
+#ifndef RS_COMPATIBILITY_LIB
 time_t SC_Time(time_t *timer) {
     Context *rsc = RsdCpuReference::getTlsContext();
     return rsrTime(rsc, timer);
 }
+#else
+static int SC_Time(int *timer) {  // compat-lib variant: int-based instead of time_t
+    Context *rsc = RsdCpuReference::getTlsContext();
+    return rsrTime(rsc, (long*)timer);  // NOTE(review): int* viewed as long* -- assumes sizeof(long) == sizeof(int); confirm
+}
+#endif
 
 tm* SC_LocalTime(tm *local, time_t *timer) {
     Context *rsc = RsdCpuReference::getTlsContext();
@@ -1140,6 +1172,68 @@
 
 #ifdef RS_COMPATIBILITY_LIB
 
+//////////////////////////////////////////////////////////////////////////////
+// Compatibility Library entry points
+//////////////////////////////////////////////////////////////////////////////
+
+bool rsIsObject(rs_element src) {  // compat-lib entry point; passes the handle's pointer to SC_IsObject()
+    return SC_IsObject((ObjectBase*)src.p);
+}
+
+#define CLEAR_SET_OBJ(T) /* rsClearObject/rsSetObject overloads for opaque handle type T */ \
+    void __attribute__((overloadable)) rsClearObject(T *dst) { \
+        SC_ClearObject((ObjectBase**) dst); \
+    } \
+    void __attribute__((overloadable)) rsSetObject(T *dst, T src) { \
+        SC_SetObject((ObjectBase**) dst, (ObjectBase*) src.p); \
+    }
+
+CLEAR_SET_OBJ(rs_element)
+CLEAR_SET_OBJ(rs_type)
+CLEAR_SET_OBJ(rs_allocation)
+CLEAR_SET_OBJ(rs_sampler)
+CLEAR_SET_OBJ(rs_script)
+#undef CLEAR_SET_OBJ
+
+const Allocation * rsGetAllocation(const void *ptr) {  // compat-lib entry point; forwards to SC_GetAllocation()
+    return SC_GetAllocation(ptr);
+}
+
+// Compat-lib rsForEach() without a usrLen argument; forwards to SC_ForEach_SAAUS().
+void __attribute__((overloadable)) rsForEach(rs_script script, rs_allocation in, rs_allocation out,
+                                             const void *usr, const rs_script_call *call) {
+    Script *target = (Script *)script.p;
+    Allocation *src = (Allocation*)in.p, *dst = (Allocation*)out.p;
+    SC_ForEach_SAAUS(target, src, dst, usr, (RsScriptCall*)call);
+}
+
+// Compat-lib rsForEach() overload carrying usrLen; forwards to SC_ForEach_SAAULS().
+void __attribute__((overloadable)) rsForEach(rs_script script, rs_allocation in, rs_allocation out,
+                                             const void *usr, uint32_t usrLen,
+                                             const rs_script_call *call) {
+    Script *target = (Script *)script.p;
+    Allocation *src = (Allocation*)in.p, *dst = (Allocation*)out.p;
+    SC_ForEach_SAAULS(target, src, dst, usr, usrLen, (RsScriptCall*)call);
+}
+
+int rsTime(int *timer) {  // compat-lib entry point; forwards to the int-based SC_Time()
+    return SC_Time(timer);
+}
+
+rs_tm* rsLocaltime(rs_tm* local, const int *timer) {  // compat-lib wrapper over SC_LocalTime()
+    return (rs_tm*)(SC_LocalTime((tm*)local, (long*)timer));  // NOTE(review): casts assume rs_tm/tm and int/long are layout-compatible -- confirm
+}
+
+// Compat-lib entry point: returns rsrUptimeMillis() for the context from getTlsContext().
+int64_t rsUptimeMillis() {
+    return rsrUptimeMillis(RsdCpuReference::getTlsContext());
+}
+
+// Compat-lib entry point: hands cmdID plus a data/len payload to rsrToClientBlocking().
+uint32_t rsSendToClientBlocking2(int cmdID, void *data, int len) {
+    return rsrToClientBlocking(RsdCpuReference::getTlsContext(), cmdID, data, len);
+}
+
 uint32_t rsSendToClientBlocking(int cmdID) {
     Context *rsc = RsdCpuReference::getTlsContext();
     return rsrToClientBlocking(rsc, cmdID, NULL, 0);
@@ -1323,11 +1417,11 @@
 }
 
 void rsDebug(const char *s, rs_matrix3x3 *m) {
-    SC_debugFM4v4(s, (float *) m);
+    SC_debugFM3v3(s, (float *) m);
 }
 
 void rsDebug(const char *s, rs_matrix2x2 *m) {
-    SC_debugFM4v4(s, (float *) m);
+    SC_debugFM2v2(s, (float *) m);
 }
 
 void rsDebug(const char *s, char c) {
diff --git a/rs.spec b/rs.spec
index e4fea03..49a082a 100644
--- a/rs.spec
+++ b/rs.spec
@@ -48,7 +48,7 @@
     param RsType vtype
     param RsAllocationMipmapControl mips
     param uint32_t usages
-    param uint32_t ptr
+    param uintptr_t ptr
     ret RsAllocation
 }
 
@@ -86,43 +86,6 @@
     sync
     }
 
-ContextBindRootScript {
-    param RsScript sampler
-    }
-
-ContextBindProgramStore {
-    param RsProgramStore pgm
-    }
-
-ContextBindProgramFragment {
-    param RsProgramFragment pgm
-    }
-
-ContextBindProgramVertex {
-    param RsProgramVertex pgm
-    }
-
-ContextBindProgramRaster {
-    param RsProgramRaster pgm
-    }
-
-ContextBindFont {
-    param RsFont pgm
-    }
-
-ContextPause {
-    }
-
-ContextResume {
-    }
-
-ContextSetSurface {
-    param uint32_t width
-    param uint32_t height
-    param RsNativeWindow sur
-        sync
-    }
-
 ContextDump {
     param int32_t bits
 }
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 30176ec..7e4fbc2 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -549,8 +549,8 @@
 
 RsAllocation rsi_AllocationCreateTyped(Context *rsc, RsType vtype,
                                        RsAllocationMipmapControl mips,
-                                       uint32_t usages, uint32_t ptr) {
-    Allocation * alloc = Allocation::createAllocation(rsc, static_cast<Type *>(vtype), usages, mips, (void *)ptr);
+                                       uint32_t usages, uintptr_t ptr) {
+    Allocation * alloc = Allocation::createAllocation(rsc, static_cast<Type *>(vtype), usages, mips, (void*)ptr);
     if (!alloc) {
         return NULL;
     }
diff --git a/rsElement.cpp b/rsElement.cpp
index 3126c28..712f17d 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -420,14 +420,14 @@
     (*elemData++) = e->getFieldCount();
 }
 
-void rsaElementGetSubElements(RsContext con, RsElement elem, uint32_t *ids,
-                              const char **names, uint32_t *arraySizes, uint32_t dataSize) {
+void rsaElementGetSubElements(RsContext con, RsElement elem, uintptr_t *ids,
+                              const char **names, size_t *arraySizes, uint32_t dataSize) {
     Element *e = static_cast<Element *>(elem);
     rsAssert(e->getFieldCount() == dataSize);
 
     for (uint32_t i = 0; i < dataSize; i ++) {
         e->getField(i)->incUserRef();
-        ids[i] = (uint32_t)e->getField(i);
+        ids[i] = (uintptr_t)e->getField(i);
         names[i] = e->getFieldName(i);
         arraySizes[i] = e->getFieldArraySize(i);
     }
diff --git a/rsScript.cpp b/rsScript.cpp
index f23d107..4b2765a 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -199,7 +199,7 @@
     s->setVarObj(slot, o);
 }
 
-void rsi_ScriptSetVarJ(Context *rsc, RsScript vs, uint32_t slot, long long value) {
+void rsi_ScriptSetVarJ(Context *rsc, RsScript vs, uint32_t slot, int64_t value) {
     Script *s = static_cast<Script *>(vs);
     s->setVar(slot, &value, sizeof(value));
 }
diff --git a/rsType.cpp b/rsType.cpp
index dacf5e0..9719a04 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -346,7 +346,7 @@
 }
 }
 
-void rsaTypeGetNativeData(RsContext con, RsType type, uint32_t *typeData, uint32_t typeDataSize) {
+void rsaTypeGetNativeData(RsContext con, RsType type, uintptr_t *typeData, uint32_t typeDataSize) {
     rsAssert(typeDataSize == 6);
     // Pack the data in the follofing way mHal.state.dimX; mHal.state.dimY; mHal.state.dimZ;
     // mHal.state.lodCount; mHal.state.faces; mElement; into typeData
@@ -357,6 +357,6 @@
     (*typeData++) = t->getDimZ();
     (*typeData++) = t->getDimLOD() ? 1 : 0;
     (*typeData++) = t->getDimFaces() ? 1 : 0;
-    (*typeData++) = (uint32_t)t->getElement();
+    (*typeData++) = (uintptr_t)t->getElement();
     t->getElement()->incUserRef();
 }
diff --git a/rs_hal.h b/rs_hal.h
index 6a71930..0b0d00a 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -112,7 +112,7 @@
                               const Allocation * ain,
                               Allocation * aout,
                               const void * usr,
-                              uint32_t usrLen,
+                              size_t usrLen,
                               const RsScriptCall *sc);
         void (*invokeInit)(const Context *rsc, Script *s);
         void (*invokeFreeChildren)(const Context *rsc, Script *s);
@@ -153,7 +153,7 @@
         void (*ioReceive)(const Context *rsc, Allocation *alloc);
 
         void (*data1D)(const Context *rsc, const Allocation *alloc,
-                       uint32_t xoff, uint32_t lod, uint32_t count,
+                       uint32_t xoff, uint32_t lod, size_t count,
                        const void *data, size_t sizeBytes);
         void (*data2D)(const Context *rsc, const Allocation *alloc,
                        uint32_t xoff, uint32_t yoff, uint32_t lod,
@@ -165,7 +165,7 @@
                        uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
 
         void (*read1D)(const Context *rsc, const Allocation *alloc,
-                       uint32_t xoff, uint32_t lod, uint32_t count,
+                       uint32_t xoff, uint32_t lod, size_t count,
                        void *data, size_t sizeBytes);
         void (*read2D)(const Context *rsc, const Allocation *alloc,
                        uint32_t xoff, uint32_t yoff, uint32_t lod,
@@ -185,7 +185,7 @@
         // Allocation to allocation copies
         void (*allocData1D)(const Context *rsc,
                             const Allocation *dstAlloc,
-                            uint32_t dstXoff, uint32_t dstLod, uint32_t count,
+                            uint32_t dstXoff, uint32_t dstLod, size_t count,
                             const Allocation *srcAlloc, uint32_t srcXoff, uint32_t srcLod);
         void (*allocData2D)(const Context *rsc,
                             const Allocation *dstAlloc,
diff --git a/rsg.spec b/rsg.spec
index 72bc695..8ac73e8 100644
--- a/rsg.spec
+++ b/rsg.spec
@@ -84,3 +84,40 @@
     param float quality
     ret RsPath
     }
+
+ContextBindProgramStore {
+    param RsProgramStore pgm
+    }
+
+ContextBindProgramFragment {
+    param RsProgramFragment pgm
+    }
+
+ContextBindProgramVertex {
+    param RsProgramVertex pgm
+    }
+
+ContextBindProgramRaster {
+    param RsProgramRaster pgm
+    }
+
+ContextBindFont {
+    param RsFont pgm
+    }
+
+ContextSetSurface {
+    param uint32_t width
+    param uint32_t height
+    param RsNativeWindow sur
+        sync
+    }
+
+ContextBindRootScript {
+    param RsScript sampler
+    }
+
+ContextPause {
+    }
+
+ContextResume {
+    }
diff --git a/rsg_generator.c b/rsg_generator.c
index 7022bcb..ca26851 100644
--- a/rsg_generator.c
+++ b/rsg_generator.c
@@ -248,9 +248,9 @@
             fprintf(f, "    }\n\n");
 
             fprintf(f, "    ThreadIO *io = &((Context *)rsc)->mIO;\n");
-            fprintf(f, "    const uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
+            fprintf(f, "    const size_t size = sizeof(RS_CMD_%s);\n", api->name);
             if (hasInlineDataPointers(api)) {
-                fprintf(f, "    uint32_t dataSize = 0;\n");
+                fprintf(f, "    size_t dataSize = 0;\n");
                 for (ct2=0; ct2 < api->paramCount; ct2++) {
                     const VarType *vt = &api->params[ct2];
                     if (vt->isConst && vt->ptrLevel) {
@@ -659,7 +659,7 @@
             printPlaybackFuncs(f, "rsp_");
             fprintf(f, "\n\ntypedef struct RsPlaybackRemoteHeaderRec {\n");
             fprintf(f, "    uint32_t command;\n");
-            fprintf(f, "    uint32_t size;\n");
+            fprintf(f, "    size_t size;\n");
             fprintf(f, "} RsPlaybackRemoteHeader;\n\n");
             fprintf(f, "typedef void (*RsPlaybackLocalFunc)(Context *, const void *, size_t sizeBytes);\n");
             fprintf(f, "typedef void (*RsPlaybackRemoteFunc)(Context *, ThreadIO *);\n");