Merge "Remove invalid logging on missing root.expand() functions." into jb-mr2-dev
diff --git a/Android.mk b/Android.mk
index 1c71962..42a018e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -36,7 +36,7 @@
 
 
 LOCAL_SHARED_LIBRARIES += libRS libRSCpuRef
-LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2
+LOCAL_SHARED_LIBRARIES += liblog libcutils libutils libEGL libGLESv1_CM libGLESv2
 LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libLLVM libui libgui libsync
 
 LOCAL_C_INCLUDES += frameworks/compile/libbcc/include
@@ -147,7 +147,7 @@
 	rsThreadIO.cpp \
 	rsType.cpp
 
-LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2 libbcc
+LOCAL_SHARED_LIBRARIES += liblog libcutils libutils libEGL libGLESv1_CM libGLESv2 libbcc
 LOCAL_SHARED_LIBRARIES += libui libbcinfo libLLVM libgui libsync libdl
 
 LOCAL_STATIC_LIBRARIES := libft2
@@ -239,7 +239,7 @@
 	rsThreadIO.cpp \
 	rsType.cpp
 
-LOCAL_STATIC_LIBRARIES := libcutils libutils
+LOCAL_STATIC_LIBRARIES := libcutils libutils liblog
 
 LOCAL_LDLIBS := -lpthread
 
@@ -328,6 +328,7 @@
 LOCAL_STATIC_LIBRARIES := \
   librsloader \
   libcutils \
+  liblog \
   libLLVMSupport
 
 LOCAL_SRC_FILES := \
@@ -362,6 +363,7 @@
 LOCAL_STATIC_LIBRARIES := \
   librsloader \
   libcutils \
+  liblog \
   libLLVMSupport
 
 LOCAL_SRC_FILES := \
@@ -448,4 +450,3 @@
 
 
 include $(call all-makefiles-under,$(LOCAL_PATH))
-
diff --git a/cpp/Android.mk b/cpp/Android.mk
index 1fba998..145f487 100644
--- a/cpp/Android.mk
+++ b/cpp/Android.mk
@@ -27,7 +27,8 @@
 	libRS \
 	libz \
 	libcutils \
-	libutils
+	libutils \
+	liblog
 
 LOCAL_MODULE:= libRScpp
 
@@ -44,4 +45,3 @@
 
 
 include $(BUILD_SHARED_LIBRARY)
-
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index 685969f..67e0786 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -40,7 +40,7 @@
     LOCAL_CFLAGS += -DARCH_ARM_HAVE_VFP
 endif
 
-LOCAL_SHARED_LIBRARIES += libRS libcutils libutils libsync
+LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync
 LOCAL_SHARED_LIBRARIES += libbcc libbcinfo
 
 LOCAL_C_INCLUDES += frameworks/compile/libbcc/include
@@ -52,5 +52,3 @@
 LOCAL_MODULE_TAGS := optional
 
 include $(BUILD_SHARED_LIBRARY)
-
-
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 246e67c..231ce12 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -88,10 +88,11 @@
 #endif
 
 uint8_t *GetOffsetPtr(const android::renderscript::Allocation *alloc,
-                      uint32_t xoff, uint32_t yoff, uint32_t lod,
-                      RsAllocationCubemapFace face) {
+                      uint32_t xoff, uint32_t yoff, uint32_t zoff,
+                      uint32_t lod, RsAllocationCubemapFace face) {
     uint8_t *ptr = (uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
     ptr += face * alloc->mHal.drvState.faceOffset;
+    ptr += zoff * alloc->mHal.drvState.lod[lod].dimY * alloc->mHal.drvState.lod[lod].stride;
     ptr += yoff * alloc->mHal.drvState.lod[lod].stride;
     ptr += xoff * alloc->mHal.state.elementSizeBytes;
     return ptr;
@@ -131,7 +132,7 @@
     rsdGLCheckError(rsc, "Upload2DTexture 1 ");
     for (uint32_t face = 0; face < faceCount; face ++) {
         for (uint32_t lod = 0; lod < alloc->mHal.state.type->getLODCount(); lod++) {
-            const uint8_t *p = GetOffsetPtr(alloc, 0, 0, lod, (RsAllocationCubemapFace)face);
+            const uint8_t *p = GetOffsetPtr(alloc, 0, 0, 0, lod, (RsAllocationCubemapFace)face);
 
             GLenum t = GL_TEXTURE_2D;
             if (alloc->mHal.state.hasFaces) {
@@ -374,9 +375,10 @@
         // in getSurface
     } else if (alloc->mHal.state.userProvidedPtr != NULL) {
         // user-provided allocation
-        // limitations: no faces, no LOD, USAGE_SCRIPT only
-        if (alloc->mHal.state.usageFlags != (RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_SHARED)) {
-            ALOGE("Can't use user-allocated buffers if usage is not USAGE_SCRIPT and USAGE_SHARED");
+        // limitations: no faces, no LOD, USAGE_SCRIPT or SCRIPT+TEXTURE only
+        if (!(alloc->mHal.state.usageFlags == (RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_SHARED) ||
+              alloc->mHal.state.usageFlags == (RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_SHARED | RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE))) {
+            ALOGE("Can't use user-allocated buffers if usage is not USAGE_SCRIPT | USAGE_SHARED or USAGE_SCRIPT | USAGE_SHARED | USAGE_GRAPHICS_TEXTURE");
             return false;
         }
         if (alloc->getType()->getDimLOD() || alloc->getType()->getDimFaces()) {
@@ -598,6 +600,10 @@
         UploadToBufferObject(rsc, alloc);
     }
 
+    if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_SHARED) {
+        // NOP in CPU driver for now
+    }
+
     drv->uploadDeferred = false;
 }
 
@@ -811,7 +817,7 @@
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
     const size_t eSize = alloc->mHal.state.type->getElementSizeBytes();
-    uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+    uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
     size_t size = count * eSize;
 
     if (ptr != data) {
@@ -839,7 +845,7 @@
 
     if (alloc->mHal.drvState.lod[0].mallocPtr) {
         const uint8_t *src = static_cast<const uint8_t *>(data);
-        uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face);
+        uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face);
         if (dst == src) {
             // Skip the copy if we are the same allocation. This can arise from
             // our Bitmap optimization, where we share the same storage.
@@ -860,7 +866,7 @@
             int lod = 1;
             while (alloc->mHal.drvState.lod[lod].mallocPtr) {
                 size_t lineSize = alloc->mHal.drvState.lod[lod].dimX;
-                uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face);
+                uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face);
 
                 for (uint32_t line=(yoff >> 1); line < ((yoff+h)>>1); line++) {
                     memcpy(dst, src, lineSize);
@@ -879,16 +885,48 @@
 
 void rsdAllocationData3D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                         uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes) {
+                         uint32_t lod,
+                         uint32_t w, uint32_t h, uint32_t d, const void *data,
+                         size_t sizeBytes, size_t stride) {
+    DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
+    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    uint32_t lineSize = eSize * w;
+    if (!stride) {
+        stride = lineSize;
+    }
+
+    if (alloc->mHal.drvState.lod[0].mallocPtr) {
+        const uint8_t *src = static_cast<const uint8_t *>(data);
+        for (uint32_t z = zoff; z < (zoff + d); z++) {  // d is a slice count, like h below
+            uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, z, lod,
+                                        RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+            if (dst == src) {
+                // Skip the copy if we are the same allocation. This can arise from
+                // our Bitmap optimization, where we share the same storage.
+                drv->uploadDeferred = true;
+                return;
+            }
+
+            for (uint32_t line=yoff; line < (yoff+h); line++) {
+                if (alloc->mHal.state.hasReferences) {
+                    alloc->incRefs(src, w);
+                    alloc->decRefs(dst, w);
+                }
+                memcpy(dst, src, lineSize);
+                src += stride;
+                dst += alloc->mHal.drvState.lod[lod].stride;
+            }
+        }
+        drv->uploadDeferred = true;
+    }
 }
 
 void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t lod, size_t count,
                          void *data, size_t sizeBytes) {
     const size_t eSize = alloc->mHal.state.type->getElementSizeBytes();
-    const uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+    const uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
     if (data != ptr) {
         // Skip the copy if we are the same allocation. This can arise from
         // our Bitmap optimization, where we share the same storage.
@@ -907,7 +945,7 @@
 
     if (alloc->mHal.drvState.lod[0].mallocPtr) {
         uint8_t *dst = static_cast<uint8_t *>(data);
-        const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, lod, face);
+        const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face);
         if (dst == src) {
             // Skip the copy if we are the same allocation. This can arise from
             // our Bitmap optimization, where we share the same storage.
@@ -927,9 +965,32 @@
 
 void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                         uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes) {
+                         uint32_t lod,
+                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes, size_t stride) {
+    uint32_t eSize = alloc->mHal.state.elementSizeBytes;
+    uint32_t lineSize = eSize * w;
+    if (!stride) {
+        stride = lineSize;
+    }
 
+    if (alloc->mHal.drvState.lod[0].mallocPtr) {
+        uint8_t *dst = static_cast<uint8_t *>(data);
+        for (uint32_t z = zoff; z < (zoff + d); z++) {  // d is a slice count, like h below
+            const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, z, lod,
+                                              RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+            if (dst == src) {
+                // Skip the copy if we are the same allocation. This can arise from
+                // our Bitmap optimization, where we share the same storage.
+                return;
+            }
+
+            for (uint32_t line=yoff; line < (yoff+h); line++) {
+                memcpy(dst, src, lineSize);
+                dst += stride;
+                src += alloc->mHal.drvState.lod[lod].stride;
+            }
+        }
+    }
 }
 
 void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
@@ -959,8 +1020,8 @@
                                       RsAllocationCubemapFace srcFace) {
     size_t elementSize = dstAlloc->getType()->getElementSizeBytes();
     for (uint32_t i = 0; i < h; i ++) {
-        uint8_t *dstPtr = GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstLod, dstFace);
-        uint8_t *srcPtr = GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcLod, srcFace);
+        uint8_t *dstPtr = GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, 0, dstLod, dstFace);
+        uint8_t *srcPtr = GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, 0, srcLod, srcFace);
         memcpy(dstPtr, srcPtr, w * elementSize);
 
         //ALOGE("COPIED dstXoff(%u), dstYoff(%u), dstLod(%u), dstFace(%u), w(%u), h(%u), srcXoff(%u), srcYoff(%u), srcLod(%u), srcFace(%u)",
@@ -968,6 +1029,27 @@
     }
 }
 
+void rsdAllocationData3D_alloc_script(const android::renderscript::Context *rsc,
+                                      const android::renderscript::Allocation *dstAlloc,
+                                      uint32_t dstXoff, uint32_t dstYoff, uint32_t dstZoff, uint32_t dstLod,
+                                      uint32_t w, uint32_t h, uint32_t d,
+                                      const android::renderscript::Allocation *srcAlloc,
+                                      uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff, uint32_t srcLod) {
+    // Copy a w x h x d box of elements from srcAlloc to dstAlloc, one row (w elements)
+    // per memcpy. This entry point takes no cubemap-face argument, so every offset is
+    // resolved against RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X.
+    const size_t rowBytes = w * dstAlloc->getType()->getElementSizeBytes();
+    for (uint32_t j = 0; j < d; j++) {
+        for (uint32_t i = 0; i < h; i ++) {
+            uint8_t *dstPtr = GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstZoff + j,
+                                           dstLod, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+            uint8_t *srcPtr = GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcZoff + j,
+                                           srcLod, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+            memcpy(dstPtr, srcPtr, rowBytes);
+        }
+    }
+}
+
 void rsdAllocationData2D_alloc(const android::renderscript::Context *rsc,
                                const android::renderscript::Allocation *dstAlloc,
                                uint32_t dstXoff, uint32_t dstYoff, uint32_t dstLod,
@@ -988,11 +1070,19 @@
 void rsdAllocationData3D_alloc(const android::renderscript::Context *rsc,
                                const android::renderscript::Allocation *dstAlloc,
                                uint32_t dstXoff, uint32_t dstYoff, uint32_t dstZoff,
-                               uint32_t dstLod, RsAllocationCubemapFace dstFace,
+                               uint32_t dstLod,
                                uint32_t w, uint32_t h, uint32_t d,
                                const android::renderscript::Allocation *srcAlloc,
                                uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff,
-                               uint32_t srcLod, RsAllocationCubemapFace srcFace) {
+                               uint32_t srcLod) {
+    if (!dstAlloc->getIsScript() && !srcAlloc->getIsScript()) {
+        rsc->setError(RS_ERROR_FATAL_DRIVER, "Non-script allocation copies not "
+                                             "yet implemented.");
+        return;
+    }
+    rsdAllocationData3D_alloc_script(rsc, dstAlloc, dstXoff, dstYoff, dstZoff,
+                                     dstLod, w, h, d, srcAlloc,
+                                     srcXoff, srcYoff, srcZoff, srcLod);
 }
 
 void rsdAllocationElementData1D(const Context *rsc, const Allocation *alloc,
@@ -1001,7 +1091,7 @@
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
     size_t eSize = alloc->mHal.state.elementSizeBytes;
-    uint8_t * ptr = GetOffsetPtr(alloc, x, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+    uint8_t * ptr = GetOffsetPtr(alloc, x, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
 
     const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
     ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx);
@@ -1021,7 +1111,7 @@
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
     size_t eSize = alloc->mHal.state.elementSizeBytes;
-    uint8_t * ptr = GetOffsetPtr(alloc, x, y, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+    uint8_t * ptr = GetOffsetPtr(alloc, x, y, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
 
     const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
     ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx);
@@ -1040,9 +1130,9 @@
     uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
 
     for (uint32_t y=0; y < h; y++) {
-        uint16_t *oPtr = (uint16_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face);
-        const uint16_t *i1 = (uint16_t *)GetOffsetPtr(alloc, 0, y*2, lod, face);
-        const uint16_t *i2 = (uint16_t *)GetOffsetPtr(alloc, 0, y*2+1, lod, face);
+        uint16_t *oPtr = (uint16_t *)GetOffsetPtr(alloc, 0, y, 0, lod + 1, face);
+        const uint16_t *i1 = (uint16_t *)GetOffsetPtr(alloc, 0, y*2, 0, lod, face);
+        const uint16_t *i2 = (uint16_t *)GetOffsetPtr(alloc, 0, y*2+1, 0, lod, face);
 
         for (uint32_t x=0; x < w; x++) {
             *oPtr = rsBoxFilter565(i1[0], i1[1], i2[0], i2[1]);
@@ -1058,9 +1148,9 @@
     uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
 
     for (uint32_t y=0; y < h; y++) {
-        uint32_t *oPtr = (uint32_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face);
-        const uint32_t *i1 = (uint32_t *)GetOffsetPtr(alloc, 0, y*2, lod, face);
-        const uint32_t *i2 = (uint32_t *)GetOffsetPtr(alloc, 0, y*2+1, lod, face);
+        uint32_t *oPtr = (uint32_t *)GetOffsetPtr(alloc, 0, y, 0, lod + 1, face);
+        const uint32_t *i1 = (uint32_t *)GetOffsetPtr(alloc, 0, y*2, 0, lod, face);
+        const uint32_t *i2 = (uint32_t *)GetOffsetPtr(alloc, 0, y*2+1, 0, lod, face);
 
         for (uint32_t x=0; x < w; x++) {
             *oPtr = rsBoxFilter8888(i1[0], i1[1], i2[0], i2[1]);
@@ -1076,9 +1166,9 @@
     uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
 
     for (uint32_t y=0; y < h; y++) {
-        uint8_t *oPtr = GetOffsetPtr(alloc, 0, y, lod + 1, face);
-        const uint8_t *i1 = GetOffsetPtr(alloc, 0, y*2, lod, face);
-        const uint8_t *i2 = GetOffsetPtr(alloc, 0, y*2+1, lod, face);
+        uint8_t *oPtr = GetOffsetPtr(alloc, 0, y, 0, lod + 1, face);
+        const uint8_t *i1 = GetOffsetPtr(alloc, 0, y*2, 0, lod, face);
+        const uint8_t *i2 = GetOffsetPtr(alloc, 0, y*2+1, 0, lod, face);
 
         for (uint32_t x=0; x < w; x++) {
             *oPtr = (uint8_t)(((uint32_t)i1[0] + i1[1] + i2[0] + i2[1]) * 0.25f);
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index b64c999..a3f62a1 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -113,9 +113,9 @@
                          const void *data, size_t sizeBytes, size_t stride);
 void rsdAllocationData3D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
-                         uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                         uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+                         uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                         uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes,
+                         size_t stride);
 
 void rsdAllocationRead1D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
@@ -128,9 +128,9 @@
                          void *data, size_t sizeBytes, size_t stride);
 void rsdAllocationRead3D(const android::renderscript::Context *rsc,
                          const android::renderscript::Allocation *alloc,
-                         uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                         uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+                         uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                         uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes,
+                         size_t stride);
 
 void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
                           const android::renderscript::Allocation *alloc);
@@ -153,11 +153,11 @@
 void rsdAllocationData3D_alloc(const android::renderscript::Context *rsc,
                                const android::renderscript::Allocation *dstAlloc,
                                uint32_t dstXoff, uint32_t dstYoff, uint32_t dstZoff,
-                               uint32_t dstLod, RsAllocationCubemapFace dstFace,
+                               uint32_t dstLod,
                                uint32_t w, uint32_t h, uint32_t d,
                                const android::renderscript::Allocation *srcAlloc,
                                uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff,
-                               uint32_t srcLod, RsAllocationCubemapFace srcFace);
+                               uint32_t srcLod);
 
 void rsdAllocationElementData1D(const android::renderscript::Context *rsc,
                                 const android::renderscript::Allocation *alloc,
diff --git a/rs.spec b/rs.spec
index 49a082a..4836762 100644
--- a/rs.spec
+++ b/rs.spec
@@ -158,6 +158,19 @@
     param size_t stride
     }
 
+Allocation3DData {
+    param RsAllocation va
+    param uint32_t xoff
+    param uint32_t yoff
+    param uint32_t zoff
+    param uint32_t lod
+    param uint32_t w
+    param uint32_t h
+    param uint32_t d
+    param const void *data
+    param size_t stride
+    }
+
 Allocation2DElementData {
     param RsAllocation va
     param uint32_t x
@@ -208,12 +221,6 @@
     param uint32_t dimX
     }
 
-AllocationResize2D {
-    param RsAllocation va
-    param uint32_t dimX
-    param uint32_t dimY
-    }
-
 AllocationCopy2DRange {
     param RsAllocation dest
     param uint32_t destXoff
@@ -229,6 +236,23 @@
     param uint32_t srcFace
     }
 
+AllocationCopy3DRange {
+    param RsAllocation dest
+    param uint32_t destXoff
+    param uint32_t destYoff
+    param uint32_t destZoff
+    param uint32_t destMip
+    param uint32_t width
+    param uint32_t height
+    param uint32_t depth
+    param RsAllocation src
+    param uint32_t srcXoff
+    param uint32_t srcYoff
+    param uint32_t srcZoff
+    param uint32_t srcMip
+    }
+
+
 SamplerCreate {
     direct
     param RsSamplerValue magFilter
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 7e4fbc2..b558d10 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -109,8 +109,10 @@
 }
 
 void Allocation::data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                      uint32_t lod, RsAllocationCubemapFace face,
-                      uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes) {
+                      uint32_t lod,
+                      uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes, size_t stride) {
+    rsc->mHal.funcs.allocation.data3D(rsc, this, xoff, yoff, zoff, lod, w, h, d, data, sizeBytes, stride);
+    sendDirty(rsc);
 }
 
 void Allocation::read(Context *rsc, uint32_t xoff, uint32_t lod,
@@ -127,40 +129,33 @@
     rsc->mHal.funcs.allocation.read1D(rsc, this, xoff, lod, count, data, sizeBytes);
 }
 
-void Allocation::readUnchecked(Context *rsc, uint32_t xoff, uint32_t lod,
-                         uint32_t count, void *data, size_t sizeBytes) {
-    rsc->mHal.funcs.allocation.read1D(rsc, this, xoff, lod, count, data, sizeBytes);
-}
-
-
-void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
-                      uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
-    const size_t eSize = mHal.state.elementSizeBytes;
-    const size_t lineSize = eSize * w;
-
-    if ((lineSize * h) != sizeBytes) {
-        ALOGE("Allocation size mismatch, expected %zu, got %zu", (lineSize * h), sizeBytes);
-        rsAssert(!"Allocation::read called with mismatched size");
-        return;
-    }
-
-    read(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, lineSize);
-}
-
 void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
                       uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride) {
     const size_t eSize = mHal.state.elementSizeBytes;
     const size_t lineSize = eSize * w;
     if (!stride) {
         stride = lineSize;
+    } else {
+        if ((lineSize * h) != sizeBytes) {
+            ALOGE("Allocation size mismatch, expected %zu, got %zu", (lineSize * h), sizeBytes);
+            rsAssert(!"Allocation::read called with mismatched size");
+            return;
+        }
     }
 
     rsc->mHal.funcs.allocation.read2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
 }
 
-void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                      uint32_t lod, RsAllocationCubemapFace face,
-                      uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes) {
+void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                      uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes, size_t stride) {
+    const size_t eSize = mHal.state.elementSizeBytes;
+    const size_t lineSize = eSize * w;
+    if (!stride) {
+        stride = lineSize;
+    }
+
+    rsc->mHal.funcs.allocation.read3D(rsc, this, xoff, yoff, zoff, lod, w, h, d, data, sizeBytes, stride);
+
 }
 
 void Allocation::elementData(Context *rsc, uint32_t x, const void *data,
@@ -498,7 +493,7 @@
     Allocation *a = static_cast<Allocation *>(va);
     const Type * t = a->getType();
     a->read(rsc, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
-            t->getDimX(), t->getDimY(), data, sizeBytes);
+            t->getDimX(), t->getDimY(), data, sizeBytes, 0);
 }
 
 void rsi_Allocation1DData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t lod,
@@ -525,12 +520,19 @@
     a->data(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
 }
 
+void rsi_Allocation3DData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                          uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes, size_t stride) {
+    Allocation *a = static_cast<Allocation *>(va);
+    a->data(rsc, xoff, yoff, zoff, lod, w, h, d, data, sizeBytes, stride);
+}
+
+
 void rsi_AllocationRead(Context *rsc, RsAllocation va, void *data, size_t sizeBytes) {
     Allocation *a = static_cast<Allocation *>(va);
     const Type * t = a->getType();
     if(t->getDimY()) {
         a->read(rsc, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
-                t->getDimX(), t->getDimY(), data, sizeBytes);
+                t->getDimX(), t->getDimY(), data, sizeBytes, 0);
     } else {
         a->read(rsc, 0, 0, t->getDimX(), data, sizeBytes);
     }
@@ -635,6 +637,22 @@
                                            (RsAllocationCubemapFace)srcFace);
 }
 
+void rsi_AllocationCopy3DRange(Context *rsc,
+                               RsAllocation dstAlloc,
+                               uint32_t dstXoff, uint32_t dstYoff, uint32_t dstZoff,
+                               uint32_t dstMip,
+                               uint32_t width, uint32_t height, uint32_t depth,
+                               RsAllocation srcAlloc,
+                               uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff,
+                               uint32_t srcMip) {
+    Allocation *dst = static_cast<Allocation *>(dstAlloc);
+    Allocation *src= static_cast<Allocation *>(srcAlloc);
+    rsc->mHal.funcs.allocation.allocData3D(rsc, dst, dstXoff, dstYoff, dstZoff, dstMip,
+                                           width, height, depth,
+                                           src, srcXoff, srcYoff, srcZoff, srcMip);
+}
+
+
 void * rsi_AllocationGetSurface(Context *rsc, RsAllocation valloc) {
     Allocation *alloc = static_cast<Allocation *>(valloc);
     void *s = alloc->getSurface(rsc);
@@ -659,7 +677,7 @@
 void rsi_Allocation1DRead(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t lod,
                           uint32_t count, void *data, size_t sizeBytes) {
     Allocation *a = static_cast<Allocation *>(va);
-    a->readUnchecked(rsc, xoff, lod, count, data, sizeBytes);
+    rsc->mHal.funcs.allocation.read1D(rsc, a, xoff, lod, count, data, sizeBytes);
 }
 
 void rsi_Allocation2DRead(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff,
diff --git a/rsAllocation.h b/rsAllocation.h
index 087d1cd..dffa440 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -99,20 +99,15 @@
 
     void data(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, const void *data, size_t sizeBytes);
     void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
-              uint32_t w, uint32_t h, const void *data, size_t sizeBytes);
-    void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
               uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride);
-    void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
-              uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+    void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+              uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes, size_t stride);
 
     void read(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes);
-    void readUnchecked(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes);
-    void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
-              uint32_t w, uint32_t h, void *data, size_t sizeBytes);
     void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
               uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride);
-    void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
-              uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+    void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+              uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes, size_t stride);
 
     void elementData(Context *rsc, uint32_t x,
                      const void *data, uint32_t elementOff, size_t sizeBytes);
diff --git a/rs_hal.h b/rs_hal.h
index 0b0d00a..1949a15 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -160,9 +160,9 @@
                        RsAllocationCubemapFace face, uint32_t w, uint32_t h,
                        const void *data, size_t sizeBytes, size_t stride);
         void (*data3D)(const Context *rsc, const Allocation *alloc,
-                       uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                       uint32_t lod, RsAllocationCubemapFace face,
-                       uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+                       uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                       uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes,
+                       size_t stride);
 
         void (*read1D)(const Context *rsc, const Allocation *alloc,
                        uint32_t xoff, uint32_t lod, size_t count,
@@ -172,9 +172,9 @@
                        RsAllocationCubemapFace face, uint32_t w, uint32_t h,
                        void *data, size_t sizeBytes, size_t stride);
         void (*read3D)(const Context *rsc, const Allocation *alloc,
-                       uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                       uint32_t lod, RsAllocationCubemapFace face,
-                       uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+                       uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod,
+                       uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes,
+                       size_t stride);
 
         // Lock and unlock make a 1D region of memory available to the CPU
         // for direct access by pointer.  Once unlock is called control is
@@ -197,11 +197,11 @@
         void (*allocData3D)(const Context *rsc,
                             const Allocation *dstAlloc,
                             uint32_t dstXoff, uint32_t dstYoff, uint32_t dstZoff,
-                            uint32_t dstLod, RsAllocationCubemapFace dstFace,
+                            uint32_t dstLod,
                             uint32_t w, uint32_t h, uint32_t d,
                             const Allocation *srcAlloc,
                             uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff,
-                            uint32_t srcLod, RsAllocationCubemapFace srcFace);
+                            uint32_t srcLod);
 
         void (*elementData1D)(const Context *rsc, const Allocation *alloc, uint32_t x,
                               const void *data, uint32_t elementOff, size_t sizeBytes);