Move simple matrix ops to the on-device lib. Change-Id: I2cd23cc2dab32c54f341e8e0cfbfbcaf1585c401

commit: 9112850375c24d3ffb8ced3835f878b08c1297ab [log] [tgz]
author: Jason Sams <jsams@google.com> Mon Mar 11 17:48:13 2013 -0700
committer: Jason Sams <jsams@google.com> Mon Mar 11 17:48:13 2013 -0700
tree: c99624c4820e551acfe021603f164561ea8540b8
parent: 229c99b4290e30047678a79910722c628fb2602e [diff]
diff --git a/lib/Renderscript/runtime/Android.mk b/lib/Renderscript/runtime/Android.mk
index dc25df7..3005624 100755
--- a/lib/Renderscript/runtime/Android.mk
+++ b/lib/Renderscript/runtime/Android.mk

@@ -23,11 +23,11 @@
     rs_core.c \
     rs_element.c \
     rs_mesh.c \
+    rs_matrix.c \
     rs_program.c \
     rs_sample.c \
     rs_sampler.c \
     convert.ll \
-    matrix.ll \
     pixel_packing.ll \
     rsClamp.ll
 

diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
index e802d58..ab92227 100644
--- a/lib/Renderscript/runtime/arch/generic.c
+++ b/lib/Renderscript/runtime/arch/generic.c

@@ -842,3 +842,72 @@
     return r;
 }
 
+
+/* 4x4 matrix times float4: out[r] = sum over k of m[4*k + r] * in[k]. */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
+    const float *e = m->m;
+    float4 out;
+    out.x = (e[0] * in.x) + (e[4] * in.y) + (e[8] * in.z) + (e[12] * in.w);
+    out.y = (e[1] * in.x) + (e[5] * in.y) + (e[9] * in.z) + (e[13] * in.w);
+    out.z = (e[2] * in.x) + (e[6] * in.y) + (e[10] * in.z) + (e[14] * in.w);
+    out.w = (e[3] * in.x) + (e[7] * in.y) + (e[11] * in.z) + (e[15] * in.w);
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float4 in) {
+    const rs_matrix4x4 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+
+/*
+ * 4x4 matrix times float3, producing a float4. The three input components
+ * scale elements 0..11; elements 12..15 are added unscaled.
+ */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
+    const float *e = m->m;
+    float4 out;
+    out.x = (e[0] * in.x) + (e[4] * in.y) + (e[8] * in.z) + e[12];
+    out.y = (e[1] * in.x) + (e[5] * in.y) + (e[9] * in.z) + e[13];
+    out.z = (e[2] * in.x) + (e[6] * in.y) + (e[10] * in.z) + e[14];
+    out.w = (e[3] * in.x) + (e[7] * in.y) + (e[11] * in.z) + e[15];
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float3 in) {
+    const rs_matrix4x4 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+
+/*
+ * 4x4 matrix times float2, producing a float4. The two input components
+ * scale elements 0..7, elements 12..15 are added unscaled, and elements
+ * 8..11 are not read.
+ */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
+    const float *e = m->m;
+    float4 out;
+    out.x = (e[0] * in.x) + (e[4] * in.y) + e[12];
+    out.y = (e[1] * in.x) + (e[5] * in.y) + e[13];
+    out.z = (e[2] * in.x) + (e[6] * in.y) + e[14];
+    out.w = (e[3] * in.x) + (e[7] * in.y) + e[15];
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float2 in) {
+    const rs_matrix4x4 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+
+/* 3x3 matrix times float3: out[r] = sum over k of m[3*k + r] * in[k]. */
+extern float3 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
+    const float *e = m->m;
+    float3 out;
+    out.x = (e[0] * in.x) + (e[3] * in.y) + (e[6] * in.z);
+    out.y = (e[1] * in.x) + (e[4] * in.y) + (e[7] * in.z);
+    out.z = (e[2] * in.x) + (e[5] * in.y) + (e[8] * in.z);
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float3 in) {
+    const rs_matrix3x3 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+
+/*
+ * 3x3 matrix times float2, producing a float3. Only elements 0..5 take
+ * part; elements 6..8 are not read and nothing is added unscaled.
+ */
+extern float3 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
+    const float *e = m->m;
+    float3 out;
+    out.x = (e[0] * in.x) + (e[3] * in.y);
+    out.y = (e[1] * in.x) + (e[4] * in.y);
+    out.z = (e[2] * in.x) + (e[5] * in.y);
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float2 in) {
+    const rs_matrix3x3 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+

diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
index 010b252..3b85e1b 100644
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ b/lib/Renderscript/runtime/arch/neon.ll

@@ -826,3 +826,183 @@
   ret <4 x float> %1
 }
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;              matrix                    ;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+%struct.rs_matrix4x4 = type { [16 x float] }
+%struct.rs_matrix3x3 = type { [9 x float] }
+%struct.rs_matrix2x2 = type { [4 x float] }
+
+; Broadcast helper: returns a <4 x float> whose four lanes all hold %in.
+; Built by inserting %in into lanes 0..3 of an undef vector in turn.
+define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
+  %1 = insertelement <4 x float> undef, float %in, i32 0
+  %2 = insertelement <4 x float> %1, float %in, i32 1
+  %3 = insertelement <4 x float> %2, float %in, i32 2
+  %4 = insertelement <4 x float> %3, float %in, i32 3
+  ret <4 x float> %4
+}
+
+
+; rsMatrixMultiply(const rs_matrix3x3 *, float3) -> float3.
+; Computes x*m[0..2] + y*m[3..5] + z*m[6..8] using 4-wide vectors; the fourth
+; lane of every intermediate is a don't-care and is dropped by the final shuffle.
+define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
+  ; Broadcast each input component across a 4-wide vector.
+  %x0 = extractelement <3 x float> %in, i32 0
+  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
+  %y0 = extractelement <3 x float> %in, i32 1
+  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
+  %z0 = extractelement <3 x float> %in, i32 2
+  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
+
+  ; %xm holds matrix elements 0..3 (lanes 0..2 are the ones used).
+  %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
+  %px2 = bitcast float* %px to i8*
+  %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
+
+  ; %ym holds matrix elements 3..6 (lanes 0..2 are the ones used).
+  %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
+  %py2 = bitcast float* %py to i8*
+  %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
+
+  ; The last group is loaded starting at element 5, not 6: elements 5..8 are the
+  ; final four floats of the [9 x float] array, whereas a 4-wide load at 6 would
+  ; extend to index 9. The shuffle then shifts the data down one lane so that
+  ; %zm = <m[6], m[7], m[8], undef> (mask index 4 selects from the undef operand).
+  %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5
+  %pz2 = bitcast float* %pz to i8*
+  %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
+  %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+
+  ; Accumulate the three scaled groups, then keep lanes 0..2 as the float3 result.
+  %a1 = fmul <4 x float> %x, %xm
+  %a2 = fmul <4 x float> %y, %ym
+  %a3 = fadd <4 x float> %a1, %a2
+  %a4 = fmul <4 x float> %z, %zm
+  %a5 = fadd <4 x float> %a4, %a3
+  %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %a6
+}
+
+; Non-const pointer overload (mangled with P rather than PK): forwards to the
+; const-pointer implementation above and returns its result unchanged.
+define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
+  %r = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind
+  ret <3 x float> %r
+}
+
+; rsMatrixMultiply(const rs_matrix3x3 *, float2) -> float3.
+; Computes x*m[0..2] + y*m[3..5]; matrix elements 6..8 do not contribute to
+; the returned lanes.
+define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
+  ; Broadcast each input component across a 4-wide vector.
+  %x0 = extractelement <2 x float> %in, i32 0
+  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
+  %y0 = extractelement <2 x float> %in, i32 1
+  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
+
+  ; 4-wide loads at elements 0 and 3 (covering 0..3 and 3..6); only lanes 0..2
+  ; of each survive the final shuffle.
+  %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
+  %px2 = bitcast float* %px to <4 x float>*
+  %xm = load <4 x float>* %px2, align 4
+  %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
+  %py2 = bitcast float* %py to <4 x float>*
+  %ym = load <4 x float>* %py2, align 4
+
+  ; Sum the two scaled groups and keep lanes 0..2 as the float3 result.
+  %a1 = fmul <4 x float> %x, %xm
+  %a2 = fmul <4 x float> %y, %ym
+  %a3 = fadd <4 x float> %a1, %a2
+  %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %a4
+}
+
+; Non-const pointer overload (mangled with P rather than PK): forwards to the
+; const-pointer implementation above and returns its result unchanged.
+define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
+  %r = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind
+  ret <3 x float> %r
+}
+
+; rsMatrixMultiply(const rs_matrix4x4 *, float4) -> float4.
+; Computes x*m[0..3] + y*m[4..7] + z*m[8..11] + w*m[12..15] with one 4-wide
+; multiply per group.
+define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
+  ; Broadcast each input component across a 4-wide vector.
+  %x0 = extractelement <4 x float> %in, i32 0
+  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
+  %y0 = extractelement <4 x float> %in, i32 1
+  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
+  %z0 = extractelement <4 x float> %in, i32 2
+  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
+  %w0 = extractelement <4 x float> %in, i32 3
+  %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
+
+  ; Load the four groups of four matrix elements (starting at 0, 4, 8, 12).
+  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
+  %px2 = bitcast float* %px to <4 x float>*
+  %xm = load <4 x float>* %px2, align 4
+  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
+  %py2 = bitcast float* %py to <4 x float>*
+  %ym = load <4 x float>* %py2, align 4
+  %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
+  %pz2 = bitcast float* %pz to <4 x float>*
+  %zm = load <4 x float>* %pz2, align 4
+  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
+  %pw2 = bitcast float* %pw to <4 x float>*
+  %wm = load <4 x float>* %pw2, align 4
+
+  ; Scale each group by its broadcast component and accumulate.
+  %a1 = fmul <4 x float> %x, %xm
+  %a2 = fmul <4 x float> %y, %ym
+  %a3 = fadd <4 x float> %a1, %a2
+  %a4 = fmul <4 x float> %z, %zm
+  %a5 = fadd <4 x float> %a3, %a4
+  %a6 = fmul <4 x float> %w, %wm
+  %a7 = fadd <4 x float> %a5, %a6
+  ret <4 x float> %a7
+}
+
+; Non-const pointer overload (mangled with P rather than PK): forwards to the
+; const-pointer implementation above and returns its result unchanged.
+define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
+  %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind
+  ret <4 x float> %r
+}
+
+; rsMatrixMultiply(const rs_matrix4x4 *, float3) -> float4.
+; Computes m[12..15] + x*m[0..3] + y*m[4..7] + z*m[8..11]; the last group is
+; added unscaled.
+define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
+  ; Broadcast each input component across a 4-wide vector.
+  %x0 = extractelement <3 x float> %in, i32 0
+  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
+  %y0 = extractelement <3 x float> %in, i32 1
+  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
+  %z0 = extractelement <3 x float> %in, i32 2
+  %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
+
+  ; Load the four groups of four matrix elements (starting at 0, 4, 8, 12).
+  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
+  %px2 = bitcast float* %px to <4 x float>*
+  %xm = load <4 x float>* %px2, align 4
+  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
+  %py2 = bitcast float* %py to <4 x float>*
+  %ym = load <4 x float>* %py2, align 4
+  %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
+  %pz2 = bitcast float* %pz to <4 x float>*
+  %zm = load <4 x float>* %pz2, align 4
+  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
+  %pw2 = bitcast float* %pw to <4 x float>*
+  %wm = load <4 x float>* %pw2, align 4
+
+  ; The accumulator starts from %wm plus the x term, then adds the y and z terms.
+  %a1 = fmul <4 x float> %x, %xm
+  %a2 = fadd <4 x float> %wm, %a1
+  %a3 = fmul <4 x float> %y, %ym
+  %a4 = fadd <4 x float> %a2, %a3
+  %a5 = fmul <4 x float> %z, %zm
+  %a6 = fadd <4 x float> %a4, %a5
+  ret <4 x float> %a6
+}
+
+; Non-const pointer overload (mangled with P rather than PK): forwards to the
+; const-pointer implementation above and returns its result unchanged.
+define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
+  %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind
+  ret <4 x float> %r
+}
+
+; rsMatrixMultiply(const rs_matrix4x4 *, float2) -> float4.
+; Computes m[12..15] + x*m[0..3] + y*m[4..7]; elements 8..11 are never loaded.
+define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
+  ; Broadcast each input component across a 4-wide vector.
+  %x0 = extractelement <2 x float> %in, i32 0
+  %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
+  %y0 = extractelement <2 x float> %in, i32 1
+  %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
+
+  ; Load the groups starting at elements 0, 4 and 12.
+  %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
+  %px2 = bitcast float* %px to <4 x float>*
+  %xm = load <4 x float>* %px2, align 4
+  %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
+  %py2 = bitcast float* %py to <4 x float>*
+  %ym = load <4 x float>* %py2, align 4
+  %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
+  %pw2 = bitcast float* %pw to <4 x float>*
+  %wm = load <4 x float>* %pw2, align 4
+
+  ; The accumulator starts from %wm plus the x term, then adds the y term.
+  %a1 = fmul <4 x float> %x, %xm
+  %a2 = fadd <4 x float> %wm, %a1
+  %a3 = fmul <4 x float> %y, %ym
+  %a4 = fadd <4 x float> %a2, %a3
+  ret <4 x float> %a4
+}
+
+; Non-const pointer overload (mangled with P rather than PK): forwards to the
+; const-pointer implementation above and returns its result unchanged.
+define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
+  %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind
+  ret <4 x float> %r
+}
+

diff --git a/lib/Renderscript/runtime/rs_core.c b/lib/Renderscript/runtime/rs_core.c
index 1f6bd5d..dc026a9 100644
--- a/lib/Renderscript/runtime/rs_core.c
+++ b/lib/Renderscript/runtime/rs_core.c

@@ -47,124 +47,7 @@
 
 extern float4 rsUnpackColor8888(uchar4 c)
 {
-    float4 ret = (float4)0.003921569f;
-    ret *= convert_float4(c);
-    return ret;
+    return convert_float4(c) * 0.003921569f;
 }
 
-/////////////////////////////////////////////////////
-// Matrix ops
-/////////////////////////////////////////////////////
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 4 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 4 + col];
-}
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 3 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 3 + col];
-}
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 2 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 2 + col];
-}
-
-/*
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
-    float4 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
-    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
-    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
-    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
-    return ret;
-}
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float4 in) {
-    return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
-    float4 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
-    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
-    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
-    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
-    return ret;
-}
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float3 in) {
-    return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
-    float4 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
-    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
-    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
-    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
-    return ret;
-}
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float2 in) {
-    return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
-    float3 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
-    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
-    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
-    return ret;
-}
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float3 in) {
-    return rsMatrixMultiply((const rs_matrix3x3 *)m, in);
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
-    float3 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
-    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
-    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
-    return ret;
-}
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float2 in) {
-    return rsMatrixMultiply((const rs_matrix3x3 *)m, in);
-}
-*/
-
-extern float2 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix2x2 *m, float2 in) {
-    float2 ret;
-    ret.x = (m->m[0] * in.x) + (m->m[2] * in.y);
-    ret.y = (m->m[1] * in.x) + (m->m[3] * in.y);
-    return ret;
-}
-extern float2 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, float2 in) {
-    return rsMatrixMultiply((const rs_matrix2x2 *)m, in);
-}
 

diff --git a/lib/Renderscript/runtime/rs_matrix.c b/lib/Renderscript/runtime/rs_matrix.c
new file mode 100644
index 0000000..db3fede
--- /dev/null
+++ b/lib/Renderscript/runtime/rs_matrix.c

@@ -0,0 +1,215 @@
+#include "rs_core.rsh"
+#include "rs_graphics.rsh"
+#include "rs_structs.h"
+
+/* Function declarations from libRS */
+extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
+
+/* Implementation of Core Runtime */
+
+
+/////////////////////////////////////////////////////
+// Matrix ops
+/////////////////////////////////////////////////////
+
+
+/* Fill a 4x4 matrix with the identity: 1 at elements 0, 5, 10, 15 and 0 elsewhere. */
+extern void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix4x4 *m) {
+    for (int i = 0; i < 16; i++) {
+        /* Diagonal entries sit every (4 + 1) elements. */
+        m->m[i] = (i % 5 == 0) ? 1.f : 0.f;
+    }
+}
+
+/* Fill a 3x3 matrix with the identity: 1 at elements 0, 4, 8 and 0 elsewhere. */
+extern void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix3x3 *m) {
+    for (int i = 0; i < 9; i++) {
+        /* Diagonal entries sit every (3 + 1) elements. */
+        m->m[i] = (i % 4 == 0) ? 1.f : 0.f;
+    }
+}
+/* Fill a 2x2 matrix with the identity: 1 at elements 0 and 3, 0 elsewhere. */
+extern void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix2x2 *m) {
+    for (int i = 0; i < 4; i++) {
+        /* Diagonal entries sit every (2 + 1) elements. */
+        m->m[i] = (i % 3 == 0) ? 1.f : 0.f;
+    }
+}
+
+/* Copy the first 16 floats at f into the 4x4 matrix, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const float *f) {
+    for (int i = 0; i < 16; i++) {
+        m->m[i] = f[i];
+    }
+}
+/* Copy the first 9 floats at f into the 3x3 matrix, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix3x3 *m, const float *f) {
+    for (int i = 0; i < 9; i++) {
+        m->m[i] = f[i];
+    }
+}
+/* Copy the first 4 floats at f into the 2x2 matrix, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix2x2 *m, const float *f) {
+    for (int i = 0; i < 4; i++) {
+        m->m[i] = f[i];
+    }
+}
+
+/* Copy all 16 elements of the 4x4 matrix s into m, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *s) {
+    for (int i = 0; i < 16; i++) {
+        m->m[i] = s->m[i];
+    }
+}
+/*
+ * Expand a 3x3 matrix into a 4x4 one. Each run of three source elements
+ * lands in the first three slots of the matching run of four, with the
+ * fourth slot zeroed; the final run of four is 0, 0, 0, 1.
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v) {
+    for (int g = 0; g < 3; g++) {
+        for (int k = 0; k < 3; k++) {
+            m->m[g * 4 + k] = v->m[g * 3 + k];
+        }
+        m->m[g * 4 + 3] = 0.f;
+    }
+    for (int i = 12; i < 16; i++) {
+        m->m[i] = (i == 15) ? 1.f : 0.f;
+    }
+}
+/*
+ * Expand a 2x2 matrix into a 4x4 one. Each pair of source elements lands
+ * in the first two slots of the matching run of four, with the other two
+ * slots zeroed; elements 8..15 are identity values (1 at 10 and 15).
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v) {
+    for (int g = 0; g < 2; g++) {
+        m->m[g * 4 + 0] = v->m[g * 2 + 0];
+        m->m[g * 4 + 1] = v->m[g * 2 + 1];
+        m->m[g * 4 + 2] = 0.f;
+        m->m[g * 4 + 3] = 0.f;
+    }
+    for (int i = 8; i < 16; i++) {
+        m->m[i] = (i % 5 == 0) ? 1.f : 0.f;
+    }
+}
+/* Copy all 9 elements of the 3x3 matrix s into m, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *s) {
+    for (int i = 0; i < 9; i++) {
+        m->m[i] = s->m[i];
+    }
+}
+/* Copy all 4 elements of the 2x2 matrix s into m, in index order. */
+extern void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *s) {
+    for (int i = 0; i < 4; i++) {
+        m->m[i] = s->m[i];
+    }
+}
+
+
+/*
+ * Store v at flat index (row * 4 + col) of a 4x4 matrix. No bounds check.
+ * NOTE(review): rsMatrixMultiply reads the coefficient for output r and
+ * input k from m[4 * k + r], so under that layout the argument named `row`
+ * appears to select the column -- confirm against the public header.
+ */
+extern void __attribute__((overloadable))
+rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t idx = row * 4 + col;
+    m->m[idx] = v;
+}
+
+/* Read the element at flat index (row * 4 + col) of a 4x4 matrix. No bounds check. */
+extern float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) {
+    const uint32_t idx = row * 4 + col;
+    return m->m[idx];
+}
+
+/*
+ * Store v at flat index (row * 3 + col) of a 3x3 matrix. No bounds check.
+ * NOTE(review): rsMatrixMultiply reads the coefficient for output r and
+ * input k from m[3 * k + r], so under that layout the argument named `row`
+ * appears to select the column -- confirm against the public header.
+ */
+extern void __attribute__((overloadable))
+rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t idx = row * 3 + col;
+    m->m[idx] = v;
+}
+
+/* Read the element at flat index (row * 3 + col) of a 3x3 matrix. No bounds check. */
+extern float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) {
+    const uint32_t idx = row * 3 + col;
+    return m->m[idx];
+}
+
+/*
+ * Store v at flat index (row * 2 + col) of a 2x2 matrix. No bounds check.
+ * NOTE(review): rsMatrixMultiply reads the coefficient for output r and
+ * input k from m[2 * k + r], so under that layout the argument named `row`
+ * appears to select the column -- confirm against the public header.
+ */
+extern void __attribute__((overloadable))
+rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t idx = row * 2 + col;
+    m->m[idx] = v;
+}
+
+/* Read the element at flat index (row * 2 + col) of a 2x2 matrix. No bounds check. */
+extern float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) {
+    const uint32_t idx = row * 2 + col;
+    return m->m[idx];
+}
+
+/* 2x2 matrix times float2: out[r] = sum over k of m[2*k + r] * in[k]. */
+extern float2 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix2x2 *m, float2 in) {
+    const float *e = m->m;
+    float2 out;
+    out.x = (e[0] * in.x) + (e[2] * in.y);
+    out.y = (e[1] * in.x) + (e[3] * in.y);
+    return out;
+}
+/* Non-const overload: hands the same matrix to the const-pointer overload. */
+extern float2 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix2x2 *m, float2 in) {
+    const rs_matrix2x2 *cm = m;
+    return rsMatrixMultiply(cm, in);
+}
+
+
commit	9112850375c24d3ffb8ced3835f878b08c1297ab	[log] [tgz]
author	Jason Sams <jsams@google.com>	Mon Mar 11 17:48:13 2013 -0700
committer	Jason Sams <jsams@google.com>	Mon Mar 11 17:48:13 2013 -0700
tree	c99624c4820e551acfe021603f164561ea8540b8
parent	229c99b4290e30047678a79910722c628fb2602e [diff]