Merge "Clamp rsPackPixel" into jb-mr2-dev
diff --git a/lib/Renderscript/runtime/Android.mk b/lib/Renderscript/runtime/Android.mk
index 3005624..80b310d 100755
--- a/lib/Renderscript/runtime/Android.mk
+++ b/lib/Renderscript/runtime/Android.mk
@@ -28,7 +28,6 @@
rs_sample.c \
rs_sampler.c \
convert.ll \
- pixel_packing.ll \
rsClamp.ll
clcore_files := \
diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
index ab92227..77ce4c5 100644
--- a/lib/Renderscript/runtime/arch/generic.c
+++ b/lib/Renderscript/runtime/arch/generic.c
@@ -20,13 +20,52 @@
extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
+extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
+extern float4 __attribute__((overloadable)) convert_float4(uchar4);
extern float __attribute__((overloadable)) sqrt(float);
+/**
+ * clz: leading zero bits counted in the argument type's own width; clz(0) is that width (assumes 32-bit int).
+ */
+extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
+    return v ? (uint32_t)__builtin_clz(v) : 32u; /* __builtin_clz(0) is undefined */
+}
+extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
+    return (uint16_t)(v ? __builtin_clz(v) - 16 : 16); /* drop the 16 zeros added by promotion */
+}
+extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
+    return (uint8_t)(v ? __builtin_clz(v) - 24 : 8); /* drop the 24 zeros added by promotion */
+}
+extern int32_t __attribute__((overloadable)) clz(int32_t v) {
+    return v ? (int32_t)__builtin_clz((uint32_t)v) : 32; /* __builtin_clz(0) is undefined */
+}
+extern int16_t __attribute__((overloadable)) clz(int16_t v) {
+    return (int16_t)(v ? __builtin_clz((uint16_t)v) - 16 : 16); /* cast first so sign extension is not counted */
+}
+extern int8_t __attribute__((overloadable)) clz(int8_t v) {
+    return (int8_t)(v ? __builtin_clz((uint8_t)v) - 24 : 8); /* cast first so sign extension is not counted */
+}
+
+extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
+    /* Negate in unsigned arithmetic: plain -v is signed overflow (undefined) for INT32_MIN. */
+    const uint32_t bits = (uint32_t)v;
+    return (v < 0) ? 0u - bits : bits;
+}
+extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
+    /* Absolute value of a 16-bit integer; v is promoted to int before it is negated. */
+    const int magnitude = (v < 0) ? -v : v;
+    return magnitude;
+}
+extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
+    /* Absolute value of an 8-bit integer; v is promoted to int before it is negated. */
+    const int magnitude = (v < 0) ? -v : v;
+    return magnitude;
+}
+
/*
* CLAMP
*/
-
extern float __attribute__((overloadable)) clamp(float amount, float low, float high) {
return amount < low ? low : (amount > high ? high : amount);
}
@@ -706,6 +745,16 @@
return fmin(v1, v2);
}
+extern float step(float edge, float v) {
+    /* Returns 0 when v is strictly below edge, otherwise 1 (including when the comparison is false for NaN). */
+    return (v < edge) ? 0.f : 1.f;
+}
+
+extern float sign(float value) {
+    /* +1 for positive input, -1 for negative; anything matching neither test is returned unchanged. */
+    const float unit = (value > 0) ? 1.f : -1.f;
+    return (value > 0 || value < 0) ? unit : value;
+}
/*
* YUV
@@ -842,6 +891,9 @@
return r;
}
+/**
+ * matrix ops
+ */
extern float4 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
@@ -911,3 +963,44 @@
return rsMatrixMultiply((const rs_matrix3x3 *)m, in);
}
+/**
+ * Pixel Ops
+ */
+extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
+{
+    /* Per channel: scale by 255, add 0.5, clamp to [0, 255], truncate to uchar; w is fixed at 255. */
+    const uchar red = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
+    const uchar green = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
+    const uchar blue = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
+    uchar4 packed = {red, green, blue, 255};
+    return packed;
+}
+
+extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
+{
+    /* Per channel: scale by 255, add 0.5, clamp to [0, 255], truncate to uchar. */
+    const uchar red = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
+    const uchar green = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
+    const uchar blue = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
+    const uchar alpha = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
+    return (uchar4){red, green, blue, alpha};
+}
+
+extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
+{
+    /* Scale by 255, add 0.5, clamp to [0, 255], then narrow each lane; w is fixed at 255. */
+    const float3 scaled = color * 255.f;
+    const float3 bounded = clamp(scaled + 0.5f, 0.f, 255.f);
+    uchar4 packed = {bounded.x, bounded.y, bounded.z, 255};
+    return packed;
+}
+
+extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
+{
+    /* Scale by 255, add 0.5, clamp to [0, 255], then narrow all four lanes. */
+    const float4 scaled = color * 255.f;
+    const float4 bounded = clamp(scaled + 0.5f, 0.f, 255.f);
+    uchar4 packed = {bounded.x, bounded.y, bounded.z, bounded.w};
+    return packed;
+}
+
diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
index 3b85e1b..466a623 100644
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ b/lib/Renderscript/runtime/arch/neon.ll
@@ -1006,3 +1006,57 @@
ret <4 x float> %r
}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;; pixel ops ;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
+@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
+@fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
+
+declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
+declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
+
+; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
+define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
+ %c255 = load <4 x float>* @fc_255.0, align 16 ; scale factor, also the clamp upper bound
+ %half = load <4 x float>* @fc_0.5, align 16 ; bias added before conversion
+ %zero = load <4 x float>* @fc_0, align 16 ; clamp lower bound
+ %scaled = fmul <4 x float> %c255, %color
+ %biased = fadd <4 x float> %half, %scaled
+ %bounded = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %biased, <4 x float> %zero, <4 x float> %c255) nounwind readnone
+ %packed = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %bounded) nounwind readnone
+ ret <4 x i8> %packed
+}
+
+; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
+define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
+ %wide = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; widen to 4 lanes; lane 3 comes from undef
+ %rgba = insertelement <4 x float> %wide, float 1.0, i32 3 ; overwrite lane 3 with 1.0
+ %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %rgba) nounwind readnone
+ ret <4 x i8> %packed
+}
+
+; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
+define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
+ %v.r = insertelement <4 x float> undef, float %r, i32 0 ; build the vector lane by lane
+ %v.rg = insertelement <4 x float> %v.r, float %g, i32 1
+ %v.rgb = insertelement <4 x float> %v.rg, float %b, i32 2
+ %v.rgba = insertelement <4 x float> %v.rgb, float 1.0, i32 3 ; lane 3 is the constant 1.0
+ %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %v.rgba) nounwind readnone
+ ret <4 x i8> %packed
+}
+
+; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
+define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
+ %v.r = insertelement <4 x float> undef, float %r, i32 0 ; build the vector lane by lane
+ %v.rg = insertelement <4 x float> %v.r, float %g, i32 1
+ %v.rgb = insertelement <4 x float> %v.rg, float %b, i32 2
+ %v.rgba = insertelement <4 x float> %v.rgb, float %a, i32 3
+ %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %v.rgba) nounwind readnone ; delegate to the float4 overload
+ ret <4 x i8> %packed
+}
+
diff --git a/lib/Renderscript/runtime/pixel_packing.ll b/lib/Renderscript/runtime/pixel_packing.ll
deleted file mode 100644
index 65401a6..0000000
--- a/lib/Renderscript/runtime/pixel_packing.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
-@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
-
-declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
-declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
-define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
- %f255 = load <4 x float>* @fc_255.0, align 16
- %f05 = load <4 x float>* @fc_0.5, align 16
- %v1 = fmul <4 x float> %f255, %color
- %v2 = fadd <4 x float> %f05, %v1
- %v3 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v2) nounwind readnone
- ret <4 x i8> %v3
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
-define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
- %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = insertelement <4 x float> %1, float 1.0, i32 3
- %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
- ret <4 x i8> %3
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
-define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
- %1 = insertelement <4 x float> undef, float %r, i32 0
- %2 = insertelement <4 x float> %1, float %g, i32 1
- %3 = insertelement <4 x float> %2, float %b, i32 2
- %4 = insertelement <4 x float> %3, float 1.0, i32 3
- %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
- ret <4 x i8> %5
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
-define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
- %1 = insertelement <4 x float> undef, float %r, i32 0
- %2 = insertelement <4 x float> %1, float %g, i32 1
- %3 = insertelement <4 x float> %2, float %b, i32 2
- %4 = insertelement <4 x float> %3, float %a, i32 3
- %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
- ret <4 x i8> %5
-}
-