/***************************************************************************
 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 ***************************************************************************/
/*
 * memset32_neon -- fill a buffer with a repeated 32-bit value.
 *
 * Syntax/target: GNU as, ARM (A32) instruction set, NEON.
 *
 * In:    r0 = buffer  (destination, an array of 32-bit words)
 *        r1 = value   (32-bit pattern to store)
 *        r2 = count   (number of 32-bit words to write; <= 0 writes nothing)
 * Out:   nothing meaningful; r0 is left advanced past the bulk-stored part.
 * Clobb: r0, r2, r12, q0, q1 (d0-d3), flags.
 * Stack: none (leaf routine).
 *
 * NOTE(review): the C prototype is not visible in this file; the register
 * roles above are taken from the original "r0 = buffer, r1 = value,
 * r2 = times to write" comment -- confirm the count type against the caller.
 *
 * Strategy: counts of 1..4 use a one-word-at-a-time loop, counts of 5..15
 * use a computed jump into a ladder of 15 stores, and larger counts peel
 * off 128/64/32/16-word chunks with NEON stores before finishing the
 * remaining 0..15 words through the same ladder.
 */
        .text
        .code   32                      /* the pc-relative jump below needs 4-byte instructions */
        .fpu    neon
        .p2align 4                      /* 16-byte alignment (same as the ARM meaning of ".align 4") */
        .globl  memset32_neon
        .type   memset32_neon, %function /* mark as code so Thumb callers get interworking */
        .func   memset32_neon

memset32_neon:
        /* Fast path: exactly one word. */
        cmp     r2, #1
        streq   r1, [r0], #4
        bxeq    lr

        cmp     r2, #4
        bgt     memset32_neon_start

        /* count <= 4 here; zero or a negative count means nothing to do. */
        cmp     r2, #0
        bxle    lr

        /* 2..4 words: simple post-incrementing store loop. */
memset32_neon_small:
        str     r1, [r0], #4
        subs    r2, r2, #1
        bne     memset32_neon_small
        bx      lr

memset32_neon_start:
        /* 5..15 words need no NEON setup: go straight to the store ladder. */
        cmp     r2, #16
        blt     memset32_dropthru

        /* q0:q1 = eight copies of value, i.e. 32 bytes per vst1 below. */
        vdup.32 q0, r1
        vmov    q1, q0

        /* Enter the chunk cascade at the largest tier that fits. */
        cmp     r2, #32
        blt     memset32_16
        cmp     r2, #64
        blt     memset32_32
        cmp     r2, #128
        blt     memset32_64

        /* count >= 128: r12 = number of whole 128-word (512-byte) chunks. */
memset32_128:
        mov     r12, r2, lsr #7
memset32_loop128:
        subs    r12, r12, #1            /* flags survive the stores; vst1 does not touch them */
        .rept   16                      /* 16 x 32 bytes = 128 words */
        vst1.32 {q0, q1}, [r0]!
        .endr
        bne     memset32_loop128
        ands    r2, r2, #0x7f           /* r2 = words still to write (0..127) */
        bxeq    lr

        /* One 64-word chunk if at least 64 words remain. */
memset32_64:
        movs    r12, r2, lsr #6
        beq     memset32_32
        .rept   8                       /* 8 x 32 bytes = 64 words */
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x3f           /* r2 = words still to write (0..63) */
        bxeq    lr

        /* One 32-word chunk if at least 32 words remain. */
memset32_32:
        movs    r12, r2, lsr #5
        beq     memset32_16
        .rept   4                       /* 4 x 32 bytes = 32 words */
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x1f           /* r2 = words still to write (0..31) */
        bxeq    lr

        /* One 16-word chunk if at least 16 words remain. */
memset32_16:
        movs    r12, r2, lsr #4
        beq     memset32_dropthru
        and     r2, r2, #0xf            /* r2 = words still to write (0..15) */
        .rept   2                       /* 2 x 32 bytes = 16 words */
        vst1.32 {q0, q1}, [r0]!
        .endr

        /*
         * Tail: r2 = 0..15 words left, r0 = address of the first of them.
         * Skip (15 - r2) entries of the 15-store ladder so that exactly r2
         * stores execute.  In ARM state pc reads as the address of the
         * "add" plus 8, which is the first "str"; the "nop" is the filler
         * that occupies the slot at +4.  Every ladder entry must therefore
         * stay exactly one 4-byte instruction.
         */
memset32_dropthru:
        rsb     r2, r2, #15
        add     pc, pc, r2, lsl #2
        nop
        str     r1, [r0, #56]
        str     r1, [r0, #52]
        str     r1, [r0, #48]
        str     r1, [r0, #44]
        str     r1, [r0, #40]
        str     r1, [r0, #36]
        str     r1, [r0, #32]
        str     r1, [r0, #28]
        str     r1, [r0, #24]
        str     r1, [r0, #20]
        str     r1, [r0, #16]
        str     r1, [r0, #12]
        str     r1, [r0, #8]
        str     r1, [r0, #4]
        str     r1, [r0, #0]
        bx      lr                      /* also the landing point when r2 == 0 */

        .size   memset32_neon, . - memset32_neon
        .endfunc
        .end