| ; |
| ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. |
| ; |
| ; Use of this source code is governed by a BSD-style license |
| ; that can be found in the LICENSE file in the root of the source |
| ; tree. An additional intellectual property rights grant can be found |
| ; in the file PATENTS. All contributing project authors may |
| ; be found in the AUTHORS file in the root of the source tree. |
| ; |
| |
| |
| EXPORT |vp8_subtract_mby_armv6| |
| EXPORT |vp8_subtract_mbuv_armv6| |
| EXPORT |vp8_subtract_b_armv6| |
| |
| INCLUDE asm_enc_offsets.asm |
| |
| ARM |
| REQUIRE8 |
| PRESERVE8 |
| |
| AREA ||.text||, CODE, READONLY, ALIGN=2 |
| |
;-----------------------------------------------------------------------
; vp8_subtract_b_armv6(BLOCK *be, BLOCKD *bd, int pitch)
;
; Computes a 4x4 block of 16-bit differences (source byte minus
; predictor byte) and stores it through the block's src_diff pointer.
;
; In:    r0 = BLOCK *be
;        r1 = BLOCKD *bd
;        r2 = int pitch   (predictor row step in bytes; the diff
;                          pointer advances by pitch halfwords per row)
; Out:   4 rows x 4 halfwords written at the src_diff pointer
; Clobb: r0, r1, r3, condition flags, GE bits
;        (r4-r9 are saved on entry and restored on exit)
;
; Structure offsets (vp8_block_*, vp8_blockd_*) come from
; asm_enc_offsets.asm.  Lane names in the comments ([s2 | s0] etc.)
; assume a little-endian word load.
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]   ; r4 = address of the source base pointer
    ldr     r5, [r0, #vp8_block_src]        ; r5 = offset of this block from the base
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff output pointer

    ldr     r3, [r4]                        ; r3 = *base_src
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row step in bytes
    add     r3, r3, r5                      ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = predictor pointer

    mov     r9, #4                          ; rows remaining

loop_block

    ; Load one row (4 bytes) of each input and step to the next row.
    ldr     r0, [r3], r7                    ; src,  then src  += src_stride
    ldr     r1, [r8], r2                    ; pred, then pred += pitch

    ; Zero-extend the even and odd bytes into separate 16-bit lanes.
    uxtb16  r4, r0                          ; [s2 | s0]
    uxtb16  r5, r1                          ; [p2 | p0]
    uxtb16  r0, r0, ror #8                  ; [s3 | s1]
    uxtb16  r1, r1, ror #8                  ; [p3 | p1]

    ; Per-lane 16-bit subtract; a negative result wraps to its
    ; two's-complement halfword.
    usub16  r4, r4, r5                      ; [d2 | d0]
    usub16  r5, r0, r1                      ; [d3 | d1]

    ; Flags set here are consumed by the bne below; nothing in between
    ; writes the condition flags.
    subs    r9, r9, #1                      ; decrement loop counter

    ; Re-interleave the lanes into memory order.
    pkhbt   r0, r4, r5, lsl #16             ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16             ; [d3 | d2]

    str     r0, [r6, #0]                    ; diff[0..1]
    str     r1, [r6, #4]                    ; diff[2..3]

    add     r6, r6, r2, lsl #1              ; diff += pitch halfwords (pitch * 2 bytes)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                              ; interworking-safe return (mov pc, lr
                                            ; would not switch state for a Thumb
                                            ; caller on ARMv6)

    ENDP
| |
| |
;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6(short *diff, unsigned char *usrc,
;                         unsigned char *vsrc, unsigned char *pred,
;                         int stride)
;
; Computes two 8x8 blocks of 16-bit differences (source byte minus
; predictor byte): first from usrc, then from vsrc.  Output starts
; 512 bytes past diff and the predictor is read starting 256 bytes
; past pred; both are then walked contiguously (8 values per row,
; 64 per block, U immediately followed by V).
;
; In:    r0      = short *diff
;        r1      = unsigned char *usrc
;        r2      = unsigned char *vsrc
;        r3      = unsigned char *pred
;        [sp]    = int stride (row step in bytes for usrc and vsrc)
; Out:   128 halfwords written at diff + 512 bytes
; Clobb: r0-r3, condition flags, GE bits
;        (r4-r11 are saved on entry and restored on exit)
;
; This is a leaf routine: it makes no calls and never writes r12 or
; lr, so only r4-r11 are pushed.  Lane names in the comments assume a
; little-endian word load.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}

    add     r0, r0, #512        ; set *diff point to Cb (skip 256 halfwords)
    add     r3, r3, #256        ; set *pred point to Cb (skip 256 bytes)

    mov     r4, #8              ; loop count (rows)
    ldr     r5, [sp, #32]       ; stride: first stack argument, just above
                                ; the 8 saved registers (8 * 4 bytes)

    ; Subtract U block.  Each iteration handles one row of 8 pixels as
    ; two 4-byte groups, (A) and (B); the loads for (B) are issued
    ; between the arithmetic and the stores of (A).
loop_u
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r3], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r3], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r1, r1, r5          ; update usrc pointer (next row)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter; the str below does
                                ; not touch the flags tested by bne
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_u

    mov     r4, #8              ; loop count (rows)

    ; Subtract V block.  Same row scheme as above; r0 (diff) and r3
    ; (pred) simply continue from where the U loop left them.
loop_v
    ldr     r6, [r2]            ; src       (A)
    ldr     r7, [r3], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r2, #4]       ; src       (B)
    ldr     r11, [r3], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r2, r2, r5          ; update vsrc pointer (next row)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_v

    ldmfd   sp!, {r4-r11}
    bx      lr                  ; interworking-safe return

    ENDP
| |
| |
;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6(short *diff, unsigned char *src,
;                        unsigned char *pred, int stride)
;
; Computes a 16x16 block of 16-bit differences (source byte minus
; predictor byte).  diff and pred are walked contiguously (16 values
; per row); src advances by stride bytes per row.
;
; In:    r0 = short *diff
;        r1 = unsigned char *src
;        r2 = unsigned char *pred
;        r3 = int stride (source row step in bytes)
; Out:   256 halfwords written at diff
; Clobb: r0-r2, condition flags, GE bits
;        (r4-r11 are saved on entry and restored on exit)
;
; Each loop iteration handles one row as four 4-byte groups, (A)..(D);
; the loads for the next group are issued between the arithmetic and
; the stores of the current one.  Lane names in the comments assume a
; little-endian word load.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}

    mov     r4, #16             ; rows remaining
loop
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r2], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r2], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    ldr     r10, [r1, #8]       ; src       (C)
    ldr     r11, [r2], #4       ; pred      (C)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    uxtb16  r8, r10             ; [s2 | s0] (C)
    str     r9, [r0], #4        ; diff      (B)

    uxtb16  r9, r11             ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (C)

    usub16  r6, r8, r9          ; [d2 | d0] (C)
    usub16  r7, r10, r11        ; [d3 | d1] (C)

    ldr     r10, [r1, #12]      ; src       (D)
    ldr     r11, [r2], #4       ; pred      (D)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (C)

    str     r8, [r0], #4        ; diff      (C)
    uxtb16  r8, r10             ; [s2 | s0] (D)
    str     r9, [r0], #4        ; diff      (C)

    uxtb16  r9, r11             ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (D)

    usub16  r6, r8, r9          ; [d2 | d0] (D)
    usub16  r7, r10, r11        ; [d3 | d1] (D)

    add     r1, r1, r3          ; update src pointer (next row)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (D)

    str     r8, [r0], #4        ; diff      (D)
    subs    r4, r4, #1          ; update loop counter; the str below does
                                ; not touch the flags tested by bne
    str     r9, [r0], #4        ; diff      (D)

    bne     loop

    ldmfd   sp!, {r4-r11}
    bx      lr                  ; interworking-safe return (mov pc, lr
                                ; would not switch state for a Thumb
                                ; caller on ARMv6)

    ENDP
| |
| END |
| |