| /* libs/pixelflinger/t32cb16blend.S |
| ** |
| ** Copyright 2010, The Android Open Source Project |
| ** |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| */ |
| |
| /* |
| * DBG is a line prefix for the rdhwr-based instrumentation in this file. |
| * With DEBUG defined it expands to nothing, so the prefixed line is |
| * assembled as written. Otherwise it expands to '#', which GNU as for |
| * MIPS treats as the start of a comment, so the prefixed line is dropped. |
| */ |
| #ifdef DEBUG |
| #define DBG |
| #else |
| #define DBG # |
| #endif |
| |
| /* |
| * blend one of 2 16bpp RGB pixels held in dreg selected by shift |
| * with the 32bpp ABGR pixel held in src and store the result in fb |
| * |
| * Assumes that the dreg data is little endian and that |
| * the second pixel (shift==16) will be merged into |
| * the fb result |
| * |
| * Uses $t0,$t6,$t7,$t8 |
| */ |
| |
| #if __mips==32 && __mips_isa_rev>=2 |
| .macro pixel dreg src fb shift |
| /* |
| * MIPS32R2 flavour of the blend: bit fields are pulled out with ext and |
| * written back with ins, so only the 16 bits of fb selected by shift |
| * are modified. |
| * |
| * dreg  - register holding the 16bpp destination pixel(s) |
| * src   - register holding the 32bpp source pixel (alpha in bits 31..24) |
| * fb    - register receiving the blended 16bpp result |
| * shift - bit offset of the pixel inside dreg/fb (callers pass 0 or 16) |
| * |
| * Blend factor applied to each destination channel: |
| * sA = src >> 24 |
| * f = 0x100 - (sA + (sA >> 7)) |
| * Each channel is then (f * d) >> 8 plus the top bits of the source byte. |
| * |
| * Scratch registers written: $t0,$t6,$t7,$t8 (and $at when DEBUG is set). |
| * The multiplies are interleaved with the extracts of the next channel; |
| * keep the instruction order when editing. |
| */ |
| DBG .set noat |
| DBG rdhwr $at,$2 |
| DBG .set at |
| |
| srl $t7,\src,24 /* $t7 = sA */ |
| srl $t6,$t7,7 /* $t6 = sA >> 7 (0 or 1) */ |
| addu $t7,$t7,$t6 |
| li $t6,0x100 |
| subu $t7,$t6,$t7 /* $t7 = f */ |
| |
| /* red: destination bits shift+15..shift+11, source bits 7..3 */ |
| ext $t8,\dreg,\shift+11,5 |
| mul $t6,$t8,$t7 |
| ext $t0,\dreg,\shift+5,6 /* green destination field, fetched early */ |
| ext $t8,\src,3,5 |
| srl $t6,$t6,8 |
| addu $t8,$t8,$t6 |
| ins \fb,$t8,\shift+11,5 |
| |
| /* green: destination bits shift+10..shift+5, source bits 15..10 */ |
| mul $t8,$t0,$t7 |
| ext $t0,\dreg,\shift,5 /* blue destination field, fetched early */ |
| ext $t6,\src,10,6 |
| srl $t8,$t8,8 |
| addu $t8,$t8,$t6 |
| |
| /* blue: destination bits shift+4..shift, source bits 23..19 */ |
| mul $t0,$t0,$t7 |
| ins \fb,$t8,\shift+5,6 /* green result lands here */ |
| ext $t6,\src,19,5 |
| srl $t8,$t0,8 |
| addu $t8,$t8,$t6 |
| ins \fb,$t8,\shift,5 |
| |
| /* DEBUG only: elapsed rdhwr $2 delta, folded into min ($v0) / max ($v1) */ |
| DBG .set noat |
| DBG rdhwr $t8,$2 |
| DBG subu $t8,$t8,$at |
| DBG sltu $at,$t8,$v0 |
| DBG movn $v0,$t8,$at |
| DBG sgtu $at,$t8,$v1 |
| DBG movn $v1,$t8,$at |
| DBG .set at |
| .endm |
| |
| #else |
| |
| .macro pixel dreg src fb shift |
| /* |
| * Fallback flavour of the blend for targets without ext/ins: fields are |
| * isolated with shift + mask and merged into fb with or. |
| * |
| * dreg  - register holding the 16bpp destination pixel(s) |
| * src   - register holding the 32bpp source pixel (alpha in bits 31..24) |
| * fb    - register receiving the blended 16bpp result |
| * shift - bit offset of the pixel inside dreg/fb (callers pass 0 or 16) |
| * |
| * With shift==0 the red step overwrites fb completely; with any other |
| * shift every step ORs into fb, so that expansion relies on fb already |
| * holding the shift==0 result. |
| * |
| * Blend factor applied to each destination channel: |
| * sA = s >> 24 |
| * f = 0x100 - (sA + (sA>>7)) |
| * |
| * Scratch registers written: $t6,$t7,$t8 (and $at when DEBUG is set). |
| */ |
| DBG .set push |
| DBG .set noat |
| DBG .set mips32r2 |
| DBG rdhwr $at,$2 |
| DBG .set pop |
| |
| srl $t7,\src,24 /* $t7 = sA */ |
| srl $t6,$t7,7 /* $t6 = sA >> 7 (0 or 1) */ |
| addu $t7,$t7,$t6 |
| li $t6,0x100 |
| subu $t7,$t6,$t7 /* $t7 = f */ |
| |
| /* |
| * red |
| * dR = (d >> 11) & 0x1f |
| * dR = (f * dR) >> 8 |
| * sR = (s >> 3) & 0x1f |
| * fb gets (sR + dR) << 11 |
| */ |
| srl $t8,\dreg,\shift+11 |
| .if \shift==0 |
| /* bits above the field belong to the other pixel and must be masked; */ |
| /* for a non-zero shift the srl above already leaves only the field */ |
| andi $t8,$t8,0x1f |
| .endif |
| mul $t8,$t8,$t7 |
| srl $t6,\src,3 |
| andi $t6,$t6,0x1f |
| srl $t8,$t8,8 |
| addu $t8,$t8,$t6 |
| .if \shift==0 |
| sll \fb,$t8,11 /* first write: initialises fb */ |
| .else |
| sll $t8,$t8,\shift+11 |
| or \fb,\fb,$t8 |
| .endif |
| |
| /* |
| * green |
| * dG = (d >> 5) & 0x3f |
| * dG = (f * dG) >> 8 |
| * sG = (s >> 10) & 0x3f |
| */ |
| srl $t8,\dreg,\shift+5 |
| andi $t8,$t8,0x3f |
| mul $t8,$t8,$t7 |
| srl $t6,\src,10 |
| andi $t6,$t6,0x3f |
| srl $t8,$t8,8 |
| addu $t8,$t8,$t6 |
| sll $t8,$t8,\shift+5 |
| or \fb,\fb,$t8 |
| |
| /* |
| * blue |
| * dB = d & 0x1f |
| * dB = (f * dB) >> 8 |
| * sB = (s >> 19) & 0x1f |
| */ |
| .if \shift==0 |
| andi $t8,\dreg,0x1f |
| .else |
| srl $t8,\dreg,\shift |
| andi $t8,$t8,0x1f |
| .endif |
| mul $t8,$t8,$t7 |
| srl $t6,\src,19 |
| andi $t6,$t6,0x1f |
| srl $t8,$t8,8 |
| addu $t8,$t8,$t6 |
| .if \shift!=0 |
| sll $t8,$t8,\shift |
| .endif |
| or \fb,\fb,$t8 |
| /* DEBUG only: elapsed rdhwr $2 delta, folded into min ($v0) / max ($v1) */ |
| DBG .set push |
| DBG .set noat |
| DBG .set mips32r2 |
| DBG rdhwr $t8,$2 |
| DBG subu $t8,$t8,$at |
| DBG sltu $at,$t8,$v0 |
| DBG movn $v0,$t8,$at |
| DBG sgtu $at,$t8,$v1 |
| DBG movn $v1,$t8,$at |
| DBG .set pop |
| .endm |
| #endif |
| |
| .text |
| .align |
| |
| /* |
| * scanline_t32cb16blend_mips |
| * |
| * Blends a run of 32bpp source pixels onto a run of 16bpp destination |
| * pixels in place, using the pixel macro for each pixel. |
| * |
| * In: $a0 = destination pointer (advanced 2 bytes per pixel) |
| * $a1 = source pointer (advanced 4 bytes per pixel) |
| * $a2 = number of pixels to process |
| * NOTE(review): presumably called from C as |
| * void scanline_t32cb16blend_mips(uint16_t *dst, uint32_t *src, size_t ct) |
| * -- confirm against the caller. |
| * |
| * Clobbers: $a0-$a2, $t0, $t1, $t3-$t8 |
| * (plus $at, $v0, $v1 when DEBUG is defined) |
| * |
| * A source word of 0 leaves its destination pixel untouched; in the |
| * unrolled loop a destination word is skipped only when both of its |
| * source words are 0. |
| */ |
| .global scanline_t32cb16blend_mips |
| .ent scanline_t32cb16blend_mips |
| scanline_t32cb16blend_mips: |
| /* DEBUG only: $v0 tracks the minimum per-pixel delta, $v1 the maximum */ |
| DBG li $v0,0xffffffff |
| DBG li $v1,0 |
| /* Align the destination to a 32-bit boundary if necessary */ |
| and $t0,$a0,3 |
| beqz $t0,aligned |
| |
| /* as long as there is at least one pixel */ |
| beqz $a2,done |
| |
| /* blend a single leading pixel so that $a0 becomes word aligned */ |
| lw $t4,($a1) |
| addu $a0,2 |
| addu $a1,4 |
| beqz $t4,1f |
| lhu $t3,-2($a0) |
| pixel $t3,$t4,$t1,0 |
| sh $t1,-2($a0) |
| 1: subu $a2,1 |
| |
| aligned: |
| /* |
| * Check to see if its worth unrolling the loop. |
| * From here until tail, $a2 holds (pixels remaining - 4). |
| */ |
| subu $a2,4 |
| bltz $a2,tail |
| |
| /* Process 4 pixels at a time */ |
| fourpixels: |
| /* 1st pair of pixels */ |
| lw $t4,0($a1) |
| lw $t5,4($a1) |
| addu $a0,8 |
| addu $a1,16 |
| |
| /* both are zero, skip this pair */ |
| or $t3,$t4,$t5 |
| beqz $t3,1f |
| |
| /* load the destination (two 16bpp pixels in one word) */ |
| lw $t3,-8($a0) |
| |
| /* shift 0 must come first: it is the expansion that initialises $t1 */ |
| pixel $t3,$t4,$t1,0 |
| pixel $t3,$t5,$t1,16 |
| sw $t1,-8($a0) |
| |
| 1: |
| /* 2nd pair of pixels */ |
| lw $t4,-8($a1) |
| lw $t5,-4($a1) |
| |
| /* both are zero, skip this pair */ |
| or $t3,$t4,$t5 |
| beqz $t3,1f |
| |
| /* load the destination (two 16bpp pixels in one word) */ |
| lw $t3,-4($a0) |
| |
| pixel $t3,$t4,$t1,0 |
| pixel $t3,$t5,$t1,16 |
| sw $t1,-4($a0) |
| |
| /* |
| * $a2 >= 0 means at least four more pixels remain, so go round again. |
| * (bgez rather than bgtz: with bgtz a remainder of exactly four pixels |
| * dropped into the one-pixel tail loop instead of the unrolled path.) |
| */ |
| 1: subu $a2,4 |
| bgez $a2,fourpixels |
| |
| tail: |
| /* the pixel count underran, restore it now */ |
| addu $a2,4 |
| |
| /* handle the last 0..3 pixels */ |
| beqz $a2,done |
| onepixel: |
| lw $t4,($a1) |
| addu $a0,2 |
| addu $a1,4 |
| beqz $t4,1f |
| lhu $t3,-2($a0) |
| pixel $t3,$t4,$t1,0 |
| sh $t1,-2($a0) |
| 1: subu $a2,1 |
| bnez $a2,onepixel |
| done: |
| /* |
| * DEBUG only: scale the min/max deltas by hardware register $3. |
| * NOTE(review): $3 is presumably the counter resolution (CCRes) -- |
| * confirm against the MIPS32 RDHWR documentation. |
| */ |
| DBG .set push |
| DBG .set mips32r2 |
| DBG rdhwr $a0,$3 |
| DBG mul $v0,$a0 |
| DBG mul $v1,$a0 |
| DBG .set pop |
| j $ra |
| .end scanline_t32cb16blend_mips |