| /* $OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */ |
| /* |
| * Written by J.T. Conklin <jtc@netbsd.org>. |
| * Public domain. |
| */ |
| |
| #include <machine/asm.h> |
| |
| /* |
| * On the i486, this code is negligibly faster than the code generated |
| * by gcc at about half the size. If my i386 databook is correct, it |
| * should be considerably faster than the gcc code on a i386. |
| */ |
| |
| ENTRY(swab) |
| pushl %esi |
| pushl %edi |
| movl 12(%esp),%esi |
| movl 16(%esp),%edi |
| movl 20(%esp),%ecx |
| |
| cld # set direction forward |
| |
| shrl $1,%ecx |
| testl $7,%ecx # copy first group of 1 to 7 words |
| jz L2 # while swaping alternate bytes. |
| .align 2,0x90 |
| L1: lodsw |
| rorw $8,%ax |
| stosw |
| decl %ecx |
| testl $7,%ecx |
| jnz L1 |
| |
| L2: shrl $3,%ecx # copy remainder 8 words at a time |
| jz L4 # while swapping alternate bytes. |
| .align 2,0x90 |
| L3: lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| lodsw |
| rorw $8,%ax |
| stosw |
| decl %ecx |
| jnz L3 |
| |
| L4: popl %edi |
| popl %esi |
| ret |
| END(swab) |