/* Symbol name under which the routine in this file is emitted.  */
#define STRLEN sse2_strlen_atom

/*
 * L(label) builds an assembler-local label name by pasting ".L" in front
 * of LABEL.  GNU as does not export ".L"-prefixed symbols, so these labels
 * stay private to the object file.  The definition is guarded so an earlier
 * definition of L takes precedence.
 */
#ifndef L
# define L(label) .L##label
#endif

/*
 * Thin wrappers around the GNU as call-frame-information (CFI) directives.
 * Each wrapper is only defined when no earlier definition exists, and
 * simply expands to the directive of the same name with a leading dot.
 */

/* Open the CFI region of a function.  */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

/* Close the CFI region opened by cfi_startproc.  */
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

/* Record that REG was saved at offset OFF relative to the stack pointer.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

/* Record that REG holds its caller-visible value again.  */
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

/* Record that the CFA offset grew (OFF > 0) or shrank (OFF < 0) by OFF.  */
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

/* Save the current CFI rule set on the assembler's internal stack.  */
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif

/* Restore the CFI rule set saved by the matching cfi_remember_state.  */
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif

/*
 * ENTRY(name): start of a global function.
 * Marks NAME as a function symbol, exports it, aligns the entry point to
 * 16 bytes (.p2align 4), emits the label and opens the CFI region.
 * Only defined when no earlier definition exists.
 */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/*
 * END(name): end of a function started with ENTRY(name).
 * Closes the CFI region and records the symbol size as the distance from
 * NAME to the current location.
 */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/*
 * CFI bookkeeping for one 4-byte push: the CFA offset grows by 4 and REG
 * is recorded as saved at 0(%esp).
 */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/*
 * CFI bookkeeping for one 4-byte pop: the CFA offset shrinks by 4 and REG
 * is recorded as restored.
 */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop a 32-bit register and keep the unwind information in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * Offset from %esp, at function entry, of the first stack argument
 * (0(%esp) holds the 4-byte return address pushed by `call').
 */
#define PARMS		4
/* Offset of the string-pointer argument: it is the first argument.  */
#define STR		PARMS
/* Expands to nothing in this file; placed right after ENTRY.  */
#define ENTRANCE
/* Function return sequence used at every exit.  */
#define RETURN		ret

/*
 * STRLEN: length of a zero-terminated byte string (SSE2, 32-bit x86).
 *
 * In:       STR(%esp) = 4(%esp) = pointer to the string (s)
 * Out:      %eax = number of bytes before the first zero byte
 * Clobbers: %ecx, %edx, %xmm0-%xmm3, flags
 * Saved:    %esi, %edi, %ebx, %ebp are pushed/popped around the 64-byte loop
 *
 * Outline
 *   1. Bytes s[0..15] are tested one at a time, so nothing past the
 *      terminator is read while the alignment of s is still unknown.
 *   2. Sixteen unrolled 16-byte compares follow, starting at the first
 *      16-byte-aligned address above s.  pcmpeqb with a memory operand
 *      needs a 16-byte-aligned address, and an aligned 16-byte load stays
 *      inside one page, so the bytes read after the terminator cannot
 *      fault.
 *   3. A loop then scans 64 bytes (four aligned blocks) per iteration.
 *
 * Invariants
 *   - %ecx = s + 16 from step 2 onwards.
 *   - On arrival at L(exit): %eax = address of the matching block + 16 and
 *     %edx = its non-zero 16-bit pmovmskb mask, so %eax - %ecx is the
 *     offset of that block from s.
 *   - %xmm0-%xmm3 are zero whenever they are used as compare input:
 *     pcmpeqb against a block without a zero byte leaves the register all
 *     zero, so it is reused without being cleared again.
 */
	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* edx = s */
	xor	%eax, %eax		/* eax = 0; L(exit_tailN) adds N */

	/* Step 1: byte-wise test of s[0..15].  */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/*
	 * Step 2 set-up.  The first aligned block may overlap bytes already
	 * tested above; those are known to be non-zero, so they cannot
	 * produce a match.
	 */
	pxor	%xmm0, %xmm0		/* xmm0 = 0 */
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx		/* ecx = s + 16 */
	add	$16, %eax		/* eax = first 16-aligned address > s */

	/*
	 * Blocks 1-16.  Each block: compare 16 bytes with zero, extract the
	 * byte mask into edx, advance eax with lea (which leaves the flags
	 * from `test' intact) and leave through L(exit) when a zero byte
	 * was seen.  The first three blocks also clear the next xmm
	 * register.
	 */

	/* block 1 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 2 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 3 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 4 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 5 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 6 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 7 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 8 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 9 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 10 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 11 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 12 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 13 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 14 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 15 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 16 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/*
	 * Step 3: 64 bytes per iteration.  Rounding eax down to a 64-byte
	 * boundary can step back over up to three 16-byte blocks; they were
	 * scanned above and hold no zero byte, so scanning them again is
	 * harmless.
	 */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* ebp = OR of all masks; 0 while looping */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask of bytes  0..15 */
	pmovmskb %xmm1, %esi		/* mask of bytes 16..31 */
	pmovmskb %xmm2, %edi		/* mask of bytes 32..47 */
	pmovmskb %xmm3, %ebx		/* mask of bytes 48..63 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp		/* ZF = no zero byte in these 64 bytes */
	lea	64(%eax), %eax		/* advance; lea keeps ZF */
	jz	L(aligned_64)

	/*
	 * A zero byte lies in the 64 bytes just scanned and eax points 64
	 * bytes past their start.  Pick the first block with a non-zero
	 * mask, put that mask in edx and rewind eax to (that block + 16) as
	 * L(exit) expects.  L(48leave) is not referenced in this file.
	 */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* only the fourth block can match */
	lea	(%eax), %eax		/* no-op: eax is already block + 16 */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* edx already holds this block's mask */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/*
	 * Common exit: turn (block address + 16, mask) into the length.
	 * The lowest set mask bit is the index of the first zero byte
	 * within the block.
	 */
L(exit):
	sub	%ecx, %eax		/* eax = offset of the block from s */
	test	%dl, %dl
	jz	L(exit_high)		/* no zero byte in bytes 0..7 */
	test	$0x01, %dl
	jnz	L(exit_tail0)

	test	$0x02, %dl
	jnz	L(exit_tail1)

	test	$0x04, %dl
	jnz	L(exit_tail2)

	test	$0x08, %dl
	jnz	L(exit_tail3)

	test	$0x10, %dl
	jnz	L(exit_tail4)

	test	$0x20, %dl
	jnz	L(exit_tail5)

	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* dl != 0 with bits 0..6 clear: bit 7 */
L(exit_tail0):
	RETURN

	/* Zero byte is in bytes 8..15 of the block: same scan on dh.  */
L(exit_high):
	add	$8, %eax
	test	$0x01, %dh
	jnz	L(exit_tail0)

	test	$0x02, %dh
	jnz	L(exit_tail1)

	test	$0x04, %dh
	jnz	L(exit_tail2)

	test	$0x08, %dh
	jnz	L(exit_tail3)

	test	$0x10, %dh
	jnz	L(exit_tail4)

	test	$0x20, %dh
	jnz	L(exit_tail5)

	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* mask != 0 with bits 0..14 clear: bit 15 */
	RETURN

	/* L(exit_tailN): add N to eax and return.  */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)