Fix x86_64 assembly file generation.
The "sha512-x86_64.pl" script actually needs the name of the output
file as its second parameter to determine whether to generate SHA-256
or SHA-512 routines.
This patch does the following:
- Fix import_openssl.sh to invoke the script properly
- Add the generation of sha256-x86_64.S as well.
Note that this patch is the result of running:
./import_openssl.sh import /path/to/openssl-1.0.1c.tar.gz
This means that no other source files were impacted by the change.
Only needed for the Chromium x86_64 "linux_redux" build and the
SPDY host proxy program (flip_in_mem_edsm_server).
Change-Id: Ia40737f5952c7b156bd51844571e4f759910a6a1
diff --git a/crypto/sha/asm/sha256-x86_64.S b/crypto/sha/asm/sha256-x86_64.S
new file mode 100644
index 0000000..db5b898
--- /dev/null
+++ b/crypto/sha/asm/sha256-x86_64.S
@@ -0,0 +1,1778 @@
+.text
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
+sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha256_block_data_order,.-sha256_block_data_order
+.align 64
+.type K256,@object
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/crypto/sha/asm/sha512-x86_64.S b/crypto/sha/asm/sha512-x86_64.S
index db5b898..2d3294e 100644
--- a/crypto/sha/asm/sha512-x86_64.S
+++ b/crypto/sha/asm/sha512-x86_64.S
@@ -1,9 +1,9 @@
.text
-.globl sha256_block_data_order
-.type sha256_block_data_order,@function
+.globl sha512_block_data_order
+.type sha512_block_data_order,@function
.align 16
-sha256_block_data_order:
+sha512_block_data_order:
pushq %rbx
pushq %rbp
pushq %r12
@@ -12,1741 +12,1741 @@
pushq %r15
movq %rsp,%r11
shlq $4,%rdx
- subq $64+32,%rsp
- leaq (%rsi,%rdx,4),%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
andq $-64,%rsp
- movq %rdi,64+0(%rsp)
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
.Lprologue:
- leaq K256(%rip),%rbp
+ leaq K512(%rip),%rbp
- movl 0(%rdi),%eax
- movl 4(%rdi),%ebx
- movl 8(%rdi),%ecx
- movl 12(%rdi),%edx
- movl 16(%rdi),%r8d
- movl 20(%rdi),%r9d
- movl 24(%rdi),%r10d
- movl 28(%rdi),%r11d
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
jmp .Lloop
.align 16
.Lloop:
xorq %rdi,%rdi
- movl 0(%rsi),%r12d
- movl %r8d,%r13d
- movl %eax,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,0(%rsp)
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
- addl %r12d,%edx
- addl %r12d,%r11d
+ addq %r12,%rdx
+ addq %r12,%r11
leaq 1(%rdi),%rdi
- addl %r14d,%r11d
+ addq %r14,%r11
- movl 4(%rsi),%r12d
- movl %edx,%r13d
- movl %r11d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,4(%rsp)
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
- addl %r12d,%ecx
- addl %r12d,%r10d
+ addq %r12,%rcx
+ addq %r12,%r10
leaq 1(%rdi),%rdi
- addl %r14d,%r10d
+ addq %r14,%r10
- movl 8(%rsi),%r12d
- movl %ecx,%r13d
- movl %r10d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,8(%rsp)
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
- addl %r12d,%ebx
- addl %r12d,%r9d
+ addq %r12,%rbx
+ addq %r12,%r9
leaq 1(%rdi),%rdi
- addl %r14d,%r9d
+ addq %r14,%r9
- movl 12(%rsi),%r12d
- movl %ebx,%r13d
- movl %r9d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,12(%rsp)
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
- addl %r12d,%eax
- addl %r12d,%r8d
+ addq %r12,%rax
+ addq %r12,%r8
leaq 1(%rdi),%rdi
- addl %r14d,%r8d
+ addq %r14,%r8
- movl 16(%rsi),%r12d
- movl %eax,%r13d
- movl %r8d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,16(%rsp)
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
- addl %r12d,%r11d
- addl %r12d,%edx
+ addq %r12,%r11
+ addq %r12,%rdx
leaq 1(%rdi),%rdi
- addl %r14d,%edx
+ addq %r14,%rdx
- movl 20(%rsi),%r12d
- movl %r11d,%r13d
- movl %edx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,20(%rsp)
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
- addl %r12d,%r10d
- addl %r12d,%ecx
+ addq %r12,%r10
+ addq %r12,%rcx
leaq 1(%rdi),%rdi
- addl %r14d,%ecx
+ addq %r14,%rcx
- movl 24(%rsi),%r12d
- movl %r10d,%r13d
- movl %ecx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,24(%rsp)
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
- addl %r12d,%r9d
- addl %r12d,%ebx
+ addq %r12,%r9
+ addq %r12,%rbx
leaq 1(%rdi),%rdi
- addl %r14d,%ebx
+ addq %r14,%rbx
- movl 28(%rsi),%r12d
- movl %r9d,%r13d
- movl %ebx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,28(%rsp)
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
- addl %r12d,%r8d
- addl %r12d,%eax
+ addq %r12,%r8
+ addq %r12,%rax
leaq 1(%rdi),%rdi
- addl %r14d,%eax
+ addq %r14,%rax
- movl 32(%rsi),%r12d
- movl %r8d,%r13d
- movl %eax,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,32(%rsp)
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
- addl %r12d,%edx
- addl %r12d,%r11d
+ addq %r12,%rdx
+ addq %r12,%r11
leaq 1(%rdi),%rdi
- addl %r14d,%r11d
+ addq %r14,%r11
- movl 36(%rsi),%r12d
- movl %edx,%r13d
- movl %r11d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,36(%rsp)
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
- addl %r12d,%ecx
- addl %r12d,%r10d
+ addq %r12,%rcx
+ addq %r12,%r10
leaq 1(%rdi),%rdi
- addl %r14d,%r10d
+ addq %r14,%r10
- movl 40(%rsi),%r12d
- movl %ecx,%r13d
- movl %r10d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,40(%rsp)
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
- addl %r12d,%ebx
- addl %r12d,%r9d
+ addq %r12,%rbx
+ addq %r12,%r9
leaq 1(%rdi),%rdi
- addl %r14d,%r9d
+ addq %r14,%r9
- movl 44(%rsi),%r12d
- movl %ebx,%r13d
- movl %r9d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,44(%rsp)
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
- addl %r12d,%eax
- addl %r12d,%r8d
+ addq %r12,%rax
+ addq %r12,%r8
leaq 1(%rdi),%rdi
- addl %r14d,%r8d
+ addq %r14,%r8
- movl 48(%rsi),%r12d
- movl %eax,%r13d
- movl %r8d,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,48(%rsp)
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
- addl %r12d,%r11d
- addl %r12d,%edx
+ addq %r12,%r11
+ addq %r12,%rdx
leaq 1(%rdi),%rdi
- addl %r14d,%edx
+ addq %r14,%rdx
- movl 52(%rsi),%r12d
- movl %r11d,%r13d
- movl %edx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,52(%rsp)
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
- addl %r12d,%r10d
- addl %r12d,%ecx
+ addq %r12,%r10
+ addq %r12,%rcx
leaq 1(%rdi),%rdi
- addl %r14d,%ecx
+ addq %r14,%rcx
- movl 56(%rsi),%r12d
- movl %r10d,%r13d
- movl %ecx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,56(%rsp)
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
- addl %r12d,%r9d
- addl %r12d,%ebx
+ addq %r12,%r9
+ addq %r12,%rbx
leaq 1(%rdi),%rdi
- addl %r14d,%ebx
+ addq %r14,%rbx
- movl 60(%rsi),%r12d
- movl %r9d,%r13d
- movl %ebx,%r14d
- bswapl %r12d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,60(%rsp)
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
- addl %r12d,%r8d
- addl %r12d,%eax
+ addq %r12,%r8
+ addq %r12,%rax
leaq 1(%rdi),%rdi
- addl %r14d,%eax
+ addq %r14,%rax
jmp .Lrounds_16_xx
.align 16
.Lrounds_16_xx:
- movl 4(%rsp),%r13d
- movl 56(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 36(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 0(%rsp),%r12d
- movl %r8d,%r13d
- addl %r14d,%r12d
- movl %eax,%r14d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,0(%rsp)
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
- addl %r12d,%edx
- addl %r12d,%r11d
+ addq %r12,%rdx
+ addq %r12,%r11
leaq 1(%rdi),%rdi
- addl %r14d,%r11d
+ addq %r14,%r11
- movl 8(%rsp),%r13d
- movl 60(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 40(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 4(%rsp),%r12d
- movl %edx,%r13d
- addl %r14d,%r12d
- movl %r11d,%r14d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,4(%rsp)
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
- addl %r12d,%ecx
- addl %r12d,%r10d
+ addq %r12,%rcx
+ addq %r12,%r10
leaq 1(%rdi),%rdi
- addl %r14d,%r10d
+ addq %r14,%r10
- movl 12(%rsp),%r13d
- movl 0(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 44(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 8(%rsp),%r12d
- movl %ecx,%r13d
- addl %r14d,%r12d
- movl %r10d,%r14d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,8(%rsp)
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
- addl %r12d,%ebx
- addl %r12d,%r9d
+ addq %r12,%rbx
+ addq %r12,%r9
leaq 1(%rdi),%rdi
- addl %r14d,%r9d
+ addq %r14,%r9
- movl 16(%rsp),%r13d
- movl 4(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 48(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 12(%rsp),%r12d
- movl %ebx,%r13d
- addl %r14d,%r12d
- movl %r9d,%r14d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,12(%rsp)
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
- addl %r12d,%eax
- addl %r12d,%r8d
+ addq %r12,%rax
+ addq %r12,%r8
leaq 1(%rdi),%rdi
- addl %r14d,%r8d
+ addq %r14,%r8
- movl 20(%rsp),%r13d
- movl 8(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 52(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 16(%rsp),%r12d
- movl %eax,%r13d
- addl %r14d,%r12d
- movl %r8d,%r14d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,16(%rsp)
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
- addl %r12d,%r11d
- addl %r12d,%edx
+ addq %r12,%r11
+ addq %r12,%rdx
leaq 1(%rdi),%rdi
- addl %r14d,%edx
+ addq %r14,%rdx
- movl 24(%rsp),%r13d
- movl 12(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 56(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 20(%rsp),%r12d
- movl %r11d,%r13d
- addl %r14d,%r12d
- movl %edx,%r14d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,20(%rsp)
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
- addl %r12d,%r10d
- addl %r12d,%ecx
+ addq %r12,%r10
+ addq %r12,%rcx
leaq 1(%rdi),%rdi
- addl %r14d,%ecx
+ addq %r14,%rcx
- movl 28(%rsp),%r13d
- movl 16(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 60(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 24(%rsp),%r12d
- movl %r10d,%r13d
- addl %r14d,%r12d
- movl %ecx,%r14d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,24(%rsp)
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
- addl %r12d,%r9d
- addl %r12d,%ebx
+ addq %r12,%r9
+ addq %r12,%rbx
leaq 1(%rdi),%rdi
- addl %r14d,%ebx
+ addq %r14,%rbx
- movl 32(%rsp),%r13d
- movl 20(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 0(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 28(%rsp),%r12d
- movl %r9d,%r13d
- addl %r14d,%r12d
- movl %ebx,%r14d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,28(%rsp)
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
- addl %r12d,%r8d
- addl %r12d,%eax
+ addq %r12,%r8
+ addq %r12,%rax
leaq 1(%rdi),%rdi
- addl %r14d,%eax
+ addq %r14,%rax
- movl 36(%rsp),%r13d
- movl 24(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 4(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 32(%rsp),%r12d
- movl %r8d,%r13d
- addl %r14d,%r12d
- movl %eax,%r14d
- rorl $14,%r13d
- movl %r9d,%r15d
- movl %r12d,32(%rsp)
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
- rorl $9,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- rorl $5,%r13d
- addl %r11d,%r12d
- xorl %eax,%r14d
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r8d,%r15d
- movl %ebx,%r11d
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
- rorl $11,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r15d
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
- xorl %ecx,%r11d
- xorl %eax,%r14d
- addl %r15d,%r12d
- movl %ebx,%r15d
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
- rorl $6,%r13d
- andl %eax,%r11d
- andl %ecx,%r15d
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r11d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
- addl %r12d,%edx
- addl %r12d,%r11d
+ addq %r12,%rdx
+ addq %r12,%r11
leaq 1(%rdi),%rdi
- addl %r14d,%r11d
+ addq %r14,%r11
- movl 40(%rsp),%r13d
- movl 28(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 8(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 36(%rsp),%r12d
- movl %edx,%r13d
- addl %r14d,%r12d
- movl %r11d,%r14d
- rorl $14,%r13d
- movl %r8d,%r15d
- movl %r12d,36(%rsp)
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
- rorl $9,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- rorl $5,%r13d
- addl %r10d,%r12d
- xorl %r11d,%r14d
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %edx,%r15d
- movl %eax,%r10d
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
- rorl $11,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r15d
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
- xorl %ebx,%r10d
- xorl %r11d,%r14d
- addl %r15d,%r12d
- movl %eax,%r15d
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
- rorl $6,%r13d
- andl %r11d,%r10d
- andl %ebx,%r15d
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r10d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
- addl %r12d,%ecx
- addl %r12d,%r10d
+ addq %r12,%rcx
+ addq %r12,%r10
leaq 1(%rdi),%rdi
- addl %r14d,%r10d
+ addq %r14,%r10
- movl 44(%rsp),%r13d
- movl 32(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 12(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 40(%rsp),%r12d
- movl %ecx,%r13d
- addl %r14d,%r12d
- movl %r10d,%r14d
- rorl $14,%r13d
- movl %edx,%r15d
- movl %r12d,40(%rsp)
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
- rorl $9,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- rorl $5,%r13d
- addl %r9d,%r12d
- xorl %r10d,%r14d
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ecx,%r15d
- movl %r11d,%r9d
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
- rorl $11,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r15d
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
- xorl %eax,%r9d
- xorl %r10d,%r14d
- addl %r15d,%r12d
- movl %r11d,%r15d
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
- rorl $6,%r13d
- andl %r10d,%r9d
- andl %eax,%r15d
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r9d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
- addl %r12d,%ebx
- addl %r12d,%r9d
+ addq %r12,%rbx
+ addq %r12,%r9
leaq 1(%rdi),%rdi
- addl %r14d,%r9d
+ addq %r14,%r9
- movl 48(%rsp),%r13d
- movl 36(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 16(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 44(%rsp),%r12d
- movl %ebx,%r13d
- addl %r14d,%r12d
- movl %r9d,%r14d
- rorl $14,%r13d
- movl %ecx,%r15d
- movl %r12d,44(%rsp)
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
- rorl $9,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- rorl $5,%r13d
- addl %r8d,%r12d
- xorl %r9d,%r14d
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %ebx,%r15d
- movl %r10d,%r8d
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
- rorl $11,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r15d
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
- xorl %r11d,%r8d
- xorl %r9d,%r14d
- addl %r15d,%r12d
- movl %r10d,%r15d
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
- rorl $6,%r13d
- andl %r9d,%r8d
- andl %r11d,%r15d
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%r8d
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
- addl %r12d,%eax
- addl %r12d,%r8d
+ addq %r12,%rax
+ addq %r12,%r8
leaq 1(%rdi),%rdi
- addl %r14d,%r8d
+ addq %r14,%r8
- movl 52(%rsp),%r13d
- movl 40(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 20(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 48(%rsp),%r12d
- movl %eax,%r13d
- addl %r14d,%r12d
- movl %r8d,%r14d
- rorl $14,%r13d
- movl %ebx,%r15d
- movl %r12d,48(%rsp)
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
- rorl $9,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- rorl $5,%r13d
- addl %edx,%r12d
- xorl %r8d,%r14d
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %eax,%r15d
- movl %r9d,%edx
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
- rorl $11,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r15d
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
- xorl %r10d,%edx
- xorl %r8d,%r14d
- addl %r15d,%r12d
- movl %r9d,%r15d
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
- rorl $6,%r13d
- andl %r8d,%edx
- andl %r10d,%r15d
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%edx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
- addl %r12d,%r11d
- addl %r12d,%edx
+ addq %r12,%r11
+ addq %r12,%rdx
leaq 1(%rdi),%rdi
- addl %r14d,%edx
+ addq %r14,%rdx
- movl 56(%rsp),%r13d
- movl 44(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 24(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 52(%rsp),%r12d
- movl %r11d,%r13d
- addl %r14d,%r12d
- movl %edx,%r14d
- rorl $14,%r13d
- movl %eax,%r15d
- movl %r12d,52(%rsp)
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
- rorl $9,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- rorl $5,%r13d
- addl %ecx,%r12d
- xorl %edx,%r14d
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r11d,%r15d
- movl %r8d,%ecx
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
- rorl $11,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r15d
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
- xorl %r9d,%ecx
- xorl %edx,%r14d
- addl %r15d,%r12d
- movl %r8d,%r15d
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
- rorl $6,%r13d
- andl %edx,%ecx
- andl %r9d,%r15d
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ecx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
- addl %r12d,%r10d
- addl %r12d,%ecx
+ addq %r12,%r10
+ addq %r12,%rcx
leaq 1(%rdi),%rdi
- addl %r14d,%ecx
+ addq %r14,%rcx
- movl 60(%rsp),%r13d
- movl 48(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 28(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 56(%rsp),%r12d
- movl %r10d,%r13d
- addl %r14d,%r12d
- movl %ecx,%r14d
- rorl $14,%r13d
- movl %r11d,%r15d
- movl %r12d,56(%rsp)
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
- rorl $9,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- rorl $5,%r13d
- addl %ebx,%r12d
- xorl %ecx,%r14d
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r10d,%r15d
- movl %edx,%ebx
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
- rorl $11,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r15d
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
- xorl %r8d,%ebx
- xorl %ecx,%r14d
- addl %r15d,%r12d
- movl %edx,%r15d
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
- rorl $6,%r13d
- andl %ecx,%ebx
- andl %r8d,%r15d
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%ebx
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
- addl %r12d,%r9d
- addl %r12d,%ebx
+ addq %r12,%r9
+ addq %r12,%rbx
leaq 1(%rdi),%rdi
- addl %r14d,%ebx
+ addq %r14,%rbx
- movl 0(%rsp),%r13d
- movl 52(%rsp),%r14d
- movl %r13d,%r12d
- movl %r14d,%r15d
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
- rorl $11,%r12d
- xorl %r13d,%r12d
- shrl $3,%r13d
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
- rorl $7,%r12d
- xorl %r12d,%r13d
- movl 32(%rsp),%r12d
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
- rorl $2,%r15d
- xorl %r14d,%r15d
- shrl $10,%r14d
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
- rorl $17,%r15d
- addl %r13d,%r12d
- xorl %r15d,%r14d
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
- addl 60(%rsp),%r12d
- movl %r9d,%r13d
- addl %r14d,%r12d
- movl %ebx,%r14d
- rorl $14,%r13d
- movl %r10d,%r15d
- movl %r12d,60(%rsp)
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
- rorl $9,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- rorl $5,%r13d
- addl %eax,%r12d
- xorl %ebx,%r14d
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
- addl (%rbp,%rdi,4),%r12d
- andl %r9d,%r15d
- movl %ecx,%eax
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
- rorl $11,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r15d
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
- xorl %edx,%eax
- xorl %ebx,%r14d
- addl %r15d,%r12d
- movl %ecx,%r15d
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
- rorl $6,%r13d
- andl %ebx,%eax
- andl %edx,%r15d
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
- rorl $2,%r14d
- addl %r13d,%r12d
- addl %r15d,%eax
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
- addl %r12d,%r8d
- addl %r12d,%eax
+ addq %r12,%r8
+ addq %r12,%rax
leaq 1(%rdi),%rdi
- addl %r14d,%eax
+ addq %r14,%rax
- cmpq $64,%rdi
+ cmpq $80,%rdi
jb .Lrounds_16_xx
- movq 64+0(%rsp),%rdi
- leaq 64(%rsi),%rsi
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
- addl 0(%rdi),%eax
- addl 4(%rdi),%ebx
- addl 8(%rdi),%ecx
- addl 12(%rdi),%edx
- addl 16(%rdi),%r8d
- addl 20(%rdi),%r9d
- addl 24(%rdi),%r10d
- addl 28(%rdi),%r11d
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
- cmpq 64+16(%rsp),%rsi
+ cmpq 128+16(%rsp),%rsi
- movl %eax,0(%rdi)
- movl %ebx,4(%rdi)
- movl %ecx,8(%rdi)
- movl %edx,12(%rdi)
- movl %r8d,16(%rdi)
- movl %r9d,20(%rdi)
- movl %r10d,24(%rdi)
- movl %r11d,28(%rdi)
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
jb .Lloop
- movq 64+24(%rsp),%rsi
+ movq 128+24(%rsp),%rsi
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
@@ -1756,23 +1756,47 @@
leaq 48(%rsi),%rsp
.Lepilogue:
.byte 0xf3,0xc3
-.size sha256_block_data_order,.-sha256_block_data_order
+.size sha512_block_data_order,.-sha512_block_data_order
.align 64
-.type K256,@object
-K256:
-.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.type K512,@object
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/import_openssl.sh b/import_openssl.sh
index 640e6ee..b28256b 100755
--- a/import_openssl.sh
+++ b/import_openssl.sh
@@ -141,7 +141,7 @@
function gen_asm_x86_64 () {
local OUT
OUT=$(default_asm_file "$@")
- perl "$1" elf > "$OUT"
+ perl "$1" elf "$OUT" > "$OUT"
}
function import() {
@@ -214,6 +214,7 @@
# Generate x86_64 asm
gen_asm_x86_64 crypto/x86_64cpuid.pl
gen_asm_x86_64 crypto/sha/asm/sha1-x86_64.pl
+ gen_asm_x86_64 crypto/sha/asm/sha512-x86_64.pl crypto/sha/asm/sha256-x86_64.S
gen_asm_x86_64 crypto/sha/asm/sha512-x86_64.pl
gen_asm_x86_64 crypto/modes/asm/ghash-x86_64.pl
gen_asm_x86_64 crypto/aes/asm/aesni-x86_64.pl