Enable OpenSSL crypto optimizations for the x86 platform

The assembly files added by this patch were generated from the
current OpenSSL sources.
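
For reference, each .s file can be regenerated from the corresponding
perlasm script in the OpenSSL tree, for example (the "elf" flavour is
the usual choice for Android/Linux and is an assumption here, not part
of this patch):

    cd external/openssl
    perl crypto/aes/asm/aes-586.pl elf > crypto/aes/asm/aes-586.s
    perl crypto/sha/asm/sha1-586.pl elf > crypto/sha/asm/sha1-586.s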

Change-Id: I05ef67a6e34016ef94a0ef23ca264bcac805b1cc
Signed-off-by: Catalin Ionita <catalin.ionita@intel.com>
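
Note on x86_exclude_files: the portable C implementations listed there
(aes_cbc.c, des_enc.c, fcrypt_b.c, bf_enc.c) define the same symbols
as the new x86 assembly sources, so they must be dropped from
LOCAL_SRC_FILES to avoid duplicate definitions. A minimal GNU Make
sketch of the filter-out mechanism (illustrative names only):

    SRC := a.c b.c c.c
    EXCLUDE := b.c
    # SRC becomes "a.c c.c"
    SRC := $(filter-out $(EXCLUDE),$(SRC))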
diff --git a/Crypto.mk b/Crypto.mk
index 23a4f03..41e709b 100644
--- a/Crypto.mk
+++ b/Crypto.mk
@@ -1,536 +1,560 @@
 arm_cflags := -DOPENSSL_BN_ASM_GF2m -DOPENSSL_BN_ASM_MONT -DGHASH_ASM -DAES_ASM -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM
 mips_cflags := -DOPENSSL_BN_ASM_MONT -DAES_ASM -DSHA1_ASM -DSHA256_ASM
+x86_cflags := -DOPENSSL_BN_ASM_GF2m -DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_PART_WORDS -DAES_ASM -DGHASH_ASM -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DDES_PTR -DDES_RISC1 -DDES_UNROLL
 
 arm_src_files := \
-	crypto/aes/asm/aes-armv4.s \
-	crypto/bn/asm/armv4-gf2m.s \
-	crypto/bn/asm/armv4-mont.s \
-	crypto/bn/bn_asm.c \
-	crypto/modes/asm/ghash-armv4.s \
-	crypto/sha/asm/sha1-armv4-large.s \
-	crypto/sha/asm/sha256-armv4.s \
-	crypto/sha/asm/sha512-armv4.s
+ crypto/aes/asm/aes-armv4.s \
+ crypto/bn/asm/armv4-gf2m.s \
+ crypto/bn/asm/armv4-mont.s \
+ crypto/bn/bn_asm.c \
+ crypto/modes/asm/ghash-armv4.s \
+ crypto/sha/asm/sha1-armv4-large.s \
+ crypto/sha/asm/sha256-armv4.s \
+ crypto/sha/asm/sha512-armv4.s
 
 mips_src_files := \
-	crypto/aes/asm/aes-mips.s \
-	crypto/bn/asm/bn-mips.s \
-	crypto/bn/asm/mips-mont.s \
-	crypto/sha/asm/sha1-mips.s \
-	crypto/sha/asm/sha256-mips.s
+ crypto/aes/asm/aes-mips.s \
+ crypto/bn/asm/bn-mips.s \
+ crypto/bn/asm/mips-mont.s \
+ crypto/sha/asm/sha1-mips.s \
+ crypto/sha/asm/sha256-mips.s
+
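+# 32-bit x86 assembly generated from the OpenSSL perlasm scripts
+# (crypto/*/asm/*-586.pl and *-x86.pl).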
+x86_src_files := \
+ crypto/aes/asm/aes-586.s \
+ crypto/aes/asm/vpaes-x86.s \
+ crypto/aes/asm/aesni-x86.s \
+ crypto/bn/asm/bn-586.s \
+ crypto/bn/asm/co-586.s \
+ crypto/bn/asm/x86-mont.s \
+ crypto/bn/asm/x86-gf2m.s \
+ crypto/modes/asm/ghash-x86.s \
+ crypto/sha/asm/sha1-586.s \
+ crypto/sha/asm/sha256-586.s \
+ crypto/sha/asm/sha512-586.s \
+ crypto/md5/asm/md5-586.s \
+ crypto/des/asm/des-586.s \
+ crypto/des/asm/crypt586.s \
+ crypto/bf/asm/bf-586.s
+
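+# Portable C implementations whose symbols are provided by the x86
+# assembly above; filtered out of LOCAL_SRC_FILES for x86 builds.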
+x86_exclude_files := \
+ crypto/aes/aes_cbc.c \
+ crypto/des/des_enc.c \
+ crypto/des/fcrypt_b.c \
+ crypto/bf/bf_enc.c
 
 other_arch_src_files := \
-	crypto/aes/aes_core.c \
-	crypto/bn/bn_asm.c
+ crypto/aes/aes_core.c \
+ crypto/bn/bn_asm.c
 
 local_src_files := \
-	crypto/cryptlib.c \
-	crypto/mem.c \
-	crypto/mem_clr.c \
-	crypto/mem_dbg.c \
-	crypto/cversion.c \
-	crypto/ex_data.c \
-	crypto/cpt_err.c \
-	crypto/ebcdic.c \
-	crypto/uid.c \
-	crypto/o_time.c \
-	crypto/o_str.c \
-	crypto/o_dir.c \
-	crypto/aes/aes_cbc.c \
-	crypto/aes/aes_cfb.c \
-	crypto/aes/aes_ctr.c \
-	crypto/aes/aes_ecb.c \
-	crypto/aes/aes_misc.c \
-	crypto/aes/aes_ofb.c \
-	crypto/aes/aes_wrap.c \
-	crypto/asn1/a_bitstr.c \
-	crypto/asn1/a_bool.c \
-	crypto/asn1/a_bytes.c \
-	crypto/asn1/a_d2i_fp.c \
-	crypto/asn1/a_digest.c \
-	crypto/asn1/a_dup.c \
-	crypto/asn1/a_enum.c \
-	crypto/asn1/a_gentm.c \
-	crypto/asn1/a_i2d_fp.c \
-	crypto/asn1/a_int.c \
-	crypto/asn1/a_mbstr.c \
-	crypto/asn1/a_object.c \
-	crypto/asn1/a_octet.c \
-	crypto/asn1/a_print.c \
-	crypto/asn1/a_set.c \
-	crypto/asn1/a_sign.c \
-	crypto/asn1/a_strex.c \
-	crypto/asn1/a_strnid.c \
-	crypto/asn1/a_time.c \
-	crypto/asn1/a_type.c \
-	crypto/asn1/a_utctm.c \
-	crypto/asn1/a_utf8.c \
-	crypto/asn1/a_verify.c \
-	crypto/asn1/ameth_lib.c \
-	crypto/asn1/asn1_err.c \
-	crypto/asn1/asn1_gen.c \
-	crypto/asn1/asn1_lib.c \
-	crypto/asn1/asn1_par.c \
-	crypto/asn1/asn_mime.c \
-	crypto/asn1/asn_moid.c \
-	crypto/asn1/asn_pack.c \
-	crypto/asn1/bio_asn1.c \
-	crypto/asn1/bio_ndef.c \
-	crypto/asn1/d2i_pr.c \
-	crypto/asn1/d2i_pu.c \
-	crypto/asn1/evp_asn1.c \
-	crypto/asn1/f_enum.c \
-	crypto/asn1/f_int.c \
-	crypto/asn1/f_string.c \
-	crypto/asn1/i2d_pr.c \
-	crypto/asn1/i2d_pu.c \
-	crypto/asn1/n_pkey.c \
-	crypto/asn1/nsseq.c \
-	crypto/asn1/p5_pbe.c \
-	crypto/asn1/p5_pbev2.c \
-	crypto/asn1/p8_pkey.c \
-	crypto/asn1/t_bitst.c \
-	crypto/asn1/t_crl.c \
-	crypto/asn1/t_pkey.c \
-	crypto/asn1/t_req.c \
-	crypto/asn1/t_spki.c \
-	crypto/asn1/t_x509.c \
-	crypto/asn1/t_x509a.c \
-	crypto/asn1/tasn_dec.c \
-	crypto/asn1/tasn_enc.c \
-	crypto/asn1/tasn_fre.c \
-	crypto/asn1/tasn_new.c \
-	crypto/asn1/tasn_prn.c \
-	crypto/asn1/tasn_typ.c \
-	crypto/asn1/tasn_utl.c \
-	crypto/asn1/x_algor.c \
-	crypto/asn1/x_attrib.c \
-	crypto/asn1/x_bignum.c \
-	crypto/asn1/x_crl.c \
-	crypto/asn1/x_exten.c \
-	crypto/asn1/x_info.c \
-	crypto/asn1/x_long.c \
-	crypto/asn1/x_name.c \
-	crypto/asn1/x_nx509.c \
-	crypto/asn1/x_pkey.c \
-	crypto/asn1/x_pubkey.c \
-	crypto/asn1/x_req.c \
-	crypto/asn1/x_sig.c \
-	crypto/asn1/x_spki.c \
-	crypto/asn1/x_val.c \
-	crypto/asn1/x_x509.c \
-	crypto/asn1/x_x509a.c \
-	crypto/bf/bf_cfb64.c \
-	crypto/bf/bf_ecb.c \
-	crypto/bf/bf_enc.c \
-	crypto/bf/bf_ofb64.c \
-	crypto/bf/bf_skey.c \
-	crypto/bio/b_dump.c \
-	crypto/bio/b_print.c \
-	crypto/bio/b_sock.c \
-	crypto/bio/bf_buff.c \
-	crypto/bio/bf_nbio.c \
-	crypto/bio/bf_null.c \
-	crypto/bio/bio_cb.c \
-	crypto/bio/bio_err.c \
-	crypto/bio/bio_lib.c \
-	crypto/bio/bss_acpt.c \
-	crypto/bio/bss_bio.c \
-	crypto/bio/bss_conn.c \
-	crypto/bio/bss_dgram.c \
-	crypto/bio/bss_fd.c \
-	crypto/bio/bss_file.c \
-	crypto/bio/bss_log.c \
-	crypto/bio/bss_mem.c \
-	crypto/bio/bss_null.c \
-	crypto/bio/bss_sock.c \
-	crypto/bn/bn_add.c \
-	crypto/bn/bn_blind.c \
-	crypto/bn/bn_const.c \
-	crypto/bn/bn_ctx.c \
-	crypto/bn/bn_div.c \
-	crypto/bn/bn_err.c \
-	crypto/bn/bn_exp.c \
-	crypto/bn/bn_exp2.c \
-	crypto/bn/bn_gcd.c \
-	crypto/bn/bn_gf2m.c \
-	crypto/bn/bn_kron.c \
-	crypto/bn/bn_lib.c \
-	crypto/bn/bn_mod.c \
-	crypto/bn/bn_mont.c \
-	crypto/bn/bn_mpi.c \
-	crypto/bn/bn_mul.c \
-	crypto/bn/bn_nist.c \
-	crypto/bn/bn_prime.c \
-	crypto/bn/bn_print.c \
-	crypto/bn/bn_rand.c \
-	crypto/bn/bn_recp.c \
-	crypto/bn/bn_shift.c \
-	crypto/bn/bn_sqr.c \
-	crypto/bn/bn_sqrt.c \
-	crypto/bn/bn_word.c \
-	crypto/buffer/buf_err.c \
-	crypto/buffer/buf_str.c \
-	crypto/buffer/buffer.c \
-	crypto/cmac/cm_ameth.c \
-	crypto/cmac/cm_pmeth.c \
-	crypto/cmac/cmac.c \
-	crypto/comp/c_rle.c \
-	crypto/comp/c_zlib.c \
-	crypto/comp/comp_err.c \
-	crypto/comp/comp_lib.c \
-	crypto/conf/conf_api.c \
-	crypto/conf/conf_def.c \
-	crypto/conf/conf_err.c \
-	crypto/conf/conf_lib.c \
-	crypto/conf/conf_mall.c \
-	crypto/conf/conf_mod.c \
-	crypto/conf/conf_sap.c \
-	crypto/des/cbc_cksm.c \
-	crypto/des/cbc_enc.c \
-	crypto/des/cfb64ede.c \
-	crypto/des/cfb64enc.c \
-	crypto/des/cfb_enc.c \
-	crypto/des/des_enc.c \
-	crypto/des/des_old.c \
-	crypto/des/des_old2.c \
-	crypto/des/ecb3_enc.c \
-	crypto/des/ecb_enc.c \
-	crypto/des/ede_cbcm_enc.c \
-	crypto/des/enc_read.c \
-	crypto/des/enc_writ.c \
-	crypto/des/fcrypt.c \
-	crypto/des/fcrypt_b.c \
-	crypto/des/ofb64ede.c \
-	crypto/des/ofb64enc.c \
-	crypto/des/ofb_enc.c \
-	crypto/des/pcbc_enc.c \
-	crypto/des/qud_cksm.c \
-	crypto/des/rand_key.c \
-	crypto/des/read2pwd.c \
-	crypto/des/rpc_enc.c \
-	crypto/des/set_key.c \
-	crypto/des/str2key.c \
-	crypto/des/xcbc_enc.c \
-	crypto/dh/dh_ameth.c \
-	crypto/dh/dh_asn1.c \
-	crypto/dh/dh_check.c \
-	crypto/dh/dh_depr.c \
-	crypto/dh/dh_err.c \
-	crypto/dh/dh_gen.c \
-	crypto/dh/dh_key.c \
-	crypto/dh/dh_lib.c \
-	crypto/dh/dh_pmeth.c \
-	crypto/dsa/dsa_ameth.c \
-	crypto/dsa/dsa_asn1.c \
-	crypto/dsa/dsa_depr.c \
-	crypto/dsa/dsa_err.c \
-	crypto/dsa/dsa_gen.c \
-	crypto/dsa/dsa_key.c \
-	crypto/dsa/dsa_lib.c \
-	crypto/dsa/dsa_ossl.c \
-	crypto/dsa/dsa_pmeth.c \
-	crypto/dsa/dsa_prn.c \
-	crypto/dsa/dsa_sign.c \
-	crypto/dsa/dsa_vrf.c \
-	crypto/dso/dso_dl.c \
-	crypto/dso/dso_dlfcn.c \
-	crypto/dso/dso_err.c \
-	crypto/dso/dso_lib.c \
-	crypto/dso/dso_null.c \
-	crypto/dso/dso_openssl.c \
-	crypto/ec/ec2_mult.c \
-	crypto/ec/ec2_oct.c \
-	crypto/ec/ec2_smpl.c \
-	crypto/ec/ec_ameth.c \
-	crypto/ec/ec_asn1.c \
-	crypto/ec/ec_check.c \
-	crypto/ec/ec_curve.c \
-	crypto/ec/ec_cvt.c \
-	crypto/ec/ec_err.c \
-	crypto/ec/ec_key.c \
-	crypto/ec/ec_lib.c \
-	crypto/ec/ec_mult.c \
-	crypto/ec/ec_oct.c \
-	crypto/ec/ec_pmeth.c \
-	crypto/ec/ec_print.c \
-	crypto/ec/eck_prn.c \
-	crypto/ec/ecp_mont.c \
-	crypto/ec/ecp_nist.c \
-	crypto/ec/ecp_oct.c \
-	crypto/ec/ecp_smpl.c \
-	crypto/ecdh/ech_err.c \
-	crypto/ecdh/ech_key.c \
-	crypto/ecdh/ech_lib.c \
-	crypto/ecdh/ech_ossl.c \
-	crypto/ecdsa/ecs_asn1.c \
-	crypto/ecdsa/ecs_err.c \
-	crypto/ecdsa/ecs_lib.c \
-	crypto/ecdsa/ecs_ossl.c \
-	crypto/ecdsa/ecs_sign.c \
-	crypto/ecdsa/ecs_vrf.c \
-	crypto/engine/eng_all.c \
-	crypto/engine/eng_cnf.c \
-	crypto/engine/eng_ctrl.c \
-	crypto/engine/eng_dyn.c \
-	crypto/engine/eng_err.c \
-	crypto/engine/eng_fat.c \
-	crypto/engine/eng_init.c \
-	crypto/engine/eng_lib.c \
-	crypto/engine/eng_list.c \
-	crypto/engine/eng_pkey.c \
-	crypto/engine/eng_table.c \
-	crypto/engine/tb_asnmth.c \
-	crypto/engine/tb_cipher.c \
-	crypto/engine/tb_dh.c \
-	crypto/engine/tb_digest.c \
-	crypto/engine/tb_dsa.c \
-	crypto/engine/tb_ecdh.c \
-	crypto/engine/tb_ecdsa.c \
-	crypto/engine/tb_pkmeth.c \
-	crypto/engine/tb_rand.c \
-	crypto/engine/tb_rsa.c \
-	crypto/engine/tb_store.c \
-	crypto/err/err.c \
-	crypto/err/err_all.c \
-	crypto/err/err_prn.c \
-	crypto/evp/bio_b64.c \
-	crypto/evp/bio_enc.c \
-	crypto/evp/bio_md.c \
-	crypto/evp/bio_ok.c \
-	crypto/evp/c_all.c \
-	crypto/evp/c_allc.c \
-	crypto/evp/c_alld.c \
-	crypto/evp/digest.c \
-	crypto/evp/e_aes.c \
-	crypto/evp/e_aes_cbc_hmac_sha1.c \
-	crypto/evp/e_bf.c \
-	crypto/evp/e_des.c \
-	crypto/evp/e_des3.c \
-	crypto/evp/e_null.c \
-	crypto/evp/e_old.c \
-	crypto/evp/e_rc2.c \
-	crypto/evp/e_rc4.c \
-	crypto/evp/e_rc4_hmac_md5.c \
-	crypto/evp/e_rc5.c \
-	crypto/evp/e_xcbc_d.c \
-	crypto/evp/encode.c \
-	crypto/evp/evp_acnf.c \
-	crypto/evp/evp_enc.c \
-	crypto/evp/evp_err.c \
-	crypto/evp/evp_key.c \
-	crypto/evp/evp_lib.c \
-	crypto/evp/evp_pbe.c \
-	crypto/evp/evp_pkey.c \
-	crypto/evp/m_dss.c \
-	crypto/evp/m_dss1.c \
-	crypto/evp/m_ecdsa.c \
-	crypto/evp/m_md4.c \
-	crypto/evp/m_md5.c \
-	crypto/evp/m_mdc2.c \
-	crypto/evp/m_null.c \
-	crypto/evp/m_ripemd.c \
-	crypto/evp/m_sha1.c \
-	crypto/evp/m_sigver.c \
-	crypto/evp/m_wp.c \
-	crypto/evp/names.c \
-	crypto/evp/p5_crpt.c \
-	crypto/evp/p5_crpt2.c \
-	crypto/evp/p_dec.c \
-	crypto/evp/p_enc.c \
-	crypto/evp/p_lib.c \
-	crypto/evp/p_open.c \
-	crypto/evp/p_seal.c \
-	crypto/evp/p_sign.c \
-	crypto/evp/p_verify.c \
-	crypto/evp/pmeth_fn.c \
-	crypto/evp/pmeth_gn.c \
-	crypto/evp/pmeth_lib.c \
-	crypto/hmac/hm_ameth.c \
-	crypto/hmac/hm_pmeth.c \
-	crypto/hmac/hmac.c \
-	crypto/krb5/krb5_asn.c \
-	crypto/lhash/lh_stats.c \
-	crypto/lhash/lhash.c \
-	crypto/md4/md4_dgst.c \
-	crypto/md4/md4_one.c \
-	crypto/md5/md5_dgst.c \
-	crypto/md5/md5_one.c \
-	crypto/modes/cbc128.c \
-	crypto/modes/ccm128.c \
-	crypto/modes/cfb128.c \
-	crypto/modes/ctr128.c \
-	crypto/modes/gcm128.c \
-	crypto/modes/ofb128.c \
-	crypto/modes/xts128.c \
-	crypto/o_init.c \
-	crypto/objects/o_names.c \
-	crypto/objects/obj_dat.c \
-	crypto/objects/obj_err.c \
-	crypto/objects/obj_lib.c \
-	crypto/objects/obj_xref.c \
-	crypto/ocsp/ocsp_asn.c \
-	crypto/ocsp/ocsp_cl.c \
-	crypto/ocsp/ocsp_err.c \
-	crypto/ocsp/ocsp_ext.c \
-	crypto/ocsp/ocsp_ht.c \
-	crypto/ocsp/ocsp_lib.c \
-	crypto/ocsp/ocsp_prn.c \
-	crypto/ocsp/ocsp_srv.c \
-	crypto/ocsp/ocsp_vfy.c \
-	crypto/pem/pem_all.c \
-	crypto/pem/pem_err.c \
-	crypto/pem/pem_info.c \
-	crypto/pem/pem_lib.c \
-	crypto/pem/pem_oth.c \
-	crypto/pem/pem_pk8.c \
-	crypto/pem/pem_pkey.c \
-	crypto/pem/pem_seal.c \
-	crypto/pem/pem_sign.c \
-	crypto/pem/pem_x509.c \
-	crypto/pem/pem_xaux.c \
-	crypto/pem/pvkfmt.c \
-	crypto/pkcs12/p12_add.c \
-	crypto/pkcs12/p12_asn.c \
-	crypto/pkcs12/p12_attr.c \
-	crypto/pkcs12/p12_crpt.c \
-	crypto/pkcs12/p12_crt.c \
-	crypto/pkcs12/p12_decr.c \
-	crypto/pkcs12/p12_init.c \
-	crypto/pkcs12/p12_key.c \
-	crypto/pkcs12/p12_kiss.c \
-	crypto/pkcs12/p12_mutl.c \
-	crypto/pkcs12/p12_npas.c \
-	crypto/pkcs12/p12_p8d.c \
-	crypto/pkcs12/p12_p8e.c \
-	crypto/pkcs12/p12_utl.c \
-	crypto/pkcs12/pk12err.c \
-	crypto/pkcs7/pk7_asn1.c \
-	crypto/pkcs7/pk7_attr.c \
-	crypto/pkcs7/pk7_doit.c \
-	crypto/pkcs7/pk7_lib.c \
-	crypto/pkcs7/pk7_mime.c \
-	crypto/pkcs7/pk7_smime.c \
-	crypto/pkcs7/pkcs7err.c \
-	crypto/pqueue/pqueue.c \
-	crypto/rand/md_rand.c \
-	crypto/rand/rand_egd.c \
-	crypto/rand/rand_err.c \
-	crypto/rand/rand_lib.c \
-	crypto/rand/rand_unix.c \
-	crypto/rand/rand_win.c \
-	crypto/rand/randfile.c \
-	crypto/rc2/rc2_cbc.c \
-	crypto/rc2/rc2_ecb.c \
-	crypto/rc2/rc2_skey.c \
-	crypto/rc2/rc2cfb64.c \
-	crypto/rc2/rc2ofb64.c \
-	crypto/rc4/rc4_enc.c \
-	crypto/rc4/rc4_skey.c \
-	crypto/rc4/rc4_utl.c \
-	crypto/ripemd/rmd_dgst.c \
-	crypto/ripemd/rmd_one.c \
-	crypto/rsa/rsa_ameth.c \
-	crypto/rsa/rsa_asn1.c \
-	crypto/rsa/rsa_chk.c \
-	crypto/rsa/rsa_crpt.c \
-	crypto/rsa/rsa_eay.c \
-	crypto/rsa/rsa_err.c \
-	crypto/rsa/rsa_gen.c \
-	crypto/rsa/rsa_lib.c \
-	crypto/rsa/rsa_none.c \
-	crypto/rsa/rsa_null.c \
-	crypto/rsa/rsa_oaep.c \
-	crypto/rsa/rsa_pk1.c \
-	crypto/rsa/rsa_pmeth.c \
-	crypto/rsa/rsa_prn.c \
-	crypto/rsa/rsa_pss.c \
-	crypto/rsa/rsa_saos.c \
-	crypto/rsa/rsa_sign.c \
-	crypto/rsa/rsa_ssl.c \
-	crypto/rsa/rsa_x931.c \
-	crypto/sha/sha1_one.c \
-	crypto/sha/sha1dgst.c \
-	crypto/sha/sha256.c \
-	crypto/sha/sha512.c \
-	crypto/sha/sha_dgst.c \
-	crypto/srp/srp_lib.c \
-	crypto/srp/srp_vfy.c \
-	crypto/stack/stack.c \
-	crypto/ts/ts_err.c \
-	crypto/txt_db/txt_db.c \
-	crypto/ui/ui_compat.c \
-	crypto/ui/ui_err.c \
-	crypto/ui/ui_lib.c \
-	crypto/ui/ui_openssl.c \
-	crypto/ui/ui_util.c \
-	crypto/x509/by_dir.c \
-	crypto/x509/by_file.c \
-	crypto/x509/x509_att.c \
-	crypto/x509/x509_cmp.c \
-	crypto/x509/x509_d2.c \
-	crypto/x509/x509_def.c \
-	crypto/x509/x509_err.c \
-	crypto/x509/x509_ext.c \
-	crypto/x509/x509_lu.c \
-	crypto/x509/x509_obj.c \
-	crypto/x509/x509_r2x.c \
-	crypto/x509/x509_req.c \
-	crypto/x509/x509_set.c \
-	crypto/x509/x509_trs.c \
-	crypto/x509/x509_txt.c \
-	crypto/x509/x509_v3.c \
-	crypto/x509/x509_vfy.c \
-	crypto/x509/x509_vpm.c \
-	crypto/x509/x509cset.c \
-	crypto/x509/x509name.c \
-	crypto/x509/x509rset.c \
-	crypto/x509/x509spki.c \
-	crypto/x509/x509type.c \
-	crypto/x509/x_all.c \
-	crypto/x509v3/pcy_cache.c \
-	crypto/x509v3/pcy_data.c \
-	crypto/x509v3/pcy_lib.c \
-	crypto/x509v3/pcy_map.c \
-	crypto/x509v3/pcy_node.c \
-	crypto/x509v3/pcy_tree.c \
-	crypto/x509v3/v3_akey.c \
-	crypto/x509v3/v3_akeya.c \
-	crypto/x509v3/v3_alt.c \
-	crypto/x509v3/v3_bcons.c \
-	crypto/x509v3/v3_bitst.c \
-	crypto/x509v3/v3_conf.c \
-	crypto/x509v3/v3_cpols.c \
-	crypto/x509v3/v3_crld.c \
-	crypto/x509v3/v3_enum.c \
-	crypto/x509v3/v3_extku.c \
-	crypto/x509v3/v3_genn.c \
-	crypto/x509v3/v3_ia5.c \
-	crypto/x509v3/v3_info.c \
-	crypto/x509v3/v3_int.c \
-	crypto/x509v3/v3_lib.c \
-	crypto/x509v3/v3_ncons.c \
-	crypto/x509v3/v3_ocsp.c \
-	crypto/x509v3/v3_pci.c \
-	crypto/x509v3/v3_pcia.c \
-	crypto/x509v3/v3_pcons.c \
-	crypto/x509v3/v3_pku.c \
-	crypto/x509v3/v3_pmaps.c \
-	crypto/x509v3/v3_prn.c \
-	crypto/x509v3/v3_purp.c \
-	crypto/x509v3/v3_skey.c \
-	crypto/x509v3/v3_sxnet.c \
-	crypto/x509v3/v3_utl.c \
-	crypto/x509v3/v3err.c
+ crypto/cryptlib.c \
+ crypto/mem.c \
+ crypto/mem_clr.c \
+ crypto/mem_dbg.c \
+ crypto/cversion.c \
+ crypto/ex_data.c \
+ crypto/cpt_err.c \
+ crypto/ebcdic.c \
+ crypto/uid.c \
+ crypto/o_time.c \
+ crypto/o_str.c \
+ crypto/o_dir.c \
+ crypto/aes/aes_cbc.c \
+ crypto/aes/aes_cfb.c \
+ crypto/aes/aes_ctr.c \
+ crypto/aes/aes_ecb.c \
+ crypto/aes/aes_misc.c \
+ crypto/aes/aes_ofb.c \
+ crypto/aes/aes_wrap.c \
+ crypto/asn1/a_bitstr.c \
+ crypto/asn1/a_bool.c \
+ crypto/asn1/a_bytes.c \
+ crypto/asn1/a_d2i_fp.c \
+ crypto/asn1/a_digest.c \
+ crypto/asn1/a_dup.c \
+ crypto/asn1/a_enum.c \
+ crypto/asn1/a_gentm.c \
+ crypto/asn1/a_i2d_fp.c \
+ crypto/asn1/a_int.c \
+ crypto/asn1/a_mbstr.c \
+ crypto/asn1/a_object.c \
+ crypto/asn1/a_octet.c \
+ crypto/asn1/a_print.c \
+ crypto/asn1/a_set.c \
+ crypto/asn1/a_sign.c \
+ crypto/asn1/a_strex.c \
+ crypto/asn1/a_strnid.c \
+ crypto/asn1/a_time.c \
+ crypto/asn1/a_type.c \
+ crypto/asn1/a_utctm.c \
+ crypto/asn1/a_utf8.c \
+ crypto/asn1/a_verify.c \
+ crypto/asn1/ameth_lib.c \
+ crypto/asn1/asn1_err.c \
+ crypto/asn1/asn1_gen.c \
+ crypto/asn1/asn1_lib.c \
+ crypto/asn1/asn1_par.c \
+ crypto/asn1/asn_mime.c \
+ crypto/asn1/asn_moid.c \
+ crypto/asn1/asn_pack.c \
+ crypto/asn1/bio_asn1.c \
+ crypto/asn1/bio_ndef.c \
+ crypto/asn1/d2i_pr.c \
+ crypto/asn1/d2i_pu.c \
+ crypto/asn1/evp_asn1.c \
+ crypto/asn1/f_enum.c \
+ crypto/asn1/f_int.c \
+ crypto/asn1/f_string.c \
+ crypto/asn1/i2d_pr.c \
+ crypto/asn1/i2d_pu.c \
+ crypto/asn1/n_pkey.c \
+ crypto/asn1/nsseq.c \
+ crypto/asn1/p5_pbe.c \
+ crypto/asn1/p5_pbev2.c \
+ crypto/asn1/p8_pkey.c \
+ crypto/asn1/t_bitst.c \
+ crypto/asn1/t_crl.c \
+ crypto/asn1/t_pkey.c \
+ crypto/asn1/t_req.c \
+ crypto/asn1/t_spki.c \
+ crypto/asn1/t_x509.c \
+ crypto/asn1/t_x509a.c \
+ crypto/asn1/tasn_dec.c \
+ crypto/asn1/tasn_enc.c \
+ crypto/asn1/tasn_fre.c \
+ crypto/asn1/tasn_new.c \
+ crypto/asn1/tasn_prn.c \
+ crypto/asn1/tasn_typ.c \
+ crypto/asn1/tasn_utl.c \
+ crypto/asn1/x_algor.c \
+ crypto/asn1/x_attrib.c \
+ crypto/asn1/x_bignum.c \
+ crypto/asn1/x_crl.c \
+ crypto/asn1/x_exten.c \
+ crypto/asn1/x_info.c \
+ crypto/asn1/x_long.c \
+ crypto/asn1/x_name.c \
+ crypto/asn1/x_nx509.c \
+ crypto/asn1/x_pkey.c \
+ crypto/asn1/x_pubkey.c \
+ crypto/asn1/x_req.c \
+ crypto/asn1/x_sig.c \
+ crypto/asn1/x_spki.c \
+ crypto/asn1/x_val.c \
+ crypto/asn1/x_x509.c \
+ crypto/asn1/x_x509a.c \
+ crypto/bf/bf_cfb64.c \
+ crypto/bf/bf_ecb.c \
+ crypto/bf/bf_enc.c \
+ crypto/bf/bf_ofb64.c \
+ crypto/bf/bf_skey.c \
+ crypto/bio/b_dump.c \
+ crypto/bio/b_print.c \
+ crypto/bio/b_sock.c \
+ crypto/bio/bf_buff.c \
+ crypto/bio/bf_nbio.c \
+ crypto/bio/bf_null.c \
+ crypto/bio/bio_cb.c \
+ crypto/bio/bio_err.c \
+ crypto/bio/bio_lib.c \
+ crypto/bio/bss_acpt.c \
+ crypto/bio/bss_bio.c \
+ crypto/bio/bss_conn.c \
+ crypto/bio/bss_dgram.c \
+ crypto/bio/bss_fd.c \
+ crypto/bio/bss_file.c \
+ crypto/bio/bss_log.c \
+ crypto/bio/bss_mem.c \
+ crypto/bio/bss_null.c \
+ crypto/bio/bss_sock.c \
+ crypto/bn/bn_add.c \
+ crypto/bn/bn_blind.c \
+ crypto/bn/bn_const.c \
+ crypto/bn/bn_ctx.c \
+ crypto/bn/bn_div.c \
+ crypto/bn/bn_err.c \
+ crypto/bn/bn_exp.c \
+ crypto/bn/bn_exp2.c \
+ crypto/bn/bn_gcd.c \
+ crypto/bn/bn_gf2m.c \
+ crypto/bn/bn_kron.c \
+ crypto/bn/bn_lib.c \
+ crypto/bn/bn_mod.c \
+ crypto/bn/bn_mont.c \
+ crypto/bn/bn_mpi.c \
+ crypto/bn/bn_mul.c \
+ crypto/bn/bn_nist.c \
+ crypto/bn/bn_prime.c \
+ crypto/bn/bn_print.c \
+ crypto/bn/bn_rand.c \
+ crypto/bn/bn_recp.c \
+ crypto/bn/bn_shift.c \
+ crypto/bn/bn_sqr.c \
+ crypto/bn/bn_sqrt.c \
+ crypto/bn/bn_word.c \
+ crypto/buffer/buf_err.c \
+ crypto/buffer/buf_str.c \
+ crypto/buffer/buffer.c \
+ crypto/cmac/cm_ameth.c \
+ crypto/cmac/cm_pmeth.c \
+ crypto/cmac/cmac.c \
+ crypto/comp/c_rle.c \
+ crypto/comp/c_zlib.c \
+ crypto/comp/comp_err.c \
+ crypto/comp/comp_lib.c \
+ crypto/conf/conf_api.c \
+ crypto/conf/conf_def.c \
+ crypto/conf/conf_err.c \
+ crypto/conf/conf_lib.c \
+ crypto/conf/conf_mall.c \
+ crypto/conf/conf_mod.c \
+ crypto/conf/conf_sap.c \
+ crypto/des/cbc_cksm.c \
+ crypto/des/cbc_enc.c \
+ crypto/des/cfb64ede.c \
+ crypto/des/cfb64enc.c \
+ crypto/des/cfb_enc.c \
+ crypto/des/des_enc.c \
+ crypto/des/des_old.c \
+ crypto/des/des_old2.c \
+ crypto/des/ecb3_enc.c \
+ crypto/des/ecb_enc.c \
+ crypto/des/ede_cbcm_enc.c \
+ crypto/des/enc_read.c \
+ crypto/des/enc_writ.c \
+ crypto/des/fcrypt.c \
+ crypto/des/fcrypt_b.c \
+ crypto/des/ofb64ede.c \
+ crypto/des/ofb64enc.c \
+ crypto/des/ofb_enc.c \
+ crypto/des/pcbc_enc.c \
+ crypto/des/qud_cksm.c \
+ crypto/des/rand_key.c \
+ crypto/des/read2pwd.c \
+ crypto/des/rpc_enc.c \
+ crypto/des/set_key.c \
+ crypto/des/str2key.c \
+ crypto/des/xcbc_enc.c \
+ crypto/dh/dh_ameth.c \
+ crypto/dh/dh_asn1.c \
+ crypto/dh/dh_check.c \
+ crypto/dh/dh_depr.c \
+ crypto/dh/dh_err.c \
+ crypto/dh/dh_gen.c \
+ crypto/dh/dh_key.c \
+ crypto/dh/dh_lib.c \
+ crypto/dh/dh_pmeth.c \
+ crypto/dsa/dsa_ameth.c \
+ crypto/dsa/dsa_asn1.c \
+ crypto/dsa/dsa_depr.c \
+ crypto/dsa/dsa_err.c \
+ crypto/dsa/dsa_gen.c \
+ crypto/dsa/dsa_key.c \
+ crypto/dsa/dsa_lib.c \
+ crypto/dsa/dsa_ossl.c \
+ crypto/dsa/dsa_pmeth.c \
+ crypto/dsa/dsa_prn.c \
+ crypto/dsa/dsa_sign.c \
+ crypto/dsa/dsa_vrf.c \
+ crypto/dso/dso_dl.c \
+ crypto/dso/dso_dlfcn.c \
+ crypto/dso/dso_err.c \
+ crypto/dso/dso_lib.c \
+ crypto/dso/dso_null.c \
+ crypto/dso/dso_openssl.c \
+ crypto/ec/ec2_mult.c \
+ crypto/ec/ec2_oct.c \
+ crypto/ec/ec2_smpl.c \
+ crypto/ec/ec_ameth.c \
+ crypto/ec/ec_asn1.c \
+ crypto/ec/ec_check.c \
+ crypto/ec/ec_curve.c \
+ crypto/ec/ec_cvt.c \
+ crypto/ec/ec_err.c \
+ crypto/ec/ec_key.c \
+ crypto/ec/ec_lib.c \
+ crypto/ec/ec_mult.c \
+ crypto/ec/ec_oct.c \
+ crypto/ec/ec_pmeth.c \
+ crypto/ec/ec_print.c \
+ crypto/ec/eck_prn.c \
+ crypto/ec/ecp_mont.c \
+ crypto/ec/ecp_nist.c \
+ crypto/ec/ecp_oct.c \
+ crypto/ec/ecp_smpl.c \
+ crypto/ecdh/ech_err.c \
+ crypto/ecdh/ech_key.c \
+ crypto/ecdh/ech_lib.c \
+ crypto/ecdh/ech_ossl.c \
+ crypto/ecdsa/ecs_asn1.c \
+ crypto/ecdsa/ecs_err.c \
+ crypto/ecdsa/ecs_lib.c \
+ crypto/ecdsa/ecs_ossl.c \
+ crypto/ecdsa/ecs_sign.c \
+ crypto/ecdsa/ecs_vrf.c \
+ crypto/engine/eng_all.c \
+ crypto/engine/eng_cnf.c \
+ crypto/engine/eng_ctrl.c \
+ crypto/engine/eng_dyn.c \
+ crypto/engine/eng_err.c \
+ crypto/engine/eng_fat.c \
+ crypto/engine/eng_init.c \
+ crypto/engine/eng_lib.c \
+ crypto/engine/eng_list.c \
+ crypto/engine/eng_pkey.c \
+ crypto/engine/eng_table.c \
+ crypto/engine/tb_asnmth.c \
+ crypto/engine/tb_cipher.c \
+ crypto/engine/tb_dh.c \
+ crypto/engine/tb_digest.c \
+ crypto/engine/tb_dsa.c \
+ crypto/engine/tb_ecdh.c \
+ crypto/engine/tb_ecdsa.c \
+ crypto/engine/tb_pkmeth.c \
+ crypto/engine/tb_rand.c \
+ crypto/engine/tb_rsa.c \
+ crypto/engine/tb_store.c \
+ crypto/err/err.c \
+ crypto/err/err_all.c \
+ crypto/err/err_prn.c \
+ crypto/evp/bio_b64.c \
+ crypto/evp/bio_enc.c \
+ crypto/evp/bio_md.c \
+ crypto/evp/bio_ok.c \
+ crypto/evp/c_all.c \
+ crypto/evp/c_allc.c \
+ crypto/evp/c_alld.c \
+ crypto/evp/digest.c \
+ crypto/evp/e_aes.c \
+ crypto/evp/e_aes_cbc_hmac_sha1.c \
+ crypto/evp/e_bf.c \
+ crypto/evp/e_des.c \
+ crypto/evp/e_des3.c \
+ crypto/evp/e_null.c \
+ crypto/evp/e_old.c \
+ crypto/evp/e_rc2.c \
+ crypto/evp/e_rc4.c \
+ crypto/evp/e_rc4_hmac_md5.c \
+ crypto/evp/e_rc5.c \
+ crypto/evp/e_xcbc_d.c \
+ crypto/evp/encode.c \
+ crypto/evp/evp_acnf.c \
+ crypto/evp/evp_enc.c \
+ crypto/evp/evp_err.c \
+ crypto/evp/evp_key.c \
+ crypto/evp/evp_lib.c \
+ crypto/evp/evp_pbe.c \
+ crypto/evp/evp_pkey.c \
+ crypto/evp/m_dss.c \
+ crypto/evp/m_dss1.c \
+ crypto/evp/m_ecdsa.c \
+ crypto/evp/m_md4.c \
+ crypto/evp/m_md5.c \
+ crypto/evp/m_mdc2.c \
+ crypto/evp/m_null.c \
+ crypto/evp/m_ripemd.c \
+ crypto/evp/m_sha1.c \
+ crypto/evp/m_sigver.c \
+ crypto/evp/m_wp.c \
+ crypto/evp/names.c \
+ crypto/evp/p5_crpt.c \
+ crypto/evp/p5_crpt2.c \
+ crypto/evp/p_dec.c \
+ crypto/evp/p_enc.c \
+ crypto/evp/p_lib.c \
+ crypto/evp/p_open.c \
+ crypto/evp/p_seal.c \
+ crypto/evp/p_sign.c \
+ crypto/evp/p_verify.c \
+ crypto/evp/pmeth_fn.c \
+ crypto/evp/pmeth_gn.c \
+ crypto/evp/pmeth_lib.c \
+ crypto/hmac/hm_ameth.c \
+ crypto/hmac/hm_pmeth.c \
+ crypto/hmac/hmac.c \
+ crypto/krb5/krb5_asn.c \
+ crypto/lhash/lh_stats.c \
+ crypto/lhash/lhash.c \
+ crypto/md4/md4_dgst.c \
+ crypto/md4/md4_one.c \
+ crypto/md5/md5_dgst.c \
+ crypto/md5/md5_one.c \
+ crypto/modes/cbc128.c \
+ crypto/modes/ccm128.c \
+ crypto/modes/cfb128.c \
+ crypto/modes/ctr128.c \
+ crypto/modes/gcm128.c \
+ crypto/modes/ofb128.c \
+ crypto/modes/xts128.c \
+ crypto/o_init.c \
+ crypto/objects/o_names.c \
+ crypto/objects/obj_dat.c \
+ crypto/objects/obj_err.c \
+ crypto/objects/obj_lib.c \
+ crypto/objects/obj_xref.c \
+ crypto/ocsp/ocsp_asn.c \
+ crypto/ocsp/ocsp_cl.c \
+ crypto/ocsp/ocsp_err.c \
+ crypto/ocsp/ocsp_ext.c \
+ crypto/ocsp/ocsp_ht.c \
+ crypto/ocsp/ocsp_lib.c \
+ crypto/ocsp/ocsp_prn.c \
+ crypto/ocsp/ocsp_srv.c \
+ crypto/ocsp/ocsp_vfy.c \
+ crypto/pem/pem_all.c \
+ crypto/pem/pem_err.c \
+ crypto/pem/pem_info.c \
+ crypto/pem/pem_lib.c \
+ crypto/pem/pem_oth.c \
+ crypto/pem/pem_pk8.c \
+ crypto/pem/pem_pkey.c \
+ crypto/pem/pem_seal.c \
+ crypto/pem/pem_sign.c \
+ crypto/pem/pem_x509.c \
+ crypto/pem/pem_xaux.c \
+ crypto/pem/pvkfmt.c \
+ crypto/pkcs12/p12_add.c \
+ crypto/pkcs12/p12_asn.c \
+ crypto/pkcs12/p12_attr.c \
+ crypto/pkcs12/p12_crpt.c \
+ crypto/pkcs12/p12_crt.c \
+ crypto/pkcs12/p12_decr.c \
+ crypto/pkcs12/p12_init.c \
+ crypto/pkcs12/p12_key.c \
+ crypto/pkcs12/p12_kiss.c \
+ crypto/pkcs12/p12_mutl.c \
+ crypto/pkcs12/p12_npas.c \
+ crypto/pkcs12/p12_p8d.c \
+ crypto/pkcs12/p12_p8e.c \
+ crypto/pkcs12/p12_utl.c \
+ crypto/pkcs12/pk12err.c \
+ crypto/pkcs7/pk7_asn1.c \
+ crypto/pkcs7/pk7_attr.c \
+ crypto/pkcs7/pk7_doit.c \
+ crypto/pkcs7/pk7_lib.c \
+ crypto/pkcs7/pk7_mime.c \
+ crypto/pkcs7/pk7_smime.c \
+ crypto/pkcs7/pkcs7err.c \
+ crypto/pqueue/pqueue.c \
+ crypto/rand/md_rand.c \
+ crypto/rand/rand_egd.c \
+ crypto/rand/rand_err.c \
+ crypto/rand/rand_lib.c \
+ crypto/rand/rand_unix.c \
+ crypto/rand/rand_win.c \
+ crypto/rand/randfile.c \
+ crypto/rc2/rc2_cbc.c \
+ crypto/rc2/rc2_ecb.c \
+ crypto/rc2/rc2_skey.c \
+ crypto/rc2/rc2cfb64.c \
+ crypto/rc2/rc2ofb64.c \
+ crypto/rc4/rc4_enc.c \
+ crypto/rc4/rc4_skey.c \
+ crypto/rc4/rc4_utl.c \
+ crypto/ripemd/rmd_dgst.c \
+ crypto/ripemd/rmd_one.c \
+ crypto/rsa/rsa_ameth.c \
+ crypto/rsa/rsa_asn1.c \
+ crypto/rsa/rsa_chk.c \
+ crypto/rsa/rsa_crpt.c \
+ crypto/rsa/rsa_eay.c \
+ crypto/rsa/rsa_err.c \
+ crypto/rsa/rsa_gen.c \
+ crypto/rsa/rsa_lib.c \
+ crypto/rsa/rsa_none.c \
+ crypto/rsa/rsa_null.c \
+ crypto/rsa/rsa_oaep.c \
+ crypto/rsa/rsa_pk1.c \
+ crypto/rsa/rsa_pmeth.c \
+ crypto/rsa/rsa_prn.c \
+ crypto/rsa/rsa_pss.c \
+ crypto/rsa/rsa_saos.c \
+ crypto/rsa/rsa_sign.c \
+ crypto/rsa/rsa_ssl.c \
+ crypto/rsa/rsa_x931.c \
+ crypto/sha/sha1_one.c \
+ crypto/sha/sha1dgst.c \
+ crypto/sha/sha256.c \
+ crypto/sha/sha512.c \
+ crypto/sha/sha_dgst.c \
+ crypto/srp/srp_lib.c \
+ crypto/srp/srp_vfy.c \
+ crypto/stack/stack.c \
+ crypto/ts/ts_err.c \
+ crypto/txt_db/txt_db.c \
+ crypto/ui/ui_compat.c \
+ crypto/ui/ui_err.c \
+ crypto/ui/ui_lib.c \
+ crypto/ui/ui_openssl.c \
+ crypto/ui/ui_util.c \
+ crypto/x509/by_dir.c \
+ crypto/x509/by_file.c \
+ crypto/x509/x509_att.c \
+ crypto/x509/x509_cmp.c \
+ crypto/x509/x509_d2.c \
+ crypto/x509/x509_def.c \
+ crypto/x509/x509_err.c \
+ crypto/x509/x509_ext.c \
+ crypto/x509/x509_lu.c \
+ crypto/x509/x509_obj.c \
+ crypto/x509/x509_r2x.c \
+ crypto/x509/x509_req.c \
+ crypto/x509/x509_set.c \
+ crypto/x509/x509_trs.c \
+ crypto/x509/x509_txt.c \
+ crypto/x509/x509_v3.c \
+ crypto/x509/x509_vfy.c \
+ crypto/x509/x509_vpm.c \
+ crypto/x509/x509cset.c \
+ crypto/x509/x509name.c \
+ crypto/x509/x509rset.c \
+ crypto/x509/x509spki.c \
+ crypto/x509/x509type.c \
+ crypto/x509/x_all.c \
+ crypto/x509v3/pcy_cache.c \
+ crypto/x509v3/pcy_data.c \
+ crypto/x509v3/pcy_lib.c \
+ crypto/x509v3/pcy_map.c \
+ crypto/x509v3/pcy_node.c \
+ crypto/x509v3/pcy_tree.c \
+ crypto/x509v3/v3_akey.c \
+ crypto/x509v3/v3_akeya.c \
+ crypto/x509v3/v3_alt.c \
+ crypto/x509v3/v3_bcons.c \
+ crypto/x509v3/v3_bitst.c \
+ crypto/x509v3/v3_conf.c \
+ crypto/x509v3/v3_cpols.c \
+ crypto/x509v3/v3_crld.c \
+ crypto/x509v3/v3_enum.c \
+ crypto/x509v3/v3_extku.c \
+ crypto/x509v3/v3_genn.c \
+ crypto/x509v3/v3_ia5.c \
+ crypto/x509v3/v3_info.c \
+ crypto/x509v3/v3_int.c \
+ crypto/x509v3/v3_lib.c \
+ crypto/x509v3/v3_ncons.c \
+ crypto/x509v3/v3_ocsp.c \
+ crypto/x509v3/v3_pci.c \
+ crypto/x509v3/v3_pcia.c \
+ crypto/x509v3/v3_pcons.c \
+ crypto/x509v3/v3_pku.c \
+ crypto/x509v3/v3_pmaps.c \
+ crypto/x509v3/v3_prn.c \
+ crypto/x509v3/v3_purp.c \
+ crypto/x509v3/v3_skey.c \
+ crypto/x509v3/v3_sxnet.c \
+ crypto/x509v3/v3_utl.c \
+ crypto/x509v3/v3err.c
 
 local_c_includes := \
-	external/openssl \
-	external/openssl/crypto \
-	external/openssl/crypto/asn1 \
-	external/openssl/crypto/evp \
-	external/openssl/crypto/modes \
-	external/openssl/include \
-	external/openssl/include/openssl \
-	external/zlib
+ external/openssl \
+ external/openssl/crypto \
+ external/openssl/crypto/asn1 \
+ external/openssl/crypto/evp \
+ external/openssl/crypto/modes \
+ external/openssl/include \
+ external/openssl/include/openssl \
+ external/zlib
 
 local_c_flags := -DNO_WINDOWS_BRAINDEATH
 
@@ -557,19 +581,21 @@
 LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 ifeq ($(TARGET_ARCH),arm)
-	LOCAL_SRC_FILES += $(arm_src_files)
-	LOCAL_CFLAGS += $(arm_cflags)
+  LOCAL_SRC_FILES += $(arm_src_files)
+  LOCAL_CFLAGS += $(arm_cflags)
 endif
 ifeq ($(TARGET_ARCH),mips)
-    ifneq (($TARGET_HAS_BIGENDIAN),true)
-      LOCAL_SRC_FILES += $(mips_src_files)
-      LOCAL_CFLAGS += $(mips_cflags)
-    else
-      LOCAL_SRC_FILES += $(other_arch_src_files)
-    endif
+  ifneq ($(TARGET_HAS_BIGENDIAN),true)
+    LOCAL_SRC_FILES += $(mips_src_files)
+    LOCAL_CFLAGS += $(mips_cflags)
+  else
+    LOCAL_SRC_FILES += $(other_arch_src_files)
+  endif
 endif
 ifeq ($(TARGET_ARCH),x86)
-	LOCAL_SRC_FILES += $(other_arch_src_files)
+  LOCAL_SRC_FILES += $(x86_src_files)
+  LOCAL_SRC_FILES := $(filter-out $(x86_exclude_files),$(LOCAL_SRC_FILES))
+  LOCAL_CFLAGS += $(x86_cflags)
 endif
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE:= libcrypto_static
@@ -597,19 +623,21 @@
 LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 ifeq ($(TARGET_ARCH),arm)
-	LOCAL_SRC_FILES += $(arm_src_files)
-	LOCAL_CFLAGS += $(arm_cflags)
+  LOCAL_SRC_FILES += $(arm_src_files)
+  LOCAL_CFLAGS += $(arm_cflags)
 endif
 ifeq ($(TARGET_ARCH),mips)
-    ifneq (($TARGET_HAS_BIGENDIAN),true)
-      LOCAL_SRC_FILES += $(mips_src_files)
-      LOCAL_CFLAGS += $(mips_cflags)
-    else
-      LOCAL_SRC_FILES += $(other_arch_src_files)
-    endif
+  ifneq ($(TARGET_HAS_BIGENDIAN),true)
+    LOCAL_SRC_FILES += $(mips_src_files)
+    LOCAL_CFLAGS += $(mips_cflags)
+  else
+    LOCAL_SRC_FILES += $(other_arch_src_files)
+  endif
 endif
 ifeq ($(TARGET_ARCH),x86)
-	LOCAL_SRC_FILES += $(other_arch_src_files)
+  LOCAL_SRC_FILES += $(x86_src_files)
+  LOCAL_SRC_FILES := $(filter-out $(x86_exclude_files),$(LOCAL_SRC_FILES))
+  LOCAL_CFLAGS += $(x86_cflags)
 endif
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE:= libcrypto
@@ -622,10 +650,16 @@
 include $(LOCAL_PATH)/android-config.mk
 LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
 LOCAL_SRC_FILES += $(local_src_files)
+ifeq ($(HOST_ARCH),x86)
+  LOCAL_SRC_FILES += $(x86_src_files)
+  LOCAL_SRC_FILES := $(filter-out $(x86_exclude_files),$(LOCAL_SRC_FILES))
+  LOCAL_CFLAGS += $(x86_cflags)
+else
+  LOCAL_SRC_FILES += $(other_arch_src_files)
+endif
 LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
 LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
-LOCAL_SRC_FILES += $(other_arch_src_files)
 LOCAL_LDLIBS += -ldl
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE:= libcrypto
@@ -639,10 +673,16 @@
 include $(LOCAL_PATH)/android-config.mk
 LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
 LOCAL_SRC_FILES += $(local_src_files)
+ifeq ($(HOST_ARCH),x86)
+  LOCAL_SRC_FILES += $(x86_src_files)
+  LOCAL_SRC_FILES := $(filter-out $(x86_exclude_files),$(LOCAL_SRC_FILES))
+  LOCAL_CFLAGS += $(x86_cflags)
+else
+  LOCAL_SRC_FILES += $(other_arch_src_files)
+endif
 LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
 LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
-LOCAL_SRC_FILES += $(other_arch_src_files)
 LOCAL_LDLIBS += -ldl
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE:= libcrypto_static
diff --git a/crypto/aes/asm/aes-586.s b/crypto/aes/asm/aes-586.s
new file mode 100644
index 0000000..f69b7d5
--- /dev/null
+++ b/crypto/aes/asm/aes-586.s
@@ -0,0 +1,3236 @@
+.file	"aes-586.s"
+.text
+.type	_x86_AES_encrypt_compact,@function
+.align	16
+_x86_AES_encrypt_compact:
+	movl	%edi,20(%esp)
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+	movl	-128(%ebp),%edi
+	movl	-96(%ebp),%esi
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+.align	16
+.L000loop:
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ch,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$8,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$24,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+
+	movl	%ecx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ecx,%ecx,1),%edi
+	subl	%ebp,%esi
+	andl	$4278124286,%edi
+	andl	$454761243,%esi
+	movl	%ecx,%ebp
+	xorl	%edi,%esi
+	xorl	%esi,%ecx
+	roll	$24,%ecx
+	xorl	%esi,%ecx
+	rorl	$16,%ebp
+	xorl	%ebp,%ecx
+	rorl	$8,%ebp
+	xorl	%ebp,%ecx
+	movl	%edx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%edx,%edx,1),%edi
+	subl	%ebp,%esi
+	andl	$4278124286,%edi
+	andl	$454761243,%esi
+	movl	%edx,%ebp
+	xorl	%edi,%esi
+	xorl	%esi,%edx
+	roll	$24,%edx
+	xorl	%esi,%edx
+	rorl	$16,%ebp
+	xorl	%ebp,%edx
+	rorl	$8,%ebp
+	xorl	%ebp,%edx
+	movl	%eax,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%eax,%eax,1),%edi
+	subl	%ebp,%esi
+	andl	$4278124286,%edi
+	andl	$454761243,%esi
+	movl	%eax,%ebp
+	xorl	%edi,%esi
+	xorl	%esi,%eax
+	roll	$24,%eax
+	xorl	%esi,%eax
+	rorl	$16,%ebp
+	xorl	%ebp,%eax
+	rorl	$8,%ebp
+	xorl	%ebp,%eax
+	movl	%ebx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ebx,%ebx,1),%edi
+	subl	%ebp,%esi
+	andl	$4278124286,%edi
+	andl	$454761243,%esi
+	movl	%ebx,%ebp
+	xorl	%edi,%esi
+	xorl	%esi,%ebx
+	roll	$24,%ebx
+	xorl	%esi,%ebx
+	rorl	$16,%ebp
+	xorl	%ebp,%ebx
+	rorl	$8,%ebp
+	xorl	%ebp,%ebx
+	movl	20(%esp),%edi
+	movl	28(%esp),%ebp
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi
+	movl	%edi,20(%esp)
+	jb	.L000loop
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ch,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+
+	movl	20(%esp),%edi
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$8,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$24,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+
+	xorl	16(%edi),%eax
+	xorl	20(%edi),%ebx
+	xorl	24(%edi),%ecx
+	xorl	28(%edi),%edx
+	ret
+.size	_x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact
+.type	_sse_AES_encrypt_compact,@function
+.align	16
+_sse_AES_encrypt_compact:
+	pxor	(%edi),%mm0
+	pxor	8(%edi),%mm4
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+	movl	$454761243,%eax
+	movl	%eax,8(%esp)
+	movl	%eax,12(%esp)
+	movl	-128(%ebp),%eax
+	movl	-96(%ebp),%ebx
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+.align	16
+.L001loop:
+	pshufw	$8,%mm0,%mm1
+	pshufw	$13,%mm4,%mm5
+	movd	%mm1,%eax
+	movd	%mm5,%ebx
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%ecx
+	pshufw	$13,%mm0,%mm2
+	movzbl	%ah,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	shll	$8,%edx
+	shrl	$16,%eax
+	movzbl	%bl,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%ecx
+	pshufw	$8,%mm4,%mm6
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%edx
+	shrl	$16,%ebx
+	movzbl	%ah,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movd	%ecx,%mm0
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%ecx
+	movd	%mm2,%eax
+	movzbl	%bl,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%ecx
+	movd	%mm6,%ebx
+	movzbl	%ah,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	movd	%ecx,%mm1
+	movzbl	%bl,%esi
+	movzbl	-128(%ebp,%esi,1),%ecx
+	shrl	$16,%ebx
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%ecx
+	shrl	$16,%eax
+	punpckldq	%mm1,%mm0
+	movzbl	%ah,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	andl	$255,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$16,%eax
+	orl	%eax,%edx
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	movd	%ecx,%mm4
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	orl	%ebx,%edx
+	movd	%edx,%mm5
+	punpckldq	%mm5,%mm4
+	addl	$16,%edi
+	cmpl	24(%esp),%edi
+	ja	.L002out
+	movq	8(%esp),%mm2
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	movq	%mm0,%mm1
+	movq	%mm4,%mm5
+	pcmpgtb	%mm0,%mm3
+	pcmpgtb	%mm4,%mm7
+	pand	%mm2,%mm3
+	pand	%mm2,%mm7
+	pshufw	$177,%mm0,%mm2
+	pshufw	$177,%mm4,%mm6
+	paddb	%mm0,%mm0
+	paddb	%mm4,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pshufw	$177,%mm2,%mm3
+	pshufw	$177,%mm6,%mm7
+	pxor	%mm0,%mm1
+	pxor	%mm4,%mm5
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	%mm3,%mm2
+	movq	%mm7,%mm6
+	pslld	$8,%mm3
+	pslld	$8,%mm7
+	psrld	$24,%mm2
+	psrld	$24,%mm6
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	movq	(%edi),%mm2
+	movq	8(%edi),%mm6
+	psrld	$8,%mm1
+	psrld	$8,%mm5
+	movl	-128(%ebp),%eax
+	pslld	$24,%mm3
+	pslld	$24,%mm7
+	movl	-64(%ebp),%ebx
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movl	(%ebp),%ecx
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movl	64(%ebp),%edx
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	jmp	.L001loop
+.align	16
+.L002out:
+	pxor	(%edi),%mm0
+	pxor	8(%edi),%mm4
+	ret
+.size	_sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact
+.type	_x86_AES_encrypt,@function
+.align	16
+_x86_AES_encrypt:
+	movl	%edi,20(%esp)
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+.align	16
+.L003loop:
+	movl	%eax,%esi
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%bh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,4(%esp)
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%ch,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,8(%esp)
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%dh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movzbl	%bh,%edi
+	xorl	1(%ebp,%edi,8),%esi
+
+	movl	20(%esp),%edi
+	movl	(%ebp,%edx,8),%edx
+	movzbl	%ah,%eax
+	xorl	3(%ebp,%eax,8),%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	xorl	2(%ebp,%ebx,8),%edx
+	movl	8(%esp),%ebx
+	xorl	1(%ebp,%ecx,8),%edx
+	movl	%esi,%ecx
+
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi
+	movl	%edi,20(%esp)
+	jb	.L003loop
+	movl	%eax,%esi
+	andl	$255,%esi
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi
+	movzbl	%bh,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	shrl	$16,%ebx
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi
+	movzbl	%ch,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$24,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	shrl	$24,%ecx
+	movl	2(%ebp,%esi,8),%esi
+	andl	$255,%esi
+	movzbl	%dh,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$65280,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edx
+	andl	$255,%edi
+	movl	(%ebp,%edi,8),%edi
+	andl	$16711680,%edi
+	xorl	%edi,%esi
+	movzbl	%bh,%edi
+	movl	2(%ebp,%edi,8),%edi
+	andl	$4278190080,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi
+	andl	$255,%edx
+	movl	2(%ebp,%edx,8),%edx
+	andl	$255,%edx
+	movzbl	%ah,%eax
+	movl	(%ebp,%eax,8),%eax
+	andl	$65280,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	andl	$255,%ebx
+	movl	(%ebp,%ebx,8),%ebx
+	andl	$16711680,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	movl	2(%ebp,%ecx,8),%ecx
+	andl	$4278190080,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	ret
+.align	64
+.LAES_Te:
+.long	2774754246,2774754246
+.long	2222750968,2222750968
+.long	2574743534,2574743534
+.long	2373680118,2373680118
+.long	234025727,234025727
+.long	3177933782,3177933782
+.long	2976870366,2976870366
+.long	1422247313,1422247313
+.long	1345335392,1345335392
+.long	50397442,50397442
+.long	2842126286,2842126286
+.long	2099981142,2099981142
+.long	436141799,436141799
+.long	1658312629,1658312629
+.long	3870010189,3870010189
+.long	2591454956,2591454956
+.long	1170918031,1170918031
+.long	2642575903,2642575903
+.long	1086966153,1086966153
+.long	2273148410,2273148410
+.long	368769775,368769775
+.long	3948501426,3948501426
+.long	3376891790,3376891790
+.long	200339707,200339707
+.long	3970805057,3970805057
+.long	1742001331,1742001331
+.long	4255294047,4255294047
+.long	3937382213,3937382213
+.long	3214711843,3214711843
+.long	4154762323,4154762323
+.long	2524082916,2524082916
+.long	1539358875,1539358875
+.long	3266819957,3266819957
+.long	486407649,486407649
+.long	2928907069,2928907069
+.long	1780885068,1780885068
+.long	1513502316,1513502316
+.long	1094664062,1094664062
+.long	49805301,49805301
+.long	1338821763,1338821763
+.long	1546925160,1546925160
+.long	4104496465,4104496465
+.long	887481809,887481809
+.long	150073849,150073849
+.long	2473685474,2473685474
+.long	1943591083,1943591083
+.long	1395732834,1395732834
+.long	1058346282,1058346282
+.long	201589768,201589768
+.long	1388824469,1388824469
+.long	1696801606,1696801606
+.long	1589887901,1589887901
+.long	672667696,672667696
+.long	2711000631,2711000631
+.long	251987210,251987210
+.long	3046808111,3046808111
+.long	151455502,151455502
+.long	907153956,907153956
+.long	2608889883,2608889883
+.long	1038279391,1038279391
+.long	652995533,652995533
+.long	1764173646,1764173646
+.long	3451040383,3451040383
+.long	2675275242,2675275242
+.long	453576978,453576978
+.long	2659418909,2659418909
+.long	1949051992,1949051992
+.long	773462580,773462580
+.long	756751158,756751158
+.long	2993581788,2993581788
+.long	3998898868,3998898868
+.long	4221608027,4221608027
+.long	4132590244,4132590244
+.long	1295727478,1295727478
+.long	1641469623,1641469623
+.long	3467883389,3467883389
+.long	2066295122,2066295122
+.long	1055122397,1055122397
+.long	1898917726,1898917726
+.long	2542044179,2542044179
+.long	4115878822,4115878822
+.long	1758581177,1758581177
+.long	0,0
+.long	753790401,753790401
+.long	1612718144,1612718144
+.long	536673507,536673507
+.long	3367088505,3367088505
+.long	3982187446,3982187446
+.long	3194645204,3194645204
+.long	1187761037,1187761037
+.long	3653156455,3653156455
+.long	1262041458,1262041458
+.long	3729410708,3729410708
+.long	3561770136,3561770136
+.long	3898103984,3898103984
+.long	1255133061,1255133061
+.long	1808847035,1808847035
+.long	720367557,720367557
+.long	3853167183,3853167183
+.long	385612781,385612781
+.long	3309519750,3309519750
+.long	3612167578,3612167578
+.long	1429418854,1429418854
+.long	2491778321,2491778321
+.long	3477423498,3477423498
+.long	284817897,284817897
+.long	100794884,100794884
+.long	2172616702,2172616702
+.long	4031795360,4031795360
+.long	1144798328,1144798328
+.long	3131023141,3131023141
+.long	3819481163,3819481163
+.long	4082192802,4082192802
+.long	4272137053,4272137053
+.long	3225436288,3225436288
+.long	2324664069,2324664069
+.long	2912064063,2912064063
+.long	3164445985,3164445985
+.long	1211644016,1211644016
+.long	83228145,83228145
+.long	3753688163,3753688163
+.long	3249976951,3249976951
+.long	1977277103,1977277103
+.long	1663115586,1663115586
+.long	806359072,806359072
+.long	452984805,452984805
+.long	250868733,250868733
+.long	1842533055,1842533055
+.long	1288555905,1288555905
+.long	336333848,336333848
+.long	890442534,890442534
+.long	804056259,804056259
+.long	3781124030,3781124030
+.long	2727843637,2727843637
+.long	3427026056,3427026056
+.long	957814574,957814574
+.long	1472513171,1472513171
+.long	4071073621,4071073621
+.long	2189328124,2189328124
+.long	1195195770,1195195770
+.long	2892260552,2892260552
+.long	3881655738,3881655738
+.long	723065138,723065138
+.long	2507371494,2507371494
+.long	2690670784,2690670784
+.long	2558624025,2558624025
+.long	3511635870,3511635870
+.long	2145180835,2145180835
+.long	1713513028,1713513028
+.long	2116692564,2116692564
+.long	2878378043,2878378043
+.long	2206763019,2206763019
+.long	3393603212,3393603212
+.long	703524551,703524551
+.long	3552098411,3552098411
+.long	1007948840,1007948840
+.long	2044649127,2044649127
+.long	3797835452,3797835452
+.long	487262998,487262998
+.long	1994120109,1994120109
+.long	1004593371,1004593371
+.long	1446130276,1446130276
+.long	1312438900,1312438900
+.long	503974420,503974420
+.long	3679013266,3679013266
+.long	168166924,168166924
+.long	1814307912,1814307912
+.long	3831258296,3831258296
+.long	1573044895,1573044895
+.long	1859376061,1859376061
+.long	4021070915,4021070915
+.long	2791465668,2791465668
+.long	2828112185,2828112185
+.long	2761266481,2761266481
+.long	937747667,937747667
+.long	2339994098,2339994098
+.long	854058965,854058965
+.long	1137232011,1137232011
+.long	1496790894,1496790894
+.long	3077402074,3077402074
+.long	2358086913,2358086913
+.long	1691735473,1691735473
+.long	3528347292,3528347292
+.long	3769215305,3769215305
+.long	3027004632,3027004632
+.long	4199962284,4199962284
+.long	133494003,133494003
+.long	636152527,636152527
+.long	2942657994,2942657994
+.long	2390391540,2390391540
+.long	3920539207,3920539207
+.long	403179536,403179536
+.long	3585784431,3585784431
+.long	2289596656,2289596656
+.long	1864705354,1864705354
+.long	1915629148,1915629148
+.long	605822008,605822008
+.long	4054230615,4054230615
+.long	3350508659,3350508659
+.long	1371981463,1371981463
+.long	602466507,602466507
+.long	2094914977,2094914977
+.long	2624877800,2624877800
+.long	555687742,555687742
+.long	3712699286,3712699286
+.long	3703422305,3703422305
+.long	2257292045,2257292045
+.long	2240449039,2240449039
+.long	2423288032,2423288032
+.long	1111375484,1111375484
+.long	3300242801,3300242801
+.long	2858837708,2858837708
+.long	3628615824,3628615824
+.long	84083462,84083462
+.long	32962295,32962295
+.long	302911004,302911004
+.long	2741068226,2741068226
+.long	1597322602,1597322602
+.long	4183250862,4183250862
+.long	3501832553,3501832553
+.long	2441512471,2441512471
+.long	1489093017,1489093017
+.long	656219450,656219450
+.long	3114180135,3114180135
+.long	954327513,954327513
+.long	335083755,335083755
+.long	3013122091,3013122091
+.long	856756514,856756514
+.long	3144247762,3144247762
+.long	1893325225,1893325225
+.long	2307821063,2307821063
+.long	2811532339,2811532339
+.long	3063651117,3063651117
+.long	572399164,572399164
+.long	2458355477,2458355477
+.long	552200649,552200649
+.long	1238290055,1238290055
+.long	4283782570,4283782570
+.long	2015897680,2015897680
+.long	2061492133,2061492133
+.long	2408352771,2408352771
+.long	4171342169,4171342169
+.long	2156497161,2156497161
+.long	386731290,386731290
+.long	3669999461,3669999461
+.long	837215959,837215959
+.long	3326231172,3326231172
+.long	3093850320,3093850320
+.long	3275833730,3275833730
+.long	2962856233,2962856233
+.long	1999449434,1999449434
+.long	286199582,286199582
+.long	3417354363,3417354363
+.long	4233385128,4233385128
+.long	3602627437,3602627437
+.long	974525996,974525996
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.byte	99,124,119,123,242,107,111,197
+.byte	48,1,103,43,254,215,171,118
+.byte	202,130,201,125,250,89,71,240
+.byte	173,212,162,175,156,164,114,192
+.byte	183,253,147,38,54,63,247,204
+.byte	52,165,229,241,113,216,49,21
+.byte	4,199,35,195,24,150,5,154
+.byte	7,18,128,226,235,39,178,117
+.byte	9,131,44,26,27,110,90,160
+.byte	82,59,214,179,41,227,47,132
+.byte	83,209,0,237,32,252,177,91
+.byte	106,203,190,57,74,76,88,207
+.byte	208,239,170,251,67,77,51,133
+.byte	69,249,2,127,80,60,159,168
+.byte	81,163,64,143,146,157,56,245
+.byte	188,182,218,33,16,255,243,210
+.byte	205,12,19,236,95,151,68,23
+.byte	196,167,126,61,100,93,25,115
+.byte	96,129,79,220,34,42,144,136
+.byte	70,238,184,20,222,94,11,219
+.byte	224,50,58,10,73,6,36,92
+.byte	194,211,172,98,145,149,228,121
+.byte	231,200,55,109,141,213,78,169
+.byte	108,86,244,234,101,122,174,8
+.byte	186,120,37,46,28,166,180,198
+.byte	232,221,116,31,75,189,139,138
+.byte	112,62,181,102,72,3,246,14
+.byte	97,53,87,185,134,193,29,158
+.byte	225,248,152,17,105,217,142,148
+.byte	155,30,135,233,206,85,40,223
+.byte	140,161,137,13,191,230,66,104
+.byte	65,153,45,15,176,84,187,22
+.long	1,2,4,8
+.long	16,32,64,128
+.long	27,54,0,0
+.long	0,0,0,0
+.size	_x86_AES_encrypt,.-_x86_AES_encrypt
+.globl	AES_encrypt
+.type	AES_encrypt,@function
+.align	16
+AES_encrypt:
+.L_AES_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	28(%esp),%edi
+	movl	%esp,%eax
+	subl	$36,%esp
+	andl	$-64,%esp
+	leal	-127(%edi),%ebx
+	subl	%esp,%ebx
+	negl	%ebx
+	andl	$960,%ebx
+	subl	%ebx,%esp
+	addl	$4,%esp
+	movl	%eax,28(%esp)
+	call	.L004pic_point
+.L004pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P,%eax
+	leal	.LAES_Te-.L004pic_point(%ebp),%ebp
+	leal	764(%esp),%ebx
+	subl	%ebp,%ebx
+	andl	$768,%ebx
+	leal	2176(%ebp,%ebx,1),%ebp
+	btl	$25,(%eax)
+	jnc	.L005x86
+	movq	(%esi),%mm0
+	movq	8(%esi),%mm4
+	call	_sse_AES_encrypt_compact
+	movl	28(%esp),%esp
+	movl	24(%esp),%esi
+	movq	%mm0,(%esi)
+	movq	%mm4,8(%esi)
+	emms
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	16
+.L005x86:
+	movl	%ebp,24(%esp)
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	call	_x86_AES_encrypt_compact
+	movl	28(%esp),%esp
+	movl	24(%esp),%esi
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	AES_encrypt,.-.L_AES_encrypt_begin
+.type	_x86_AES_decrypt_compact,@function
+.align	16
+_x86_AES_decrypt_compact:
+	movl	%edi,20(%esp)
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+	movl	-128(%ebp),%edi
+	movl	-96(%ebp),%esi
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+.align	16
+.L006loop:
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	shrl	$24,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx
+	movl	%ecx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%eax
+	subl	%edi,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	xorl	%eax,%esi
+	movl	%esi,%eax
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%eax,%eax,1),%ebx
+	subl	%edi,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%ecx,%eax
+	xorl	%ebx,%esi
+	movl	%esi,%ebx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%ecx,%ebx
+	roll	$8,%ecx
+	xorl	%esi,%ebp
+	xorl	%eax,%ecx
+	xorl	%ebp,%eax
+	roll	$24,%eax
+	xorl	%ebx,%ecx
+	xorl	%ebp,%ebx
+	roll	$16,%ebx
+	xorl	%ebp,%ecx
+	roll	$8,%ebp
+	xorl	%eax,%ecx
+	xorl	%ebx,%ecx
+	movl	4(%esp),%eax
+	xorl	%ebp,%ecx
+	movl	%ecx,12(%esp)
+	movl	%edx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebx
+	subl	%edi,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%ebx,%esi
+	movl	%esi,%ebx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%edx,%ebx
+	xorl	%ecx,%esi
+	movl	%esi,%ecx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%edx,%ecx
+	roll	$8,%edx
+	xorl	%esi,%ebp
+	xorl	%ebx,%edx
+	xorl	%ebp,%ebx
+	roll	$24,%ebx
+	xorl	%ecx,%edx
+	xorl	%ebp,%ecx
+	roll	$16,%ecx
+	xorl	%ebp,%edx
+	roll	$8,%ebp
+	xorl	%ebx,%edx
+	xorl	%ecx,%edx
+	movl	8(%esp),%ebx
+	xorl	%ebp,%edx
+	movl	%edx,16(%esp)
+	movl	%eax,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%eax,%eax,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%ecx,%esi
+	movl	%esi,%ecx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%edx
+	subl	%edi,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%eax,%ecx
+	xorl	%edx,%esi
+	movl	%esi,%edx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%eax,%edx
+	roll	$8,%eax
+	xorl	%esi,%ebp
+	xorl	%ecx,%eax
+	xorl	%ebp,%ecx
+	roll	$24,%ecx
+	xorl	%edx,%eax
+	xorl	%ebp,%edx
+	roll	$16,%edx
+	xorl	%ebp,%eax
+	roll	$8,%ebp
+	xorl	%ecx,%eax
+	xorl	%edx,%eax
+	xorl	%ebp,%eax
+	movl	%ebx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%edi,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%ecx,%esi
+	movl	%esi,%ecx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%ecx,%ecx,1),%edx
+	subl	%edi,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%ebx,%ecx
+	xorl	%edx,%esi
+	movl	%esi,%edx
+	andl	$2155905152,%esi
+	movl	%esi,%edi
+	shrl	$7,%edi
+	leal	(%edx,%edx,1),%ebp
+	subl	%edi,%esi
+	andl	$4278124286,%ebp
+	andl	$454761243,%esi
+	xorl	%ebx,%edx
+	roll	$8,%ebx
+	xorl	%esi,%ebp
+	xorl	%ecx,%ebx
+	xorl	%ebp,%ecx
+	roll	$24,%ecx
+	xorl	%edx,%ebx
+	xorl	%ebp,%edx
+	roll	$16,%edx
+	xorl	%ebp,%ebx
+	roll	$8,%ebp
+	xorl	%ecx,%ebx
+	xorl	%edx,%ebx
+	movl	12(%esp),%ecx
+	xorl	%ebp,%ebx
+	movl	16(%esp),%edx
+	movl	20(%esp),%edi
+	movl	28(%esp),%ebp
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi
+	movl	%edi,20(%esp)
+	jb	.L006loop
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	-128(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi
+	andl	$255,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	-128(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	shrl	$24,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	xorl	16(%edi),%eax
+	xorl	20(%edi),%ebx
+	xorl	24(%edi),%ecx
+	xorl	28(%edi),%edx
+	ret
+.size	_x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact
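+# _sse_AES_decrypt_compact: MMX variant of the compact decrypt.  The
+# state lives in %mm0/%mm4, and 8(%esp)/12(%esp) hold 0x1B1B1B1B for the
+# pcmpgtb/paddb "xtime" steps of InvMixColumns.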
+.type	_sse_AES_decrypt_compact,@function
+.align	16
+_sse_AES_decrypt_compact:
+	pxor	(%edi),%mm0
+	pxor	8(%edi),%mm4
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+	movl	$454761243,%eax
+	movl	%eax,8(%esp)
+	movl	%eax,12(%esp)
+	movl	-128(%ebp),%eax
+	movl	-96(%ebp),%ebx
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+.align	16
+.L007loop:
+	pshufw	$12,%mm0,%mm1
+	movd	%mm1,%eax
+	pshufw	$9,%mm4,%mm5
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%ecx
+	movd	%mm5,%ebx
+	movzbl	%ah,%edx
+	movzbl	-128(%ebp,%edx,1),%edx
+	shll	$8,%edx
+	pshufw	$6,%mm0,%mm2
+	movzbl	%bl,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%ecx
+	shrl	$16,%eax
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%edx
+	shrl	$16,%ebx
+	pshufw	$3,%mm4,%mm6
+	movzbl	%ah,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%ecx
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	movd	%ecx,%mm0
+	movzbl	%al,%esi
+	movd	%mm2,%eax
+	movzbl	-128(%ebp,%esi,1),%ecx
+	shll	$16,%ecx
+	movzbl	%bl,%esi
+	movd	%mm6,%ebx
+	movzbl	-128(%ebp,%esi,1),%esi
+	orl	%esi,%ecx
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	orl	%esi,%edx
+	movzbl	%bl,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%edx
+	movd	%edx,%mm1
+	movzbl	%ah,%esi
+	movzbl	-128(%ebp,%esi,1),%edx
+	shll	$8,%edx
+	movzbl	%bh,%esi
+	shrl	$16,%eax
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$24,%esi
+	orl	%esi,%edx
+	shrl	$16,%ebx
+	punpckldq	%mm1,%mm0
+	movzbl	%bh,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$8,%esi
+	orl	%esi,%ecx
+	andl	$255,%ebx
+	movzbl	-128(%ebp,%ebx,1),%ebx
+	orl	%ebx,%edx
+	movzbl	%al,%esi
+	movzbl	-128(%ebp,%esi,1),%esi
+	shll	$16,%esi
+	orl	%esi,%edx
+	movd	%edx,%mm4
+	movzbl	%ah,%eax
+	movzbl	-128(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	orl	%eax,%ecx
+	movd	%ecx,%mm5
+	punpckldq	%mm5,%mm4
+	addl	$16,%edi
+	cmpl	24(%esp),%edi
+	ja	.L008out
+	movq	%mm0,%mm3
+	movq	%mm4,%mm7
+	pshufw	$228,%mm0,%mm2
+	pshufw	$228,%mm4,%mm6
+	movq	%mm0,%mm1
+	movq	%mm4,%mm5
+	pshufw	$177,%mm0,%mm0
+	pshufw	$177,%mm4,%mm4
+	pslld	$8,%mm2
+	pslld	$8,%mm6
+	psrld	$8,%mm3
+	psrld	$8,%mm7
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pslld	$16,%mm2
+	pslld	$16,%mm6
+	psrld	$16,%mm3
+	psrld	$16,%mm7
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movq	8(%esp),%mm3
+	pxor	%mm2,%mm2
+	pxor	%mm6,%mm6
+	pcmpgtb	%mm1,%mm2
+	pcmpgtb	%mm5,%mm6
+	pand	%mm3,%mm2
+	pand	%mm3,%mm6
+	paddb	%mm1,%mm1
+	paddb	%mm5,%mm5
+	pxor	%mm2,%mm1
+	pxor	%mm6,%mm5
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	movq	%mm1,%mm2
+	movq	%mm5,%mm6
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pslld	$24,%mm3
+	pslld	$24,%mm7
+	psrld	$8,%mm2
+	psrld	$8,%mm6
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	movq	8(%esp),%mm2
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	pcmpgtb	%mm1,%mm3
+	pcmpgtb	%mm5,%mm7
+	pand	%mm2,%mm3
+	pand	%mm2,%mm7
+	paddb	%mm1,%mm1
+	paddb	%mm5,%mm5
+	pxor	%mm3,%mm1
+	pxor	%mm7,%mm5
+	pshufw	$177,%mm1,%mm3
+	pshufw	$177,%mm5,%mm7
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm3,%mm3
+	pxor	%mm7,%mm7
+	pcmpgtb	%mm1,%mm3
+	pcmpgtb	%mm5,%mm7
+	pand	%mm2,%mm3
+	pand	%mm2,%mm7
+	paddb	%mm1,%mm1
+	paddb	%mm5,%mm5
+	pxor	%mm3,%mm1
+	pxor	%mm7,%mm5
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movq	%mm1,%mm3
+	movq	%mm5,%mm7
+	pshufw	$177,%mm1,%mm2
+	pshufw	$177,%mm5,%mm6
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	pslld	$8,%mm1
+	pslld	$8,%mm5
+	psrld	$8,%mm3
+	psrld	$8,%mm7
+	movq	(%edi),%mm2
+	movq	8(%edi),%mm6
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	movl	-128(%ebp),%eax
+	pslld	$16,%mm1
+	pslld	$16,%mm5
+	movl	-64(%ebp),%ebx
+	psrld	$16,%mm3
+	psrld	$16,%mm7
+	movl	(%ebp),%ecx
+	pxor	%mm1,%mm0
+	pxor	%mm5,%mm4
+	movl	64(%ebp),%edx
+	pxor	%mm3,%mm0
+	pxor	%mm7,%mm4
+	pxor	%mm2,%mm0
+	pxor	%mm6,%mm4
+	jmp	.L007loop
+.align	16
+.L008out:
+	pxor	(%edi),%mm0
+	pxor	8(%edi),%mm4
+	ret
+.size	_sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact
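+# _x86_AES_decrypt: table-driven decryption over the 2KB .LAES_Td table;
+# the final round switches to the inverse S-box stored 2048 bytes past
+# the table base.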
+.type	_x86_AES_decrypt,@function
+.align	16
+_x86_AES_decrypt:
+	movl	%edi,20(%esp)
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,24(%esp)
+.align	16
+.L009loop:
+	movl	%eax,%esi
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%dh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,4(%esp)
+
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%ah,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+	movl	%esi,8(%esp)
+
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movl	(%ebp,%esi,8),%esi
+	movzbl	%bh,%edi
+	xorl	3(%ebp,%edi,8),%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	xorl	2(%ebp,%edi,8),%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	xorl	1(%ebp,%edi,8),%esi
+
+	movl	20(%esp),%edi
+	andl	$255,%edx
+	movl	(%ebp,%edx,8),%edx
+	movzbl	%ch,%ecx
+	xorl	3(%ebp,%ecx,8),%edx
+	movl	%esi,%ecx
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	xorl	2(%ebp,%ebx,8),%edx
+	movl	8(%esp),%ebx
+	shrl	$24,%eax
+	xorl	1(%ebp,%eax,8),%edx
+	movl	4(%esp),%eax
+
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	cmpl	24(%esp),%edi
+	movl	%edi,20(%esp)
+	jb	.L009loop
+	leal	2176(%ebp),%ebp
+	movl	-128(%ebp),%edi
+	movl	-96(%ebp),%esi
+	movl	-64(%ebp),%edi
+	movl	-32(%ebp),%esi
+	movl	(%ebp),%edi
+	movl	32(%ebp),%esi
+	movl	64(%ebp),%edi
+	movl	96(%ebp),%esi
+	leal	-128(%ebp),%ebp
+	movl	%eax,%esi
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%dh,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ebx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,4(%esp)
+	movl	%ebx,%esi
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%ah,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%ecx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	%esi,8(%esp)
+	movl	%ecx,%esi
+	andl	$255,%esi
+	movzbl	(%ebp,%esi,1),%esi
+	movzbl	%bh,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$8,%edi
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	shrl	$16,%edi
+	andl	$255,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$16,%edi
+	xorl	%edi,%esi
+	movl	%edx,%edi
+	shrl	$24,%edi
+	movzbl	(%ebp,%edi,1),%edi
+	shll	$24,%edi
+	xorl	%edi,%esi
+	movl	20(%esp),%edi
+	andl	$255,%edx
+	movzbl	(%ebp,%edx,1),%edx
+	movzbl	%ch,%ecx
+	movzbl	(%ebp,%ecx,1),%ecx
+	shll	$8,%ecx
+	xorl	%ecx,%edx
+	movl	%esi,%ecx
+	shrl	$16,%ebx
+	andl	$255,%ebx
+	movzbl	(%ebp,%ebx,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%edx
+	movl	8(%esp),%ebx
+	shrl	$24,%eax
+	movzbl	(%ebp,%eax,1),%eax
+	shll	$24,%eax
+	xorl	%eax,%edx
+	movl	4(%esp),%eax
+	leal	-2048(%ebp),%ebp
+	addl	$16,%edi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	ret
+.align	64
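+# .LAES_Td: decryption T-table.  Each word is stored twice (8 bytes per
+# entry, hence the ,8 index scale above) so the rotated variants can be
+# fetched at byte offsets 1..3 within a doubled entry.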
+.LAES_Td:
+.long	1353184337,1353184337
+.long	1399144830,1399144830
+.long	3282310938,3282310938
+.long	2522752826,2522752826
+.long	3412831035,3412831035
+.long	4047871263,4047871263
+.long	2874735276,2874735276
+.long	2466505547,2466505547
+.long	1442459680,1442459680
+.long	4134368941,4134368941
+.long	2440481928,2440481928
+.long	625738485,625738485
+.long	4242007375,4242007375
+.long	3620416197,3620416197
+.long	2151953702,2151953702
+.long	2409849525,2409849525
+.long	1230680542,1230680542
+.long	1729870373,1729870373
+.long	2551114309,2551114309
+.long	3787521629,3787521629
+.long	41234371,41234371
+.long	317738113,317738113
+.long	2744600205,2744600205
+.long	3338261355,3338261355
+.long	3881799427,3881799427
+.long	2510066197,2510066197
+.long	3950669247,3950669247
+.long	3663286933,3663286933
+.long	763608788,763608788
+.long	3542185048,3542185048
+.long	694804553,694804553
+.long	1154009486,1154009486
+.long	1787413109,1787413109
+.long	2021232372,2021232372
+.long	1799248025,1799248025
+.long	3715217703,3715217703
+.long	3058688446,3058688446
+.long	397248752,397248752
+.long	1722556617,1722556617
+.long	3023752829,3023752829
+.long	407560035,407560035
+.long	2184256229,2184256229
+.long	1613975959,1613975959
+.long	1165972322,1165972322
+.long	3765920945,3765920945
+.long	2226023355,2226023355
+.long	480281086,480281086
+.long	2485848313,2485848313
+.long	1483229296,1483229296
+.long	436028815,436028815
+.long	2272059028,2272059028
+.long	3086515026,3086515026
+.long	601060267,601060267
+.long	3791801202,3791801202
+.long	1468997603,1468997603
+.long	715871590,715871590
+.long	120122290,120122290
+.long	63092015,63092015
+.long	2591802758,2591802758
+.long	2768779219,2768779219
+.long	4068943920,4068943920
+.long	2997206819,2997206819
+.long	3127509762,3127509762
+.long	1552029421,1552029421
+.long	723308426,723308426
+.long	2461301159,2461301159
+.long	4042393587,4042393587
+.long	2715969870,2715969870
+.long	3455375973,3455375973
+.long	3586000134,3586000134
+.long	526529745,526529745
+.long	2331944644,2331944644
+.long	2639474228,2639474228
+.long	2689987490,2689987490
+.long	853641733,853641733
+.long	1978398372,1978398372
+.long	971801355,971801355
+.long	2867814464,2867814464
+.long	111112542,111112542
+.long	1360031421,1360031421
+.long	4186579262,4186579262
+.long	1023860118,1023860118
+.long	2919579357,2919579357
+.long	1186850381,1186850381
+.long	3045938321,3045938321
+.long	90031217,90031217
+.long	1876166148,1876166148
+.long	4279586912,4279586912
+.long	620468249,620468249
+.long	2548678102,2548678102
+.long	3426959497,3426959497
+.long	2006899047,2006899047
+.long	3175278768,3175278768
+.long	2290845959,2290845959
+.long	945494503,945494503
+.long	3689859193,3689859193
+.long	1191869601,1191869601
+.long	3910091388,3910091388
+.long	3374220536,3374220536
+.long	0,0
+.long	2206629897,2206629897
+.long	1223502642,1223502642
+.long	2893025566,2893025566
+.long	1316117100,1316117100
+.long	4227796733,4227796733
+.long	1446544655,1446544655
+.long	517320253,517320253
+.long	658058550,658058550
+.long	1691946762,1691946762
+.long	564550760,564550760
+.long	3511966619,3511966619
+.long	976107044,976107044
+.long	2976320012,2976320012
+.long	266819475,266819475
+.long	3533106868,3533106868
+.long	2660342555,2660342555
+.long	1338359936,1338359936
+.long	2720062561,2720062561
+.long	1766553434,1766553434
+.long	370807324,370807324
+.long	179999714,179999714
+.long	3844776128,3844776128
+.long	1138762300,1138762300
+.long	488053522,488053522
+.long	185403662,185403662
+.long	2915535858,2915535858
+.long	3114841645,3114841645
+.long	3366526484,3366526484
+.long	2233069911,2233069911
+.long	1275557295,1275557295
+.long	3151862254,3151862254
+.long	4250959779,4250959779
+.long	2670068215,2670068215
+.long	3170202204,3170202204
+.long	3309004356,3309004356
+.long	880737115,880737115
+.long	1982415755,1982415755
+.long	3703972811,3703972811
+.long	1761406390,1761406390
+.long	1676797112,1676797112
+.long	3403428311,3403428311
+.long	277177154,277177154
+.long	1076008723,1076008723
+.long	538035844,538035844
+.long	2099530373,2099530373
+.long	4164795346,4164795346
+.long	288553390,288553390
+.long	1839278535,1839278535
+.long	1261411869,1261411869
+.long	4080055004,4080055004
+.long	3964831245,3964831245
+.long	3504587127,3504587127
+.long	1813426987,1813426987
+.long	2579067049,2579067049
+.long	4199060497,4199060497
+.long	577038663,577038663
+.long	3297574056,3297574056
+.long	440397984,440397984
+.long	3626794326,3626794326
+.long	4019204898,4019204898
+.long	3343796615,3343796615
+.long	3251714265,3251714265
+.long	4272081548,4272081548
+.long	906744984,906744984
+.long	3481400742,3481400742
+.long	685669029,685669029
+.long	646887386,646887386
+.long	2764025151,2764025151
+.long	3835509292,3835509292
+.long	227702864,227702864
+.long	2613862250,2613862250
+.long	1648787028,1648787028
+.long	3256061430,3256061430
+.long	3904428176,3904428176
+.long	1593260334,1593260334
+.long	4121936770,4121936770
+.long	3196083615,3196083615
+.long	2090061929,2090061929
+.long	2838353263,2838353263
+.long	3004310991,3004310991
+.long	999926984,999926984
+.long	2809993232,2809993232
+.long	1852021992,1852021992
+.long	2075868123,2075868123
+.long	158869197,158869197
+.long	4095236462,4095236462
+.long	28809964,28809964
+.long	2828685187,2828685187
+.long	1701746150,1701746150
+.long	2129067946,2129067946
+.long	147831841,147831841
+.long	3873969647,3873969647
+.long	3650873274,3650873274
+.long	3459673930,3459673930
+.long	3557400554,3557400554
+.long	3598495785,3598495785
+.long	2947720241,2947720241
+.long	824393514,824393514
+.long	815048134,815048134
+.long	3227951669,3227951669
+.long	935087732,935087732
+.long	2798289660,2798289660
+.long	2966458592,2966458592
+.long	366520115,366520115
+.long	1251476721,1251476721
+.long	4158319681,4158319681
+.long	240176511,240176511
+.long	804688151,804688151
+.long	2379631990,2379631990
+.long	1303441219,1303441219
+.long	1414376140,1414376140
+.long	3741619940,3741619940
+.long	3820343710,3820343710
+.long	461924940,461924940
+.long	3089050817,3089050817
+.long	2136040774,2136040774
+.long	82468509,82468509
+.long	1563790337,1563790337
+.long	1937016826,1937016826
+.long	776014843,776014843
+.long	1511876531,1511876531
+.long	1389550482,1389550482
+.long	861278441,861278441
+.long	323475053,323475053
+.long	2355222426,2355222426
+.long	2047648055,2047648055
+.long	2383738969,2383738969
+.long	2302415851,2302415851
+.long	3995576782,3995576782
+.long	902390199,902390199
+.long	3991215329,3991215329
+.long	1018251130,1018251130
+.long	1507840668,1507840668
+.long	1064563285,1064563285
+.long	2043548696,2043548696
+.long	3208103795,3208103795
+.long	3939366739,3939366739
+.long	1537932639,1537932639
+.long	342834655,342834655
+.long	2262516856,2262516856
+.long	2180231114,2180231114
+.long	1053059257,1053059257
+.long	741614648,741614648
+.long	1598071746,1598071746
+.long	1925389590,1925389590
+.long	203809468,203809468
+.long	2336832552,2336832552
+.long	1100287487,1100287487
+.long	1895934009,1895934009
+.long	3736275976,3736275976
+.long	2632234200,2632234200
+.long	2428589668,2428589668
+.long	1636092795,1636092795
+.long	1890988757,1890988757
+.long	1952214088,1952214088
+.long	1113045200,1113045200
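+# Td4: the inverse S-box, replicated four times (4x256 bytes).
+# AES_decrypt picks one copy from the frame/table distance (andl $768),
+# apparently to vary which cache lines the last round touches.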
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.byte	82,9,106,213,48,54,165,56
+.byte	191,64,163,158,129,243,215,251
+.byte	124,227,57,130,155,47,255,135
+.byte	52,142,67,68,196,222,233,203
+.byte	84,123,148,50,166,194,35,61
+.byte	238,76,149,11,66,250,195,78
+.byte	8,46,161,102,40,217,36,178
+.byte	118,91,162,73,109,139,209,37
+.byte	114,248,246,100,134,104,152,22
+.byte	212,164,92,204,93,101,182,146
+.byte	108,112,72,80,253,237,185,218
+.byte	94,21,70,87,167,141,157,132
+.byte	144,216,171,0,140,188,211,10
+.byte	247,228,88,5,184,179,69,6
+.byte	208,44,30,143,202,63,15,2
+.byte	193,175,189,3,1,19,138,107
+.byte	58,145,17,65,79,103,220,234
+.byte	151,242,207,206,240,180,230,115
+.byte	150,172,116,34,231,173,53,133
+.byte	226,249,55,232,28,117,223,110
+.byte	71,241,26,113,29,41,197,137
+.byte	111,183,98,14,170,24,190,27
+.byte	252,86,62,75,198,210,121,32
+.byte	154,219,192,254,120,205,90,244
+.byte	31,221,168,51,136,7,199,49
+.byte	177,18,16,89,39,128,236,95
+.byte	96,81,127,169,25,181,74,13
+.byte	45,229,122,159,147,201,156,239
+.byte	160,224,59,77,174,42,245,176
+.byte	200,235,187,60,131,83,153,97
+.byte	23,43,4,126,186,119,214,38
+.byte	225,105,20,99,85,33,12,125
+.size	_x86_AES_decrypt,.-_x86_AES_decrypt
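+# AES_decrypt: public one-block entry point, mirroring AES_encrypt:
+# align a scratch frame, select a Td4 copy, then take the SSE or plain
+# x86 compact path depending on OPENSSL_ia32cap_P bit 25.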
+.globl	AES_decrypt
+.type	AES_decrypt,@function
+.align	16
+AES_decrypt:
+.L_AES_decrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	28(%esp),%edi
+	movl	%esp,%eax
+	subl	$36,%esp
+	andl	$-64,%esp
+	leal	-127(%edi),%ebx
+	subl	%esp,%ebx
+	negl	%ebx
+	andl	$960,%ebx
+	subl	%ebx,%esp
+	addl	$4,%esp
+	movl	%eax,28(%esp)
+	call	.L010pic_point
+.L010pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P,%eax
+	leal	.LAES_Td-.L010pic_point(%ebp),%ebp
+	leal	764(%esp),%ebx
+	subl	%ebp,%ebx
+	andl	$768,%ebx
+	leal	2176(%ebp,%ebx,1),%ebp
+	btl	$25,(%eax)
+	jnc	.L011x86
+	movq	(%esi),%mm0
+	movq	8(%esi),%mm4
+	call	_sse_AES_decrypt_compact
+	movl	28(%esp),%esp
+	movl	24(%esp),%esi
+	movq	%mm0,(%esi)
+	movq	%mm4,8(%esi)
+	emms
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	16
+.L011x86:
+	movl	%ebp,24(%esp)
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	call	_x86_AES_decrypt_compact
+	movl	28(%esp),%esp
+	movl	24(%esp),%esi
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	AES_decrypt,.-.L_AES_decrypt_begin
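+# AES_cbc_encrypt: CBC entry point.  Inputs of at least 512 bytes whose
+# length is a multiple of 16 take the big-table fast path, unless CPUID
+# flags hyper-threading (bit 28); short or odd-length input and HT CPUs
+# fall back to the compact routines, presumably to limit the shared-cache
+# footprint.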
+.globl	AES_cbc_encrypt
+.type	AES_cbc_encrypt,@function
+.align	16
+AES_cbc_encrypt:
+.L_AES_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%ecx
+	cmpl	$0,%ecx
+	je	.L012drop_out
+	call	.L013pic_point
+.L013pic_point:
+	popl	%ebp
+	leal	OPENSSL_ia32cap_P,%eax
+	cmpl	$0,40(%esp)
+	leal	.LAES_Te-.L013pic_point(%ebp),%ebp
+	jne	.L014picked_te
+	leal	.LAES_Td-.LAES_Te(%ebp),%ebp
+.L014picked_te:
+	pushfl
+	cld
+	cmpl	$512,%ecx
+	jb	.L015slow_way
+	testl	$15,%ecx
+	jnz	.L015slow_way
+	btl	$28,(%eax)
+	jc	.L015slow_way
+	leal	-324(%esp),%esi
+	andl	$-64,%esi
+	movl	%ebp,%eax
+	leal	2304(%ebp),%ebx
+	movl	%esi,%edx
+	andl	$4095,%eax
+	andl	$4095,%ebx
+	andl	$4095,%edx
+	cmpl	%ebx,%edx
+	jb	.L016tbl_break_out
+	subl	%ebx,%edx
+	subl	%edx,%esi
+	jmp	.L017tbl_ok
+.align	4
+.L016tbl_break_out:
+	subl	%eax,%edx
+	andl	$4095,%edx
+	addl	$384,%edx
+	subl	%edx,%esi
+.align	4
+.L017tbl_ok:
+	leal	24(%esp),%edx
+	xchgl	%esi,%esp
+	addl	$4,%esp
+	movl	%ebp,24(%esp)
+	movl	%esi,28(%esp)
+	movl	(%edx),%eax
+	movl	4(%edx),%ebx
+	movl	12(%edx),%edi
+	movl	16(%edx),%esi
+	movl	20(%edx),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,40(%esp)
+	movl	%edi,44(%esp)
+	movl	%esi,48(%esp)
+	movl	$0,316(%esp)
+	movl	%edi,%ebx
+	movl	$61,%ecx
+	subl	%ebp,%ebx
+	movl	%edi,%esi
+	andl	$4095,%ebx
+	leal	76(%esp),%edi
+	cmpl	$2304,%ebx
+	jb	.L018do_copy
+	cmpl	$3852,%ebx
+	jb	.L019skip_copy
+.align	4
+.L018do_copy:
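+# the .long below is a hand-assembled "mov %esi,%esi; rep movsd"
+# (0xA5F3F689): copy the key schedule into the aligned stack frame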
+	movl	%edi,44(%esp)
+.long	2784229001
+.L019skip_copy:
+	movl	$16,%edi
+.align	4
+.L020prefetch_tbl:
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%esi
+	leal	128(%ebp),%ebp
+	subl	$1,%edi
+	jnz	.L020prefetch_tbl
+	subl	$2048,%ebp
+	movl	32(%esp),%esi
+	movl	48(%esp),%edi
+	cmpl	$0,%edx
+	je	.L021fast_decrypt
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+.align	16
+.L022fast_enc_loop:
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	xorl	(%esi),%eax
+	xorl	4(%esi),%ebx
+	xorl	8(%esi),%ecx
+	xorl	12(%esi),%edx
+	movl	44(%esp),%edi
+	call	_x86_AES_encrypt
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	leal	16(%esi),%esi
+	movl	40(%esp),%ecx
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)
+	jnz	.L022fast_enc_loop
+	movl	48(%esp),%esi
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	cmpl	$0,316(%esp)
+	movl	44(%esp),%edi
+	je	.L023skip_ezero
+	movl	$60,%ecx
+	xorl	%eax,%eax
+.align	4
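+# hand-assembled "mov %esi,%esi; rep stosd" (0xABF3F689): wipe the
+# on-stack key schedule copy before returning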
+.long	2884892297
+.L023skip_ezero:
+	movl	28(%esp),%esp
+	popfl
+.L012drop_out:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L021fast_decrypt:
+	cmpl	36(%esp),%esi
+	je	.L024fast_dec_in_place
+	movl	%edi,52(%esp)
+.align	4
+.align	16
+.L025fast_dec_loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	44(%esp),%edi
+	call	_x86_AES_decrypt
+	movl	52(%esp),%edi
+	movl	40(%esp),%esi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	36(%esp),%edi
+	movl	32(%esp),%esi
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx
+	movl	%esi,52(%esp)
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edi
+	movl	%edi,36(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)
+	jnz	.L025fast_dec_loop
+	movl	52(%esp),%edi
+	movl	48(%esp),%esi
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	jmp	.L026fast_dec_out
+.align	16
+.L024fast_dec_in_place:
+.L027fast_dec_in_place_loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	leal	60(%esp),%edi
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	44(%esp),%edi
+	call	_x86_AES_decrypt
+	movl	48(%esp),%edi
+	movl	36(%esp),%esi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	leal	16(%esi),%esi
+	movl	%esi,36(%esp)
+	leal	60(%esp),%esi
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	32(%esp),%esi
+	movl	40(%esp),%ecx
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	subl	$16,%ecx
+	movl	%ecx,40(%esp)
+	jnz	.L027fast_dec_in_place_loop
+.align	4
+.L026fast_dec_out:
+	cmpl	$0,316(%esp)
+	movl	44(%esp),%edi
+	je	.L028skip_dzero
+	movl	$60,%ecx
+	xorl	%eax,%eax
+.align	4
+.long	2884892297
+.L028skip_dzero:
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L015slow_way:
+	movl	(%eax),%eax
+	movl	36(%esp),%edi
+	leal	-80(%esp),%esi
+	andl	$-64,%esi
+	leal	-143(%edi),%ebx
+	subl	%esi,%ebx
+	negl	%ebx
+	andl	$960,%ebx
+	subl	%ebx,%esi
+	leal	768(%esi),%ebx
+	subl	%ebp,%ebx
+	andl	$768,%ebx
+	leal	2176(%ebp,%ebx,1),%ebp
+	leal	24(%esp),%edx
+	xchgl	%esi,%esp
+	addl	$4,%esp
+	movl	%ebp,24(%esp)
+	movl	%esi,28(%esp)
+	movl	%eax,52(%esp)
+	movl	(%edx),%eax
+	movl	4(%edx),%ebx
+	movl	16(%edx),%esi
+	movl	20(%edx),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,40(%esp)
+	movl	%edi,44(%esp)
+	movl	%esi,48(%esp)
+	movl	%esi,%edi
+	movl	%eax,%esi
+	cmpl	$0,%edx
+	je	.L029slow_decrypt
+	cmpl	$16,%ecx
+	movl	%ebx,%edx
+	jb	.L030slow_enc_tail
+	btl	$25,52(%esp)
+	jnc	.L031slow_enc_x86
+	movq	(%edi),%mm0
+	movq	8(%edi),%mm4
+.align	16
+.L032slow_enc_loop_sse:
+	pxor	(%esi),%mm0
+	pxor	8(%esi),%mm4
+	movl	44(%esp),%edi
+	call	_sse_AES_encrypt_compact
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	40(%esp),%ecx
+	movq	%mm0,(%edi)
+	movq	%mm4,8(%edi)
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	cmpl	$16,%ecx
+	movl	%ecx,40(%esp)
+	jae	.L032slow_enc_loop_sse
+	testl	$15,%ecx
+	jnz	.L030slow_enc_tail
+	movl	48(%esp),%esi
+	movq	%mm0,(%esi)
+	movq	%mm4,8(%esi)
+	emms
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L031slow_enc_x86:
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+.align	4
+.L033slow_enc_loop_x86:
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	xorl	(%esi),%eax
+	xorl	4(%esi),%ebx
+	xorl	8(%esi),%ecx
+	xorl	12(%esi),%edx
+	movl	44(%esp),%edi
+	call	_x86_AES_encrypt_compact
+	movl	32(%esp),%esi
+	movl	36(%esp),%edi
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	leal	16(%edi),%edx
+	movl	%edx,36(%esp)
+	subl	$16,%ecx
+	cmpl	$16,%ecx
+	movl	%ecx,40(%esp)
+	jae	.L033slow_enc_loop_x86
+	testl	$15,%ecx
+	jnz	.L030slow_enc_tail
+	movl	48(%esp),%esi
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L030slow_enc_tail:
+	emms
+	movl	%edx,%edi
+	movl	$16,%ebx
+	subl	%ecx,%ebx
+	cmpl	%esi,%edi
+	je	.L034enc_in_place
+.align	4
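+# hand-assembled "mov %esi,%esi; rep movsb" (0xA4F3F689): move the
+# ragged tail of the input into place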
+.long	2767451785
+	jmp	.L035enc_skip_in_place
+.L034enc_in_place:
+	leal	(%edi,%ecx,1),%edi
+.L035enc_skip_in_place:
+	movl	%ebx,%ecx
+	xorl	%eax,%eax
+.align	4
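+# hand-assembled "mov %esi,%esi; rep stosb" (0xAAF3F689): zero-pad the
+# final partial block up to 16 bytes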
+.long	2868115081
+	movl	48(%esp),%edi
+	movl	%edx,%esi
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	$16,40(%esp)
+	jmp	.L033slow_enc_loop_x86
+.align	16
+.L029slow_decrypt:
+	btl	$25,52(%esp)
+	jnc	.L036slow_dec_loop_x86
+.align	4
+.L037slow_dec_loop_sse:
+	movq	(%esi),%mm0
+	movq	8(%esi),%mm4
+	movl	44(%esp),%edi
+	call	_sse_AES_decrypt_compact
+	movl	32(%esp),%esi
+	leal	60(%esp),%eax
+	movl	36(%esp),%ebx
+	movl	40(%esp),%ecx
+	movl	48(%esp),%edi
+	movq	(%esi),%mm1
+	movq	8(%esi),%mm5
+	pxor	(%edi),%mm0
+	pxor	8(%edi),%mm4
+	movq	%mm1,(%edi)
+	movq	%mm5,8(%edi)
+	subl	$16,%ecx
+	jc	.L038slow_dec_partial_sse
+	movq	%mm0,(%ebx)
+	movq	%mm4,8(%ebx)
+	leal	16(%ebx),%ebx
+	movl	%ebx,36(%esp)
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	movl	%ecx,40(%esp)
+	jnz	.L037slow_dec_loop_sse
+	emms
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L038slow_dec_partial_sse:
+	movq	%mm0,(%eax)
+	movq	%mm4,8(%eax)
+	emms
+	addl	$16,%ecx
+	movl	%ebx,%edi
+	movl	%eax,%esi
+.align	4
+.long	2767451785
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L036slow_dec_loop_x86:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	leal	60(%esp),%edi
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	44(%esp),%edi
+	call	_x86_AES_decrypt_compact
+	movl	48(%esp),%edi
+	movl	40(%esp),%esi
+	xorl	(%edi),%eax
+	xorl	4(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	12(%edi),%edx
+	subl	$16,%esi
+	jc	.L039slow_dec_partial_x86
+	movl	%esi,40(%esp)
+	movl	36(%esp),%esi
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	leal	16(%esi),%esi
+	movl	%esi,36(%esp)
+	leal	60(%esp),%esi
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	32(%esp),%esi
+	leal	16(%esi),%esi
+	movl	%esi,32(%esp)
+	jnz	.L036slow_dec_loop_x86
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+	pushfl
+.align	16
+.L039slow_dec_partial_x86:
+	leal	60(%esp),%esi
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	32(%esp),%esi
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	40(%esp),%ecx
+	movl	36(%esp),%edi
+	leal	60(%esp),%esi
+.align	4
+.long	2767451785
+	movl	28(%esp),%esp
+	popfl
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin
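+# _x86_AES_set_encrypt_key: expands a 128/192/256-bit user key into a
+# 10/12/14-round schedule (S-box read at -128(%ebp), round constants at
+# 896(%ebp)).  Returns 0 on success, -1 for a NULL pointer, -2 for a bad
+# key length.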
+.type	_x86_AES_set_encrypt_key,@function
+.align	16
+_x86_AES_set_encrypt_key:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	24(%esp),%esi
+	movl	32(%esp),%edi
+	testl	$-1,%esi
+	jz	.L040badpointer
+	testl	$-1,%edi
+	jz	.L040badpointer
+	call	.L041pic_point
+.L041pic_point:
+	popl	%ebp
+	leal	.LAES_Te-.L041pic_point(%ebp),%ebp
+	leal	2176(%ebp),%ebp
+	movl	-128(%ebp),%eax
+	movl	-96(%ebp),%ebx
+	movl	-64(%ebp),%ecx
+	movl	-32(%ebp),%edx
+	movl	(%ebp),%eax
+	movl	32(%ebp),%ebx
+	movl	64(%ebp),%ecx
+	movl	96(%ebp),%edx
+	movl	28(%esp),%ecx
+	cmpl	$128,%ecx
+	je	.L04210rounds
+	cmpl	$192,%ecx
+	je	.L04312rounds
+	cmpl	$256,%ecx
+	je	.L04414rounds
+	movl	$-2,%eax
+	jmp	.L045exit
+.L04210rounds:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	xorl	%ecx,%ecx
+	jmp	.L04610shortcut
+.align	4
+.L04710loop:
+	movl	(%edi),%eax
+	movl	12(%edi),%edx
+.L04610shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax
+	movl	%eax,16(%edi)
+	xorl	4(%edi),%eax
+	movl	%eax,20(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,24(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,28(%edi)
+	incl	%ecx
+	addl	$16,%edi
+	cmpl	$10,%ecx
+	jl	.L04710loop
+	movl	$10,80(%edi)
+	xorl	%eax,%eax
+	jmp	.L045exit
+.L04312rounds:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	16(%esi),%ecx
+	movl	20(%esi),%edx
+	movl	%ecx,16(%edi)
+	movl	%edx,20(%edi)
+	xorl	%ecx,%ecx
+	jmp	.L04812shortcut
+.align	4
+.L04912loop:
+	movl	(%edi),%eax
+	movl	20(%edi),%edx
+.L04812shortcut:
+	movzbl	%dl,%esi
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax
+	movl	%eax,24(%edi)
+	xorl	4(%edi),%eax
+	movl	%eax,28(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,32(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,36(%edi)
+	cmpl	$7,%ecx
+	je	.L05012break
+	incl	%ecx
+	xorl	16(%edi),%eax
+	movl	%eax,40(%edi)
+	xorl	20(%edi),%eax
+	movl	%eax,44(%edi)
+	addl	$24,%edi
+	jmp	.L04912loop
+.L05012break:
+	movl	$12,72(%edi)
+	xorl	%eax,%eax
+	jmp	.L045exit
+.L04414rounds:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,12(%edi)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	movl	%eax,16(%edi)
+	movl	%ebx,20(%edi)
+	movl	%ecx,24(%edi)
+	movl	%edx,28(%edi)
+	xorl	%ecx,%ecx
+	jmp	.L05114shortcut
+.align	4
+.L05214loop:
+	movl	28(%edi),%edx
+.L05114shortcut:
+	movl	(%edi),%eax
+	movzbl	%dl,%esi
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$8,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	xorl	896(%ebp,%ecx,4),%eax
+	movl	%eax,32(%edi)
+	xorl	4(%edi),%eax
+	movl	%eax,36(%edi)
+	xorl	8(%edi),%eax
+	movl	%eax,40(%edi)
+	xorl	12(%edi),%eax
+	movl	%eax,44(%edi)
+	cmpl	$6,%ecx
+	je	.L05314break
+	incl	%ecx
+	movl	%eax,%edx
+	movl	16(%edi),%eax
+	movzbl	%dl,%esi
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shrl	$16,%edx
+	shll	$8,%ebx
+	movzbl	%dl,%esi
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	movzbl	%dh,%esi
+	shll	$16,%ebx
+	xorl	%ebx,%eax
+	movzbl	-128(%ebp,%esi,1),%ebx
+	shll	$24,%ebx
+	xorl	%ebx,%eax
+	movl	%eax,48(%edi)
+	xorl	20(%edi),%eax
+	movl	%eax,52(%edi)
+	xorl	24(%edi),%eax
+	movl	%eax,56(%edi)
+	xorl	28(%edi),%eax
+	movl	%eax,60(%edi)
+	addl	$32,%edi
+	jmp	.L05214loop
+.L05314break:
+	movl	$14,48(%edi)
+	xorl	%eax,%eax
+	jmp	.L045exit
+.L040badpointer:
+	movl	$-1,%eax
+.L045exit:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	_x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
+.globl	private_AES_set_encrypt_key
+.type	private_AES_set_encrypt_key,@function
+.align	16
+private_AES_set_encrypt_key:
+.L_private_AES_set_encrypt_key_begin:
+	call	_x86_AES_set_encrypt_key
+	ret
+.size	private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin
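+# private_AES_set_decrypt_key: builds the encryption schedule, reverses
+# the order of the round keys (.L055invert), then applies InvMixColumns
+# to every round key except the first and last (.L056permute).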
+.globl	private_AES_set_decrypt_key
+.type	private_AES_set_decrypt_key,@function
+.align	16
+private_AES_set_decrypt_key:
+.L_private_AES_set_decrypt_key_begin:
+	call	_x86_AES_set_encrypt_key
+	cmpl	$0,%eax
+	je	.L054proceed
+	ret
+.L054proceed:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%esi
+	movl	240(%esi),%ecx
+	leal	(,%ecx,4),%ecx
+	leal	(%esi,%ecx,4),%edi
+.align	4
+.L055invert:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	(%edi),%ecx
+	movl	4(%edi),%edx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	movl	%ecx,(%esi)
+	movl	%edx,4(%esi)
+	movl	8(%esi),%eax
+	movl	12(%esi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	movl	%eax,8(%edi)
+	movl	%ebx,12(%edi)
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	addl	$16,%esi
+	subl	$16,%edi
+	cmpl	%edi,%esi
+	jne	.L055invert
+	movl	28(%esp),%edi
+	movl	240(%edi),%esi
+	leal	-2(%esi,%esi,1),%esi
+	leal	(%edi,%esi,8),%esi
+	movl	%esi,28(%esp)
+	movl	16(%edi),%eax
+.align	4
+.L056permute:
+	addl	$16,%edi
+	movl	%eax,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%eax,%eax,1),%ebx
+	subl	%ebp,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%ebx,%esi
+	movl	%esi,%ebx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%eax,%ebx
+	xorl	%ecx,%esi
+	movl	%esi,%ecx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	xorl	%eax,%ecx
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	roll	$8,%eax
+	xorl	%esi,%edx
+	movl	4(%edi),%ebp
+	xorl	%ebx,%eax
+	xorl	%edx,%ebx
+	xorl	%ecx,%eax
+	roll	$24,%ebx
+	xorl	%edx,%ecx
+	xorl	%edx,%eax
+	roll	$16,%ecx
+	xorl	%ebx,%eax
+	roll	$8,%edx
+	xorl	%ecx,%eax
+	movl	%ebp,%ebx
+	xorl	%edx,%eax
+	movl	%eax,(%edi)
+	movl	%ebx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	xorl	%ecx,%esi
+	movl	%esi,%ecx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%ebx,%ecx
+	xorl	%edx,%esi
+	movl	%esi,%edx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%edx,%edx,1),%eax
+	xorl	%ebx,%edx
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	roll	$8,%ebx
+	xorl	%esi,%eax
+	movl	8(%edi),%ebp
+	xorl	%ecx,%ebx
+	xorl	%eax,%ecx
+	xorl	%edx,%ebx
+	roll	$24,%ecx
+	xorl	%eax,%edx
+	xorl	%eax,%ebx
+	roll	$16,%edx
+	xorl	%ecx,%ebx
+	roll	$8,%eax
+	xorl	%edx,%ebx
+	movl	%ebp,%ecx
+	xorl	%eax,%ebx
+	movl	%ebx,4(%edi)
+	movl	%ecx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ecx,%ecx,1),%edx
+	subl	%ebp,%esi
+	andl	$4278124286,%edx
+	andl	$454761243,%esi
+	xorl	%edx,%esi
+	movl	%esi,%edx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%edx,%edx,1),%eax
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	xorl	%ecx,%edx
+	xorl	%eax,%esi
+	movl	%esi,%eax
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%eax,%eax,1),%ebx
+	xorl	%ecx,%eax
+	subl	%ebp,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	roll	$8,%ecx
+	xorl	%esi,%ebx
+	movl	12(%edi),%ebp
+	xorl	%edx,%ecx
+	xorl	%ebx,%edx
+	xorl	%eax,%ecx
+	roll	$24,%edx
+	xorl	%ebx,%eax
+	xorl	%ebx,%ecx
+	roll	$16,%eax
+	xorl	%edx,%ecx
+	roll	$8,%ebx
+	xorl	%eax,%ecx
+	movl	%ebp,%edx
+	xorl	%ebx,%ecx
+	movl	%ecx,8(%edi)
+	movl	%edx,%esi
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%edx,%edx,1),%eax
+	subl	%ebp,%esi
+	andl	$4278124286,%eax
+	andl	$454761243,%esi
+	xorl	%eax,%esi
+	movl	%esi,%eax
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%eax,%eax,1),%ebx
+	subl	%ebp,%esi
+	andl	$4278124286,%ebx
+	andl	$454761243,%esi
+	xorl	%edx,%eax
+	xorl	%ebx,%esi
+	movl	%esi,%ebx
+	andl	$2155905152,%esi
+	movl	%esi,%ebp
+	shrl	$7,%ebp
+	leal	(%ebx,%ebx,1),%ecx
+	xorl	%edx,%ebx
+	subl	%ebp,%esi
+	andl	$4278124286,%ecx
+	andl	$454761243,%esi
+	roll	$8,%edx
+	xorl	%esi,%ecx
+	movl	16(%edi),%ebp
+	xorl	%eax,%edx
+	xorl	%ecx,%eax
+	xorl	%ebx,%edx
+	roll	$24,%eax
+	xorl	%ecx,%ebx
+	xorl	%ecx,%edx
+	roll	$16,%ebx
+	xorl	%eax,%edx
+	roll	$8,%ecx
+	xorl	%ebx,%edx
+	movl	%ebp,%eax
+	xorl	%ecx,%edx
+	movl	%edx,12(%edi)
+	cmpl	28(%esp),%edi
+	jb	.L056permute
+	xorl	%eax,%eax
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin
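+# ASCII: "AES for x86, CRYPTOGAMS by <appro@openssl.org>"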
+.byte	65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
+.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
+.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.comm	OPENSSL_ia32cap_P,8,4
diff --git a/crypto/aes/asm/aesni-x86.s b/crypto/aes/asm/aesni-x86.s
new file mode 100644
index 0000000..0766bb5
--- /dev/null
+++ b/crypto/aes/asm/aesni-x86.s
@@ -0,0 +1,2143 @@
+.file	"crypto/aes/asm/aesni-x86.s"
+.text
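+# AES-NI instructions are emitted as raw .byte sequences (e.g.
+# 102,15,56,220,209 == aesenc %xmm1,%xmm2) so the file still assembles
+# with binutils that predate AES-NI support.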
+.globl	aesni_encrypt
+.type	aesni_encrypt,@function
+.align	16
+aesni_encrypt:
+.L_aesni_encrypt_begin:
+	movl	4(%esp),%eax
+	movl	12(%esp),%edx
+	movups	(%eax),%xmm2
+	movl	240(%edx),%ecx
+	movl	8(%esp),%eax
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L000enc1_loop_1:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L000enc1_loop_1
+.byte	102,15,56,221,209
+	movups	%xmm2,(%eax)
+	ret
+.size	aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl	aesni_decrypt
+.type	aesni_decrypt,@function
+.align	16
+aesni_decrypt:
+.L_aesni_decrypt_begin:
+	movl	4(%esp),%eax
+	movl	12(%esp),%edx
+	movups	(%eax),%xmm2
+	movl	240(%edx),%ecx
+	movl	8(%esp),%eax
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L001dec1_loop_2:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L001dec1_loop_2
+.byte	102,15,56,223,209
+	movups	%xmm2,(%eax)
+	ret
+.size	aesni_decrypt,.-.L_aesni_decrypt_begin
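+# _aesni_{en,de}crypt{3,4,6}: interleave 3/4/6 blocks (%xmm2..%xmm7)
+# through the round loop to hide aesenc/aesdec latency.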
+.type	_aesni_encrypt3,@function
+.align	16
+_aesni_encrypt3:
+	movups	(%edx),%xmm0
+	shrl	$1,%ecx
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	movups	(%edx),%xmm0
+.L002enc3_loop:
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	decl	%ecx
+.byte	102,15,56,220,225
+	movups	16(%edx),%xmm1
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	leal	32(%edx),%edx
+.byte	102,15,56,220,224
+	movups	(%edx),%xmm0
+	jnz	.L002enc3_loop
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,221,208
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+	ret
+.size	_aesni_encrypt3,.-_aesni_encrypt3
+.type	_aesni_decrypt3,@function
+.align	16
+_aesni_decrypt3:
+	movups	(%edx),%xmm0
+	shrl	$1,%ecx
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	movups	(%edx),%xmm0
+.L003dec3_loop:
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+	decl	%ecx
+.byte	102,15,56,222,225
+	movups	16(%edx),%xmm1
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+	leal	32(%edx),%edx
+.byte	102,15,56,222,224
+	movups	(%edx),%xmm0
+	jnz	.L003dec3_loop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,223,208
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+	ret
+.size	_aesni_decrypt3,.-_aesni_decrypt3
+.type	_aesni_encrypt4,@function
+.align	16
+_aesni_encrypt4:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	shrl	$1,%ecx
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	movups	(%edx),%xmm0
+.L004enc4_loop:
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	decl	%ecx
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+	movups	16(%edx),%xmm1
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	leal	32(%edx),%edx
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+	movups	(%edx),%xmm0
+	jnz	.L004enc4_loop
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,221,208
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+.byte	102,15,56,221,232
+	ret
+.size	_aesni_encrypt4,.-_aesni_encrypt4
+.type	_aesni_decrypt4,@function
+.align	16
+_aesni_decrypt4:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	shrl	$1,%ecx
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+	pxor	%xmm0,%xmm4
+	pxor	%xmm0,%xmm5
+	movups	(%edx),%xmm0
+.L005dec4_loop:
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+	decl	%ecx
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+	movups	16(%edx),%xmm1
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+	leal	32(%edx),%edx
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+	movups	(%edx),%xmm0
+	jnz	.L005dec4_loop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,223,208
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+.byte	102,15,56,223,232
+	ret
+.size	_aesni_decrypt4,.-_aesni_decrypt4
+.type	_aesni_encrypt6,@function
+.align	16
+_aesni_encrypt6:
+	movups	(%edx),%xmm0
+	shrl	$1,%ecx
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,220,209
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,220,217
+	pxor	%xmm0,%xmm5
+	decl	%ecx
+.byte	102,15,56,220,225
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,220,233
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,220,241
+	movups	(%edx),%xmm0
+.byte	102,15,56,220,249
+	jmp	.L_aesni_encrypt6_enter
+.align	16
+.L006enc6_loop:
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	decl	%ecx
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.align	16
+.L_aesni_encrypt6_enter:
+	movups	16(%edx),%xmm1
+.byte	102,15,56,220,208
+.byte	102,15,56,220,216
+	leal	32(%edx),%edx
+.byte	102,15,56,220,224
+.byte	102,15,56,220,232
+.byte	102,15,56,220,240
+.byte	102,15,56,220,248
+	movups	(%edx),%xmm0
+	jnz	.L006enc6_loop
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+.byte	102,15,56,220,225
+.byte	102,15,56,220,233
+.byte	102,15,56,220,241
+.byte	102,15,56,220,249
+.byte	102,15,56,221,208
+.byte	102,15,56,221,216
+.byte	102,15,56,221,224
+.byte	102,15,56,221,232
+.byte	102,15,56,221,240
+.byte	102,15,56,221,248
+	ret
+.size	_aesni_encrypt6,.-_aesni_encrypt6
+.type	_aesni_decrypt6,@function
+.align	16
+_aesni_decrypt6:
+	movups	(%edx),%xmm0
+	shrl	$1,%ecx
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,222,209
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,222,217
+	pxor	%xmm0,%xmm5
+	decl	%ecx
+.byte	102,15,56,222,225
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,222,233
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,222,241
+	movups	(%edx),%xmm0
+.byte	102,15,56,222,249
+	jmp	.L_aesni_decrypt6_enter
+.align	16
+.L007dec6_loop:
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+	decl	%ecx
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.align	16
+.L_aesni_decrypt6_enter:
+	movups	16(%edx),%xmm1
+.byte	102,15,56,222,208
+.byte	102,15,56,222,216
+	leal	32(%edx),%edx
+.byte	102,15,56,222,224
+.byte	102,15,56,222,232
+.byte	102,15,56,222,240
+.byte	102,15,56,222,248
+	movups	(%edx),%xmm0
+	jnz	.L007dec6_loop
+.byte	102,15,56,222,209
+.byte	102,15,56,222,217
+.byte	102,15,56,222,225
+.byte	102,15,56,222,233
+.byte	102,15,56,222,241
+.byte	102,15,56,222,249
+.byte	102,15,56,223,208
+.byte	102,15,56,223,216
+.byte	102,15,56,223,224
+.byte	102,15,56,223,232
+.byte	102,15,56,223,240
+.byte	102,15,56,223,248
+	ret
+.size	_aesni_decrypt6,.-_aesni_decrypt6
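+# aesni_ecb_encrypt: ECB in 6-block strides with a 1-5 block tail
+# dispatcher; the last argument (36(%esp)) selects encrypt (non-zero)
+# or decrypt (zero).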
+.globl	aesni_ecb_encrypt
+.type	aesni_ecb_encrypt,@function
+.align	16
+aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	36(%esp),%ebx
+	andl	$-16,%eax
+	jz	.L008ecb_ret
+	movl	240(%edx),%ecx
+	testl	%ebx,%ebx
+	jz	.L009ecb_decrypt
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	cmpl	$96,%eax
+	jb	.L010ecb_enc_tail
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+	subl	$96,%eax
+	jmp	.L011ecb_enc_loop6_enter
+.align	16
+.L012ecb_enc_loop6:
+	movups	%xmm2,(%edi)
+	movdqu	(%esi),%xmm2
+	movups	%xmm3,16(%edi)
+	movdqu	16(%esi),%xmm3
+	movups	%xmm4,32(%edi)
+	movdqu	32(%esi),%xmm4
+	movups	%xmm5,48(%edi)
+	movdqu	48(%esi),%xmm5
+	movups	%xmm6,64(%edi)
+	movdqu	64(%esi),%xmm6
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+.L011ecb_enc_loop6_enter:
+	call	_aesni_encrypt6
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	subl	$96,%eax
+	jnc	.L012ecb_enc_loop6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	addl	$96,%eax
+	jz	.L008ecb_ret
+.L010ecb_enc_tail:
+	movups	(%esi),%xmm2
+	cmpl	$32,%eax
+	jb	.L013ecb_enc_one
+	movups	16(%esi),%xmm3
+	je	.L014ecb_enc_two
+	movups	32(%esi),%xmm4
+	cmpl	$64,%eax
+	jb	.L015ecb_enc_three
+	movups	48(%esi),%xmm5
+	je	.L016ecb_enc_four
+	movups	64(%esi),%xmm6
+	xorps	%xmm7,%xmm7
+	call	_aesni_encrypt6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L013ecb_enc_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L017enc1_loop_3:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L017enc1_loop_3
+.byte	102,15,56,221,209
+	movups	%xmm2,(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L014ecb_enc_two:
+	xorps	%xmm4,%xmm4
+	call	_aesni_encrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L015ecb_enc_three:
+	call	_aesni_encrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L016ecb_enc_four:
+	call	_aesni_encrypt4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L009ecb_decrypt:
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	cmpl	$96,%eax
+	jb	.L018ecb_dec_tail
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+	subl	$96,%eax
+	jmp	.L019ecb_dec_loop6_enter
+.align	16
+.L020ecb_dec_loop6:
+	movups	%xmm2,(%edi)
+	movdqu	(%esi),%xmm2
+	movups	%xmm3,16(%edi)
+	movdqu	16(%esi),%xmm3
+	movups	%xmm4,32(%edi)
+	movdqu	32(%esi),%xmm4
+	movups	%xmm5,48(%edi)
+	movdqu	48(%esi),%xmm5
+	movups	%xmm6,64(%edi)
+	movdqu	64(%esi),%xmm6
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqu	80(%esi),%xmm7
+	leal	96(%esi),%esi
+.L019ecb_dec_loop6_enter:
+	call	_aesni_decrypt6
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	subl	$96,%eax
+	jnc	.L020ecb_dec_loop6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	addl	$96,%eax
+	jz	.L008ecb_ret
+.L018ecb_dec_tail:
+	movups	(%esi),%xmm2
+	cmpl	$32,%eax
+	jb	.L021ecb_dec_one
+	movups	16(%esi),%xmm3
+	je	.L022ecb_dec_two
+	movups	32(%esi),%xmm4
+	cmpl	$64,%eax
+	jb	.L023ecb_dec_three
+	movups	48(%esi),%xmm5
+	je	.L024ecb_dec_four
+	movups	64(%esi),%xmm6
+	xorps	%xmm7,%xmm7
+	call	_aesni_decrypt6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L021ecb_dec_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L025dec1_loop_4:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L025dec1_loop_4
+.byte	102,15,56,223,209
+	movups	%xmm2,(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L022ecb_dec_two:
+	xorps	%xmm4,%xmm4
+	call	_aesni_decrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L023ecb_dec_three:
+	call	_aesni_decrypt3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L008ecb_ret
+.align	16
+.L024ecb_dec_four:
+	call	_aesni_decrypt4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+.L008ecb_ret:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl	aesni_ccm64_encrypt_blocks
+.type	aesni_ccm64_encrypt_blocks,@function
+.align	16
+aesni_ccm64_encrypt_blocks:
+.L_aesni_ccm64_encrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	36(%esp),%ebx
+	movl	40(%esp),%ecx
+	movl	%esp,%ebp
+	subl	$60,%esp
+	andl	$-16,%esp
+	movl	%ebp,48(%esp)
+	movdqu	(%ebx),%xmm7
+	movdqu	(%ecx),%xmm3
+	movl	240(%edx),%ecx
+	movl	$202182159,(%esp)
+	movl	$134810123,4(%esp)
+	movl	$67438087,8(%esp)
+	movl	$66051,12(%esp)
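+# The four dwords above (0x0c0d0e0f 0x08090a0b 0x04050607 0x00010203)
+# form a full 16-byte reversal mask for pshufb (encoded below as
+# .byte 102,15,56,0,...): the CCM counter is kept big-endian and is
+# byte-swapped around each paddq step.  16(%esp) holds the 64-bit
+# constant 1 used as that increment.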
+	movl	$1,%ebx
+	xorl	%ebp,%ebp
+	movl	%ebx,16(%esp)
+	movl	%ebp,20(%esp)
+	movl	%ebp,24(%esp)
+	movl	%ebp,28(%esp)
+	shrl	$1,%ecx
+	leal	(%edx),%ebp
+	movdqa	(%esp),%xmm5
+	movdqa	%xmm7,%xmm2
+	movl	%ecx,%ebx
+.byte	102,15,56,0,253
+.L026ccm64_enc_outer:
+	movups	(%ebp),%xmm0
+	movl	%ebx,%ecx
+	movups	(%esi),%xmm6
+	xorps	%xmm0,%xmm2
+	movups	16(%ebp),%xmm1
+	xorps	%xmm6,%xmm0
+	leal	32(%ebp),%edx
+	xorps	%xmm0,%xmm3
+	movups	(%edx),%xmm0
+.L027ccm64_enc2_loop:
+.byte	102,15,56,220,209
+	decl	%ecx
+.byte	102,15,56,220,217
+	movups	16(%edx),%xmm1
+.byte	102,15,56,220,208
+	leal	32(%edx),%edx
+.byte	102,15,56,220,216
+	movups	(%edx),%xmm0
+	jnz	.L027ccm64_enc2_loop
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	paddq	16(%esp),%xmm7
+.byte	102,15,56,221,208
+.byte	102,15,56,221,216
+	decl	%eax
+	leal	16(%esi),%esi
+	xorps	%xmm2,%xmm6
+	movdqa	%xmm7,%xmm2
+	movups	%xmm6,(%edi)
+	leal	16(%edi),%edi
+.byte	102,15,56,0,213
+	jnz	.L026ccm64_enc_outer
+	movl	48(%esp),%esp
+	movl	40(%esp),%edi
+	movups	%xmm3,(%edi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
+.globl	aesni_ccm64_decrypt_blocks
+.type	aesni_ccm64_decrypt_blocks,@function
+.align	16
+aesni_ccm64_decrypt_blocks:
+.L_aesni_ccm64_decrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	36(%esp),%ebx
+	movl	40(%esp),%ecx
+	movl	%esp,%ebp
+	subl	$60,%esp
+	andl	$-16,%esp
+	movl	%ebp,48(%esp)
+	movdqu	(%ebx),%xmm7
+	movdqu	(%ecx),%xmm3
+	movl	240(%edx),%ecx
+	movl	$202182159,(%esp)
+	movl	$134810123,4(%esp)
+	movl	$67438087,8(%esp)
+	movl	$66051,12(%esp)
+	movl	$1,%ebx
+	xorl	%ebp,%ebp
+	movl	%ebx,16(%esp)
+	movl	%ebp,20(%esp)
+	movl	%ebp,24(%esp)
+	movl	%ebp,28(%esp)
+	movdqa	(%esp),%xmm5
+	movdqa	%xmm7,%xmm2
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+.byte	102,15,56,0,253
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L028enc1_loop_5:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L028enc1_loop_5
+.byte	102,15,56,221,209
+	movups	(%esi),%xmm6
+	paddq	16(%esp),%xmm7
+	leal	16(%esi),%esi
+	jmp	.L029ccm64_dec_outer
+.align	16
+.L029ccm64_dec_outer:
+	xorps	%xmm2,%xmm6
+	movdqa	%xmm7,%xmm2
+	movl	%ebx,%ecx
+	movups	%xmm6,(%edi)
+	leal	16(%edi),%edi
+.byte	102,15,56,0,213
+	subl	$1,%eax
+	jz	.L030ccm64_dec_break
+	movups	(%ebp),%xmm0
+	shrl	$1,%ecx
+	movups	16(%ebp),%xmm1
+	xorps	%xmm0,%xmm6
+	leal	32(%ebp),%edx
+	xorps	%xmm0,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	(%edx),%xmm0
+.L031ccm64_dec2_loop:
+.byte	102,15,56,220,209
+	decl	%ecx
+.byte	102,15,56,220,217
+	movups	16(%edx),%xmm1
+.byte	102,15,56,220,208
+	leal	32(%edx),%edx
+.byte	102,15,56,220,216
+	movups	(%edx),%xmm0
+	jnz	.L031ccm64_dec2_loop
+	movups	(%esi),%xmm6
+	paddq	16(%esp),%xmm7
+.byte	102,15,56,220,209
+.byte	102,15,56,220,217
+	leal	16(%esi),%esi
+.byte	102,15,56,221,208
+.byte	102,15,56,221,216
+	jmp	.L029ccm64_dec_outer
+.align	16
+.L030ccm64_dec_break:
+	movl	%ebp,%edx
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm6
+	leal	32(%edx),%edx
+	xorps	%xmm6,%xmm3
+.L032enc1_loop_6:
+.byte	102,15,56,220,217
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L032enc1_loop_6
+.byte	102,15,56,221,217
+	movl	48(%esp),%esp
+	movl	40(%esp),%edi
+	movups	%xmm3,(%edi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
+.globl	aesni_ctr32_encrypt_blocks
+.type	aesni_ctr32_encrypt_blocks,@function
+.align	16
+aesni_ctr32_encrypt_blocks:
+.L_aesni_ctr32_encrypt_blocks_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	36(%esp),%ebx
+	movl	%esp,%ebp
+	subl	$88,%esp
+	andl	$-16,%esp
+	movl	%ebp,80(%esp)
+	cmpl	$1,%eax
+	je	.L033ctr32_one_shortcut
+	movdqu	(%ebx),%xmm7
+	movl	$202182159,(%esp)
+	movl	$134810123,4(%esp)
+	movl	$67438087,8(%esp)
+	movl	$66051,12(%esp)
+	movl	$6,%ecx
+	xorl	%ebp,%ebp
+	movl	%ecx,16(%esp)
+	movl	%ecx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%ebp,28(%esp)
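+# (%esp) again holds the pshufb byte-swap mask; 16(%esp) holds the
+# dwords {6,6,6,0}, the paddd increment that advances the packed
+# counter lanes by six blocks per iteration of the 6-way CTR loop.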
+.byte	102,15,58,22,251,3
+.byte	102,15,58,34,253,3
+	movl	240(%edx),%ecx
+	bswap	%ebx
+	pxor	%xmm1,%xmm1
+	pxor	%xmm0,%xmm0
+	movdqa	(%esp),%xmm2
+.byte	102,15,58,34,203,0
+	leal	3(%ebx),%ebp
+.byte	102,15,58,34,197,0
+	incl	%ebx
+.byte	102,15,58,34,203,1
+	incl	%ebp
+.byte	102,15,58,34,197,1
+	incl	%ebx
+.byte	102,15,58,34,203,2
+	incl	%ebp
+.byte	102,15,58,34,197,2
+	movdqa	%xmm1,48(%esp)
+.byte	102,15,56,0,202
+	movdqa	%xmm0,64(%esp)
+.byte	102,15,56,0,194
+	pshufd	$192,%xmm1,%xmm2
+	pshufd	$128,%xmm1,%xmm3
+	cmpl	$6,%eax
+	jb	.L034ctr32_tail
+	movdqa	%xmm7,32(%esp)
+	shrl	$1,%ecx
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	subl	$6,%eax
+	jmp	.L035ctr32_loop6
+.align	16
+.L035ctr32_loop6:
+	pshufd	$64,%xmm1,%xmm4
+	movdqa	32(%esp),%xmm1
+	pshufd	$192,%xmm0,%xmm5
+	por	%xmm1,%xmm2
+	pshufd	$128,%xmm0,%xmm6
+	por	%xmm1,%xmm3
+	pshufd	$64,%xmm0,%xmm7
+	por	%xmm1,%xmm4
+	por	%xmm1,%xmm5
+	por	%xmm1,%xmm6
+	por	%xmm1,%xmm7
+	movups	(%ebp),%xmm0
+	movups	16(%ebp),%xmm1
+	leal	32(%ebp),%edx
+	decl	%ecx
+	pxor	%xmm0,%xmm2
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,220,209
+	pxor	%xmm0,%xmm4
+.byte	102,15,56,220,217
+	pxor	%xmm0,%xmm5
+.byte	102,15,56,220,225
+	pxor	%xmm0,%xmm6
+.byte	102,15,56,220,233
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,220,241
+	movups	(%edx),%xmm0
+.byte	102,15,56,220,249
+	call	.L_aesni_encrypt6_enter
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	%xmm1,%xmm2
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm3
+	movups	%xmm2,(%edi)
+	movdqa	16(%esp),%xmm0
+	xorps	%xmm1,%xmm4
+	movdqa	48(%esp),%xmm1
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	paddd	%xmm0,%xmm1
+	paddd	64(%esp),%xmm0
+	movdqa	(%esp),%xmm2
+	movups	48(%esi),%xmm3
+	movups	64(%esi),%xmm4
+	xorps	%xmm3,%xmm5
+	movups	80(%esi),%xmm3
+	leal	96(%esi),%esi
+	movdqa	%xmm1,48(%esp)
+.byte	102,15,56,0,202
+	xorps	%xmm4,%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm3,%xmm7
+	movdqa	%xmm0,64(%esp)
+.byte	102,15,56,0,194
+	movups	%xmm6,64(%edi)
+	pshufd	$192,%xmm1,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movl	%ebx,%ecx
+	pshufd	$128,%xmm1,%xmm3
+	subl	$6,%eax
+	jnc	.L035ctr32_loop6
+	addl	$6,%eax
+	jz	.L036ctr32_ret
+	movl	%ebp,%edx
+	leal	1(,%ecx,2),%ecx
+	movdqa	32(%esp),%xmm7
+.L034ctr32_tail:
+	por	%xmm7,%xmm2
+	cmpl	$2,%eax
+	jb	.L037ctr32_one
+	pshufd	$64,%xmm1,%xmm4
+	por	%xmm7,%xmm3
+	je	.L038ctr32_two
+	pshufd	$192,%xmm0,%xmm5
+	por	%xmm7,%xmm4
+	cmpl	$4,%eax
+	jb	.L039ctr32_three
+	pshufd	$128,%xmm0,%xmm6
+	por	%xmm7,%xmm5
+	je	.L040ctr32_four
+	por	%xmm7,%xmm6
+	call	_aesni_encrypt6
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	%xmm1,%xmm2
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm3
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm4
+	movups	64(%esi),%xmm1
+	xorps	%xmm0,%xmm5
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	jmp	.L036ctr32_ret
+.align	16
+.L033ctr32_one_shortcut:
+	movups	(%ebx),%xmm2
+	movl	240(%edx),%ecx
+.L037ctr32_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L041enc1_loop_7:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L041enc1_loop_7
+.byte	102,15,56,221,209
+	movups	(%esi),%xmm6
+	xorps	%xmm2,%xmm6
+	movups	%xmm6,(%edi)
+	jmp	.L036ctr32_ret
+.align	16
+.L038ctr32_two:
+	call	_aesni_encrypt3
+	movups	(%esi),%xmm5
+	movups	16(%esi),%xmm6
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	jmp	.L036ctr32_ret
+.align	16
+.L039ctr32_three:
+	call	_aesni_encrypt3
+	movups	(%esi),%xmm5
+	movups	16(%esi),%xmm6
+	xorps	%xmm5,%xmm2
+	movups	32(%esi),%xmm7
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm7,%xmm4
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	jmp	.L036ctr32_ret
+.align	16
+.L040ctr32_four:
+	call	_aesni_encrypt4
+	movups	(%esi),%xmm6
+	movups	16(%esi),%xmm7
+	movups	32(%esi),%xmm1
+	xorps	%xmm6,%xmm2
+	movups	48(%esi),%xmm0
+	xorps	%xmm7,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	%xmm0,%xmm5
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+.L036ctr32_ret:
+	movl	80(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
+.globl	aesni_xts_encrypt
+.type	aesni_xts_encrypt,@function
+.align	16
+aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	36(%esp),%edx
+	movl	40(%esp),%esi
+	movl	240(%edx),%ecx
+	movups	(%esi),%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L042enc1_loop_8:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L042enc1_loop_8
+.byte	102,15,56,221,209
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	%esp,%ebp
+	subl	$120,%esp
+	movl	240(%edx),%ecx
+	andl	$-16,%esp
+	movl	$135,96(%esp)
+	movl	$0,100(%esp)
+	movl	$1,104(%esp)
+	movl	$0,108(%esp)
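+# 96(%esp) holds the qword pair {0x87, 1}: 0x87 is the reduction
+# constant for GF(2^128) with polynomial x^128+x^7+x^2+x+1.  The
+# repeated pshufd $19/pcmpgtd/pand/paddq/pxor sequences below multiply
+# the XTS tweak by x (shift left one bit, conditionally fold in 0x87).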
+	movl	%eax,112(%esp)
+	movl	%ebp,116(%esp)
+	movdqa	%xmm2,%xmm1
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3
+	pcmpgtd	%xmm1,%xmm0
+	andl	$-16,%eax
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	subl	$96,%eax
+	jc	.L043xts_enc_short
+	shrl	$1,%ecx
+	movl	%ecx,%ebx
+	jmp	.L044xts_enc_loop6
+.align	16
+.L044xts_enc_loop6:
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,16(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,32(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,64(%esp)
+	paddq	%xmm1,%xmm1
+	movups	(%ebp),%xmm0
+	pand	%xmm3,%xmm7
+	movups	(%esi),%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	16(%esi),%xmm3
+	xorps	%xmm0,%xmm2
+	movdqu	32(%esi),%xmm4
+	pxor	%xmm0,%xmm3
+	movdqu	48(%esi),%xmm5
+	pxor	%xmm0,%xmm4
+	movdqu	64(%esi),%xmm6
+	pxor	%xmm0,%xmm5
+	movdqu	80(%esi),%xmm1
+	pxor	%xmm0,%xmm6
+	leal	96(%esi),%esi
+	pxor	(%esp),%xmm2
+	movdqa	%xmm7,80(%esp)
+	pxor	%xmm1,%xmm7
+	movups	16(%ebp),%xmm1
+	leal	32(%ebp),%edx
+	pxor	16(%esp),%xmm3
+.byte	102,15,56,220,209
+	pxor	32(%esp),%xmm4
+.byte	102,15,56,220,217
+	pxor	48(%esp),%xmm5
+	decl	%ecx
+.byte	102,15,56,220,225
+	pxor	64(%esp),%xmm6
+.byte	102,15,56,220,233
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,220,241
+	movups	(%edx),%xmm0
+.byte	102,15,56,220,249
+	call	.L_aesni_encrypt6_enter
+	movdqa	80(%esp),%xmm1
+	pxor	%xmm0,%xmm0
+	xorps	(%esp),%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	xorps	16(%esp),%xmm3
+	movups	%xmm2,(%edi)
+	xorps	32(%esp),%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm4,32(%edi)
+	xorps	64(%esp),%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm1,%xmm7
+	movups	%xmm6,64(%edi)
+	pshufd	$19,%xmm0,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqa	96(%esp),%xmm3
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	movl	%ebx,%ecx
+	pxor	%xmm2,%xmm1
+	subl	$96,%eax
+	jnc	.L044xts_enc_loop6
+	leal	1(,%ecx,2),%ecx
+	movl	%ebp,%edx
+	movl	%ecx,%ebx
+.L043xts_enc_short:
+	addl	$96,%eax
+	jz	.L045xts_enc_done6x
+	movdqa	%xmm1,%xmm5
+	cmpl	$32,%eax
+	jb	.L046xts_enc_one
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	je	.L047xts_enc_two
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm6
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	cmpl	$64,%eax
+	jb	.L048xts_enc_three
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm7
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,(%esp)
+	movdqa	%xmm6,16(%esp)
+	je	.L049xts_enc_four
+	movdqa	%xmm7,32(%esp)
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	pxor	(%esp),%xmm2
+	movdqu	48(%esi),%xmm5
+	pxor	16(%esp),%xmm3
+	movdqu	64(%esi),%xmm6
+	pxor	32(%esp),%xmm4
+	leal	80(%esi),%esi
+	pxor	48(%esp),%xmm5
+	movdqa	%xmm7,64(%esp)
+	pxor	%xmm7,%xmm6
+	call	_aesni_encrypt6
+	movaps	64(%esp),%xmm1
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	32(%esp),%xmm4
+	movups	%xmm2,(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm3,16(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi
+	jmp	.L050xts_enc_done
+.align	16
+.L046xts_enc_one:
+	movups	(%esi),%xmm2
+	leal	16(%esi),%esi
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L051enc1_loop_9:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L051enc1_loop_9
+.byte	102,15,56,221,209
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	movdqa	%xmm5,%xmm1
+	jmp	.L050xts_enc_done
+.align	16
+.L047xts_enc_two:
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	leal	32(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm4,%xmm4
+	call	_aesni_encrypt3
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	32(%edi),%edi
+	movdqa	%xmm6,%xmm1
+	jmp	.L050xts_enc_done
+.align	16
+.L048xts_enc_three:
+	movaps	%xmm1,%xmm7
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	leal	48(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	call	_aesni_encrypt3
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	leal	48(%edi),%edi
+	movdqa	%xmm7,%xmm1
+	jmp	.L050xts_enc_done
+.align	16
+.L049xts_enc_four:
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	xorps	(%esp),%xmm2
+	movups	48(%esi),%xmm5
+	leal	64(%esi),%esi
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	xorps	%xmm6,%xmm5
+	call	_aesni_encrypt4
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	xorps	%xmm6,%xmm5
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	leal	64(%edi),%edi
+	movdqa	%xmm6,%xmm1
+	jmp	.L050xts_enc_done
+.align	16
+.L045xts_enc_done6x:
+	movl	112(%esp),%eax
+	andl	$15,%eax
+	jz	.L052xts_enc_ret
+	movdqa	%xmm1,%xmm5
+	movl	%eax,112(%esp)
+	jmp	.L053xts_enc_steal
+.align	16
+.L050xts_enc_done:
+	movl	112(%esp),%eax
+	pxor	%xmm0,%xmm0
+	andl	$15,%eax
+	jz	.L052xts_enc_ret
+	pcmpgtd	%xmm1,%xmm0
+	movl	%eax,112(%esp)
+	pshufd	$19,%xmm0,%xmm5
+	paddq	%xmm1,%xmm1
+	pand	96(%esp),%xmm5
+	pxor	%xmm1,%xmm5
+.L053xts_enc_steal:
+	movzbl	(%esi),%ecx
+	movzbl	-16(%edi),%edx
+	leal	1(%esi),%esi
+	movb	%cl,-16(%edi)
+	movb	%dl,(%edi)
+	leal	1(%edi),%edi
+	subl	$1,%eax
+	jnz	.L053xts_enc_steal
+	subl	112(%esp),%edi
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	-16(%edi),%xmm2
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L054enc1_loop_10:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L054enc1_loop_10
+.byte	102,15,56,221,209
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,-16(%edi)
+.L052xts_enc_ret:
+	movl	116(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
+.globl	aesni_xts_decrypt
+.type	aesni_xts_decrypt,@function
+.align	16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	36(%esp),%edx
+	movl	40(%esp),%esi
+	movl	240(%edx),%ecx
+	movups	(%esi),%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L055enc1_loop_11:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L055enc1_loop_11
+.byte	102,15,56,221,209
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	movl	%esp,%ebp
+	subl	$120,%esp
+	andl	$-16,%esp
+	xorl	%ebx,%ebx
+	testl	$15,%eax
+	setnz	%bl
+	shll	$4,%ebx
+	subl	%ebx,%eax
+	movl	$135,96(%esp)
+	movl	$0,100(%esp)
+	movl	$1,104(%esp)
+	movl	$0,108(%esp)
+	movl	%eax,112(%esp)
+	movl	%ebp,116(%esp)
+	movl	240(%edx),%ecx
+	movl	%edx,%ebp
+	movl	%ecx,%ebx
+	movdqa	%xmm2,%xmm1
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3
+	pcmpgtd	%xmm1,%xmm0
+	andl	$-16,%eax
+	subl	$96,%eax
+	jc	.L056xts_dec_short
+	shrl	$1,%ecx
+	movl	%ecx,%ebx
+	jmp	.L057xts_dec_loop6
+.align	16
+.L057xts_dec_loop6:
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,16(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,32(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,64(%esp)
+	paddq	%xmm1,%xmm1
+	movups	(%ebp),%xmm0
+	pand	%xmm3,%xmm7
+	movups	(%esi),%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	16(%esi),%xmm3
+	xorps	%xmm0,%xmm2
+	movdqu	32(%esi),%xmm4
+	pxor	%xmm0,%xmm3
+	movdqu	48(%esi),%xmm5
+	pxor	%xmm0,%xmm4
+	movdqu	64(%esi),%xmm6
+	pxor	%xmm0,%xmm5
+	movdqu	80(%esi),%xmm1
+	pxor	%xmm0,%xmm6
+	leal	96(%esi),%esi
+	pxor	(%esp),%xmm2
+	movdqa	%xmm7,80(%esp)
+	pxor	%xmm1,%xmm7
+	movups	16(%ebp),%xmm1
+	leal	32(%ebp),%edx
+	pxor	16(%esp),%xmm3
+.byte	102,15,56,222,209
+	pxor	32(%esp),%xmm4
+.byte	102,15,56,222,217
+	pxor	48(%esp),%xmm5
+	decl	%ecx
+.byte	102,15,56,222,225
+	pxor	64(%esp),%xmm6
+.byte	102,15,56,222,233
+	pxor	%xmm0,%xmm7
+.byte	102,15,56,222,241
+	movups	(%edx),%xmm0
+.byte	102,15,56,222,249
+	call	.L_aesni_decrypt6_enter
+	movdqa	80(%esp),%xmm1
+	pxor	%xmm0,%xmm0
+	xorps	(%esp),%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	xorps	16(%esp),%xmm3
+	movups	%xmm2,(%edi)
+	xorps	32(%esp),%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm4,32(%edi)
+	xorps	64(%esp),%xmm6
+	movups	%xmm5,48(%edi)
+	xorps	%xmm1,%xmm7
+	movups	%xmm6,64(%edi)
+	pshufd	$19,%xmm0,%xmm2
+	movups	%xmm7,80(%edi)
+	leal	96(%edi),%edi
+	movdqa	96(%esp),%xmm3
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	movl	%ebx,%ecx
+	pxor	%xmm2,%xmm1
+	subl	$96,%eax
+	jnc	.L057xts_dec_loop6
+	leal	1(,%ecx,2),%ecx
+	movl	%ebp,%edx
+	movl	%ecx,%ebx
+.L056xts_dec_short:
+	addl	$96,%eax
+	jz	.L058xts_dec_done6x
+	movdqa	%xmm1,%xmm5
+	cmpl	$32,%eax
+	jb	.L059xts_dec_one
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	je	.L060xts_dec_two
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm6
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	cmpl	$64,%eax
+	jb	.L061xts_dec_three
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	%xmm1,%xmm7
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm5,(%esp)
+	movdqa	%xmm6,16(%esp)
+	je	.L062xts_dec_four
+	movdqa	%xmm7,32(%esp)
+	pshufd	$19,%xmm0,%xmm7
+	movdqa	%xmm1,48(%esp)
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	pxor	(%esp),%xmm2
+	movdqu	48(%esi),%xmm5
+	pxor	16(%esp),%xmm3
+	movdqu	64(%esi),%xmm6
+	pxor	32(%esp),%xmm4
+	leal	80(%esi),%esi
+	pxor	48(%esp),%xmm5
+	movdqa	%xmm7,64(%esp)
+	pxor	%xmm7,%xmm6
+	call	_aesni_decrypt6
+	movaps	64(%esp),%xmm1
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	32(%esp),%xmm4
+	movups	%xmm2,(%edi)
+	xorps	48(%esp),%xmm5
+	movups	%xmm3,16(%edi)
+	xorps	%xmm1,%xmm6
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi
+	jmp	.L063xts_dec_done
+.align	16
+.L059xts_dec_one:
+	movups	(%esi),%xmm2
+	leal	16(%esi),%esi
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L064dec1_loop_12:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L064dec1_loop_12
+.byte	102,15,56,223,209
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	movdqa	%xmm5,%xmm1
+	jmp	.L063xts_dec_done
+.align	16
+.L060xts_dec_two:
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	leal	32(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	call	_aesni_decrypt3
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	32(%edi),%edi
+	movdqa	%xmm6,%xmm1
+	jmp	.L063xts_dec_done
+.align	16
+.L061xts_dec_three:
+	movaps	%xmm1,%xmm7
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	leal	48(%esi),%esi
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	call	_aesni_decrypt3
+	xorps	%xmm5,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	leal	48(%edi),%edi
+	movdqa	%xmm7,%xmm1
+	jmp	.L063xts_dec_done
+.align	16
+.L062xts_dec_four:
+	movaps	%xmm1,%xmm6
+	movups	(%esi),%xmm2
+	movups	16(%esi),%xmm3
+	movups	32(%esi),%xmm4
+	xorps	(%esp),%xmm2
+	movups	48(%esi),%xmm5
+	leal	64(%esi),%esi
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	xorps	%xmm6,%xmm5
+	call	_aesni_decrypt4
+	xorps	(%esp),%xmm2
+	xorps	16(%esp),%xmm3
+	xorps	%xmm7,%xmm4
+	movups	%xmm2,(%edi)
+	xorps	%xmm6,%xmm5
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	leal	64(%edi),%edi
+	movdqa	%xmm6,%xmm1
+	jmp	.L063xts_dec_done
+.align	16
+.L058xts_dec_done6x:
+	movl	112(%esp),%eax
+	andl	$15,%eax
+	jz	.L065xts_dec_ret
+	movl	%eax,112(%esp)
+	jmp	.L066xts_dec_only_one_more
+.align	16
+.L063xts_dec_done:
+	movl	112(%esp),%eax
+	pxor	%xmm0,%xmm0
+	andl	$15,%eax
+	jz	.L065xts_dec_ret
+	pcmpgtd	%xmm1,%xmm0
+	movl	%eax,112(%esp)
+	pshufd	$19,%xmm0,%xmm2
+	pxor	%xmm0,%xmm0
+	movdqa	96(%esp),%xmm3
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm2
+	pcmpgtd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm1
+.L066xts_dec_only_one_more:
+	pshufd	$19,%xmm0,%xmm5
+	movdqa	%xmm1,%xmm6
+	paddq	%xmm1,%xmm1
+	pand	%xmm3,%xmm5
+	pxor	%xmm1,%xmm5
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	(%esi),%xmm2
+	xorps	%xmm5,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L067dec1_loop_13:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L067dec1_loop_13
+.byte	102,15,56,223,209
+	xorps	%xmm5,%xmm2
+	movups	%xmm2,(%edi)
+.L068xts_dec_steal:
+	movzbl	16(%esi),%ecx
+	movzbl	(%edi),%edx
+	leal	1(%esi),%esi
+	movb	%cl,(%edi)
+	movb	%dl,16(%edi)
+	leal	1(%edi),%edi
+	subl	$1,%eax
+	jnz	.L068xts_dec_steal
+	subl	112(%esp),%edi
+	movl	%ebp,%edx
+	movl	%ebx,%ecx
+	movups	(%edi),%xmm2
+	xorps	%xmm6,%xmm2
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L069dec1_loop_14:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L069dec1_loop_14
+.byte	102,15,56,223,209
+	xorps	%xmm6,%xmm2
+	movups	%xmm2,(%edi)
+.L065xts_dec_ret:
+	movl	116(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
+.globl	aesni_cbc_encrypt
+.type	aesni_cbc_encrypt,@function
+.align	16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	%esp,%ebx
+	movl	24(%esp),%edi
+	subl	$24,%ebx
+	movl	28(%esp),%eax
+	andl	$-16,%ebx
+	movl	32(%esp),%edx
+	movl	36(%esp),%ebp
+	testl	%eax,%eax
+	jz	.L070cbc_abort
+	cmpl	$0,40(%esp)
+	xchgl	%esp,%ebx
+	movups	(%ebp),%xmm7
+	movl	240(%edx),%ecx
+	movl	%edx,%ebp
+	movl	%ebx,16(%esp)
+	movl	%ecx,%ebx
+	je	.L071cbc_decrypt
+	movaps	%xmm7,%xmm2
+	cmpl	$16,%eax
+	jb	.L072cbc_enc_tail
+	subl	$16,%eax
+	jmp	.L073cbc_enc_loop
+.align	16
+.L073cbc_enc_loop:
+	movups	(%esi),%xmm7
+	leal	16(%esi),%esi
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	xorps	%xmm0,%xmm7
+	leal	32(%edx),%edx
+	xorps	%xmm7,%xmm2
+.L074enc1_loop_15:
+.byte	102,15,56,220,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L074enc1_loop_15
+.byte	102,15,56,221,209
+	movl	%ebx,%ecx
+	movl	%ebp,%edx
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+	subl	$16,%eax
+	jnc	.L073cbc_enc_loop
+	addl	$16,%eax
+	jnz	.L072cbc_enc_tail
+	movaps	%xmm2,%xmm7
+	jmp	.L075cbc_ret
+.L072cbc_enc_tail:
+	movl	%eax,%ecx
+.long	2767451785
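+# (0xa4f3f689: movl %esi,%esi; rep movsb - copy the partial input block)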
+	movl	$16,%ecx
+	subl	%eax,%ecx
+	xorl	%eax,%eax
+.long	2868115081
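+# (0xaaf3f689: movl %esi,%esi; rep stosb - zero-pad up to 16 bytes)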
+	leal	-16(%edi),%edi
+	movl	%ebx,%ecx
+	movl	%edi,%esi
+	movl	%ebp,%edx
+	jmp	.L073cbc_enc_loop
+.align	16
+.L071cbc_decrypt:
+	cmpl	$80,%eax
+	jbe	.L076cbc_dec_tail
+	movaps	%xmm7,(%esp)
+	subl	$80,%eax
+	jmp	.L077cbc_dec_loop6_enter
+.align	16
+.L078cbc_dec_loop6:
+	movaps	%xmm0,(%esp)
+	movups	%xmm7,(%edi)
+	leal	16(%edi),%edi
+.L077cbc_dec_loop6_enter:
+	movdqu	(%esi),%xmm2
+	movdqu	16(%esi),%xmm3
+	movdqu	32(%esi),%xmm4
+	movdqu	48(%esi),%xmm5
+	movdqu	64(%esi),%xmm6
+	movdqu	80(%esi),%xmm7
+	call	_aesni_decrypt6
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	(%esp),%xmm2
+	xorps	%xmm1,%xmm3
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm4
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm5
+	movups	64(%esi),%xmm1
+	xorps	%xmm0,%xmm6
+	movups	80(%esi),%xmm0
+	xorps	%xmm1,%xmm7
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	leal	96(%esi),%esi
+	movups	%xmm4,32(%edi)
+	movl	%ebx,%ecx
+	movups	%xmm5,48(%edi)
+	movl	%ebp,%edx
+	movups	%xmm6,64(%edi)
+	leal	80(%edi),%edi
+	subl	$96,%eax
+	ja	.L078cbc_dec_loop6
+	movaps	%xmm7,%xmm2
+	movaps	%xmm0,%xmm7
+	addl	$80,%eax
+	jle	.L079cbc_dec_tail_collected
+	movups	%xmm2,(%edi)
+	leal	16(%edi),%edi
+.L076cbc_dec_tail:
+	movups	(%esi),%xmm2
+	movaps	%xmm2,%xmm6
+	cmpl	$16,%eax
+	jbe	.L080cbc_dec_one
+	movups	16(%esi),%xmm3
+	movaps	%xmm3,%xmm5
+	cmpl	$32,%eax
+	jbe	.L081cbc_dec_two
+	movups	32(%esi),%xmm4
+	cmpl	$48,%eax
+	jbe	.L082cbc_dec_three
+	movups	48(%esi),%xmm5
+	cmpl	$64,%eax
+	jbe	.L083cbc_dec_four
+	movups	64(%esi),%xmm6
+	movaps	%xmm7,(%esp)
+	movups	(%esi),%xmm2
+	xorps	%xmm7,%xmm7
+	call	_aesni_decrypt6
+	movups	(%esi),%xmm1
+	movups	16(%esi),%xmm0
+	xorps	(%esp),%xmm2
+	xorps	%xmm1,%xmm3
+	movups	32(%esi),%xmm1
+	xorps	%xmm0,%xmm4
+	movups	48(%esi),%xmm0
+	xorps	%xmm1,%xmm5
+	movups	64(%esi),%xmm7
+	xorps	%xmm0,%xmm6
+	movups	%xmm2,(%edi)
+	movups	%xmm3,16(%edi)
+	movups	%xmm4,32(%edi)
+	movups	%xmm5,48(%edi)
+	leal	64(%edi),%edi
+	movaps	%xmm6,%xmm2
+	subl	$80,%eax
+	jmp	.L079cbc_dec_tail_collected
+.align	16
+.L080cbc_dec_one:
+	movups	(%edx),%xmm0
+	movups	16(%edx),%xmm1
+	leal	32(%edx),%edx
+	xorps	%xmm0,%xmm2
+.L084dec1_loop_16:
+.byte	102,15,56,222,209
+	decl	%ecx
+	movups	(%edx),%xmm1
+	leal	16(%edx),%edx
+	jnz	.L084dec1_loop_16
+.byte	102,15,56,223,209
+	xorps	%xmm7,%xmm2
+	movaps	%xmm6,%xmm7
+	subl	$16,%eax
+	jmp	.L079cbc_dec_tail_collected
+.align	16
+.L081cbc_dec_two:
+	xorps	%xmm4,%xmm4
+	call	_aesni_decrypt3
+	xorps	%xmm7,%xmm2
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	movaps	%xmm3,%xmm2
+	leal	16(%edi),%edi
+	movaps	%xmm5,%xmm7
+	subl	$32,%eax
+	jmp	.L079cbc_dec_tail_collected
+.align	16
+.L082cbc_dec_three:
+	call	_aesni_decrypt3
+	xorps	%xmm7,%xmm2
+	xorps	%xmm6,%xmm3
+	xorps	%xmm5,%xmm4
+	movups	%xmm2,(%edi)
+	movaps	%xmm4,%xmm2
+	movups	%xmm3,16(%edi)
+	leal	32(%edi),%edi
+	movups	32(%esi),%xmm7
+	subl	$48,%eax
+	jmp	.L079cbc_dec_tail_collected
+.align	16
+.L083cbc_dec_four:
+	call	_aesni_decrypt4
+	movups	16(%esi),%xmm1
+	movups	32(%esi),%xmm0
+	xorps	%xmm7,%xmm2
+	movups	48(%esi),%xmm7
+	xorps	%xmm6,%xmm3
+	movups	%xmm2,(%edi)
+	xorps	%xmm1,%xmm4
+	movups	%xmm3,16(%edi)
+	xorps	%xmm0,%xmm5
+	movups	%xmm4,32(%edi)
+	leal	48(%edi),%edi
+	movaps	%xmm5,%xmm2
+	subl	$64,%eax
+.L079cbc_dec_tail_collected:
+	andl	$15,%eax
+	jnz	.L085cbc_dec_tail_partial
+	movups	%xmm2,(%edi)
+	jmp	.L075cbc_ret
+.align	16
+.L085cbc_dec_tail_partial:
+	movaps	%xmm2,(%esp)
+	movl	$16,%ecx
+	movl	%esp,%esi
+	subl	%eax,%ecx
+.long	2767451785
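+# (rep movsb again: move the tail bytes out of the stack buffer)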
+.L075cbc_ret:
+	movl	16(%esp),%esp
+	movl	36(%esp),%ebp
+	movups	%xmm7,(%ebp)
+.L070cbc_abort:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
+.type	_aesni_set_encrypt_key,@function
+.align	16
+_aesni_set_encrypt_key:
+	testl	%eax,%eax
+	jz	.L086bad_pointer
+	testl	%edx,%edx
+	jz	.L086bad_pointer
+	movups	(%eax),%xmm0
+	xorps	%xmm4,%xmm4
+	leal	16(%edx),%edx
+	cmpl	$256,%ecx
+	je	.L08714rounds
+	cmpl	$192,%ecx
+	je	.L08812rounds
+	cmpl	$128,%ecx
+	jne	.L089bad_keybits
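+# 128-bit schedule: the .byte 102,15,58,223,200,N lines below encode
+# aeskeygenassist $N,%xmm0,%xmm1; the immediates 1,2,4,...,128,0x1b,0x36
+# are the ten AES round constants (Rcon).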
+.align	16
+.L09010rounds:
+	movl	$9,%ecx
+	movups	%xmm0,-16(%edx)
+.byte	102,15,58,223,200,1
+	call	.L091key_128_cold
+.byte	102,15,58,223,200,2
+	call	.L092key_128
+.byte	102,15,58,223,200,4
+	call	.L092key_128
+.byte	102,15,58,223,200,8
+	call	.L092key_128
+.byte	102,15,58,223,200,16
+	call	.L092key_128
+.byte	102,15,58,223,200,32
+	call	.L092key_128
+.byte	102,15,58,223,200,64
+	call	.L092key_128
+.byte	102,15,58,223,200,128
+	call	.L092key_128
+.byte	102,15,58,223,200,27
+	call	.L092key_128
+.byte	102,15,58,223,200,54
+	call	.L092key_128
+	movups	%xmm0,(%edx)
+	movl	%ecx,80(%edx)
+	xorl	%eax,%eax
+	ret
+.align	16
+.L092key_128:
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+.L091key_128_cold:
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1
+	xorps	%xmm1,%xmm0
+	ret
+.align	16
+.L08812rounds:
+	movq	16(%eax),%xmm2
+	movl	$11,%ecx
+	movups	%xmm0,-16(%edx)
+.byte	102,15,58,223,202,1
+	call	.L093key_192a_cold
+.byte	102,15,58,223,202,2
+	call	.L094key_192b
+.byte	102,15,58,223,202,4
+	call	.L095key_192a
+.byte	102,15,58,223,202,8
+	call	.L094key_192b
+.byte	102,15,58,223,202,16
+	call	.L095key_192a
+.byte	102,15,58,223,202,32
+	call	.L094key_192b
+.byte	102,15,58,223,202,64
+	call	.L095key_192a
+.byte	102,15,58,223,202,128
+	call	.L094key_192b
+	movups	%xmm0,(%edx)
+	movl	%ecx,48(%edx)
+	xorl	%eax,%eax
+	ret
+.align	16
+.L095key_192a:
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+.align	16
+.L093key_192a_cold:
+	movaps	%xmm2,%xmm5
+.L096key_192b_warm:
+	shufps	$16,%xmm0,%xmm4
+	movdqa	%xmm2,%xmm3
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	pslldq	$4,%xmm3
+	xorps	%xmm4,%xmm0
+	pshufd	$85,%xmm1,%xmm1
+	pxor	%xmm3,%xmm2
+	pxor	%xmm1,%xmm0
+	pshufd	$255,%xmm0,%xmm3
+	pxor	%xmm3,%xmm2
+	ret
+.align	16
+.L094key_192b:
+	movaps	%xmm0,%xmm3
+	shufps	$68,%xmm0,%xmm5
+	movups	%xmm5,(%edx)
+	shufps	$78,%xmm2,%xmm3
+	movups	%xmm3,16(%edx)
+	leal	32(%edx),%edx
+	jmp	.L096key_192b_warm
+.align	16
+.L08714rounds:
+	movups	16(%eax),%xmm2
+	movl	$13,%ecx
+	leal	16(%edx),%edx
+	movups	%xmm0,-32(%edx)
+	movups	%xmm2,-16(%edx)
+.byte	102,15,58,223,202,1
+	call	.L097key_256a_cold
+.byte	102,15,58,223,200,1
+	call	.L098key_256b
+.byte	102,15,58,223,202,2
+	call	.L099key_256a
+.byte	102,15,58,223,200,2
+	call	.L098key_256b
+.byte	102,15,58,223,202,4
+	call	.L099key_256a
+.byte	102,15,58,223,200,4
+	call	.L098key_256b
+.byte	102,15,58,223,202,8
+	call	.L099key_256a
+.byte	102,15,58,223,200,8
+	call	.L098key_256b
+.byte	102,15,58,223,202,16
+	call	.L099key_256a
+.byte	102,15,58,223,200,16
+	call	.L098key_256b
+.byte	102,15,58,223,202,32
+	call	.L099key_256a
+.byte	102,15,58,223,200,32
+	call	.L098key_256b
+.byte	102,15,58,223,202,64
+	call	.L099key_256a
+	movups	%xmm0,(%edx)
+	movl	%ecx,16(%edx)
+	xorl	%eax,%eax
+	ret
+.align	16
+.L099key_256a:
+	movups	%xmm2,(%edx)
+	leal	16(%edx),%edx
+.L097key_256a_cold:
+	shufps	$16,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$140,%xmm0,%xmm4
+	xorps	%xmm4,%xmm0
+	shufps	$255,%xmm1,%xmm1
+	xorps	%xmm1,%xmm0
+	ret
+.align	16
+.L098key_256b:
+	movups	%xmm0,(%edx)
+	leal	16(%edx),%edx
+	shufps	$16,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$140,%xmm2,%xmm4
+	xorps	%xmm4,%xmm2
+	shufps	$170,%xmm1,%xmm1
+	xorps	%xmm1,%xmm2
+	ret
+.align	4
+.L086bad_pointer:
+	movl	$-1,%eax
+	ret
+.align	4
+.L089bad_keybits:
+	movl	$-2,%eax
+	ret
+.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl	aesni_set_encrypt_key
+.type	aesni_set_encrypt_key,@function
+.align	16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+	movl	4(%esp),%eax
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	call	_aesni_set_encrypt_key
+	ret
+.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl	aesni_set_decrypt_key
+.type	aesni_set_decrypt_key,@function
+.align	16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+	movl	4(%esp),%eax
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	call	_aesni_set_encrypt_key
+	movl	12(%esp),%edx
+	shll	$4,%ecx
+	testl	%eax,%eax
+	jnz	.L100dec_key_ret
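+# Build the decryption schedule: reverse the order of the round keys
+# and apply InvMixColumns (aesimc, encoded as .byte 102,15,56,219) to
+# every round key except the first and last.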
+	leal	16(%edx,%ecx,1),%eax
+	movups	(%edx),%xmm0
+	movups	(%eax),%xmm1
+	movups	%xmm0,(%eax)
+	movups	%xmm1,(%edx)
+	leal	16(%edx),%edx
+	leal	-16(%eax),%eax
+.L101dec_key_inverse:
+	movups	(%edx),%xmm0
+	movups	(%eax),%xmm1
+.byte	102,15,56,219,192
+.byte	102,15,56,219,201
+	leal	16(%edx),%edx
+	leal	-16(%eax),%eax
+	movups	%xmm0,16(%eax)
+	movups	%xmm1,-16(%edx)
+	cmpl	%edx,%eax
+	ja	.L101dec_key_inverse
+	movups	(%edx),%xmm0
+.byte	102,15,56,219,192
+	movups	%xmm0,(%edx)
+	xorl	%eax,%eax
+.L100dec_key_ret:
+	ret
+.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte	115,108,46,111,114,103,62,0
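A note on the tweak arithmetic above: the repeated pshufd $19 /
pcmpgtd / pand / paddq / pxor idiom in aesni_xts_encrypt and
aesni_xts_decrypt is a branch-free SSE2 rendering of XTS tweak
doubling.  A minimal C sketch of the same step, for reference only and
not part of the generated file (the helper name is illustrative):

    #include <stdint.h>

    /* Multiply a 128-bit XTS tweak, held as a little-endian pair of
     * qwords, by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1:
     * shift left one bit and, if a bit carried out of the top, fold
     * the reduction constant 0x87 back into the low qword. */
    static void xts_double_tweak(uint64_t t[2])
    {
        uint64_t carry = (uint64_t)((int64_t)t[1] >> 63); /* 0 or ~0 */
        t[1] = (t[1] << 1) | (t[0] >> 63);
        t[0] = (t[0] << 1) ^ (carry & 0x87);
    }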
diff --git a/crypto/aes/asm/vpaes-x86.s b/crypto/aes/asm/vpaes-x86.s
new file mode 100644
index 0000000..c53a507
--- /dev/null
+++ b/crypto/aes/asm/vpaes-x86.s
@@ -0,0 +1,661 @@
+.file	"vpaes-x86.s"
+.text
+.align	64
+.L_vpaes_consts:
+.long	218628480,235210255,168496130,67568393
+.long	252381056,17041926,33884169,51187212
+.long	252645135,252645135,252645135,252645135
+.long	1512730624,3266504856,1377990664,3401244816
+.long	830229760,1275146365,2969422977,3447763452
+.long	3411033600,2979783055,338359620,2782886510
+.long	4209124096,907596821,221174255,1006095553
+.long	191964160,3799684038,3164090317,1589111125
+.long	182528256,1777043520,2877432650,3265356744
+.long	1874708224,3503451415,3305285752,363511674
+.long	1606117888,3487855781,1093350906,2384367825
+.long	197121,67569157,134941193,202313229
+.long	67569157,134941193,202313229,197121
+.long	134941193,202313229,197121,67569157
+.long	202313229,197121,67569157,134941193
+.long	33619971,100992007,168364043,235736079
+.long	235736079,33619971,100992007,168364043
+.long	168364043,235736079,33619971,100992007
+.long	100992007,168364043,235736079,33619971
+.long	50462976,117835012,185207048,252579084
+.long	252314880,51251460,117574920,184942860
+.long	184682752,252054788,50987272,118359308
+.long	118099200,185467140,251790600,50727180
+.long	2946363062,528716217,1300004225,1881839624
+.long	1532713819,1532713819,1532713819,1532713819
+.long	3602276352,4288629033,3737020424,4153884961
+.long	1354558464,32357713,2958822624,3775749553
+.long	1201988352,132424512,1572796698,503232858
+.long	2213177600,1597421020,4103937655,675398315
+.long	2749646592,4273543773,1511898873,121693092
+.long	3040248576,1103263732,2871565598,1608280554
+.long	2236667136,2588920351,482954393,64377734
+.long	3069987328,291237287,2117370568,3650299247
+.long	533321216,3573750986,2572112006,1401264716
+.long	1339849704,2721158661,548607111,3445553514
+.long	2128193280,3054596040,2183486460,1257083700
+.long	655635200,1165381986,3923443150,2344132524
+.long	190078720,256924420,290342170,357187870
+.long	1610966272,2263057382,4103205268,309794674
+.long	2592527872,2233205587,1335446729,3402964816
+.long	3973531904,3225098121,3002836325,1918774430
+.long	3870401024,2102906079,2284471353,4117666579
+.long	617007872,1021508343,366931923,691083277
+.long	2528395776,3491914898,2968704004,1613121270
+.long	3445188352,3247741094,844474987,4093578302
+.long	651481088,1190302358,1689581232,574775300
+.long	4289380608,206939853,2555985458,2489840491
+.long	2130264064,327674451,3566485037,3349835193
+.long	2470714624,316102159,3636825756,3393945945
+.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+.byte	118,101,114,115,105,116,121,41,0
+.align	64
+.type	_vpaes_preheat,@function
+.align	16
+_vpaes_preheat:
+	addl	(%esp),%ebp
+	movdqa	-48(%ebp),%xmm7
+	movdqa	-16(%ebp),%xmm6
+	ret
+.size	_vpaes_preheat,.-_vpaes_preheat
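+# vpaes avoids data-dependent table loads: each S-box step splits the
+# state into 4-bit nibbles (pand/pandn/psrld $4 against the 0x0f mask
+# kept in %xmm6) and looks both halves up with pshufb (the
+# .byte 102,15,56,0 sequences), so no load address depends on secrets.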
+.type	_vpaes_encrypt_core,@function
+.align	16
+_vpaes_encrypt_core:
+	movl	$16,%ecx
+	movl	240(%edx),%eax
+	movdqa	%xmm6,%xmm1
+	movdqa	(%ebp),%xmm2
+	pandn	%xmm0,%xmm1
+	movdqu	(%edx),%xmm5
+	psrld	$4,%xmm1
+	pand	%xmm6,%xmm0
+.byte	102,15,56,0,208
+	movdqa	16(%ebp),%xmm0
+.byte	102,15,56,0,193
+	pxor	%xmm5,%xmm2
+	pxor	%xmm2,%xmm0
+	addl	$16,%edx
+	leal	192(%ebp),%ebx
+	jmp	.L000enc_entry
+.align	16
+.L001enc_loop:
+	movdqa	32(%ebp),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm5,%xmm4
+	movdqa	48(%ebp),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+	movdqa	64(%ebp),%xmm5
+.byte	102,15,56,0,234
+	movdqa	-64(%ebx,%ecx,1),%xmm1
+	movdqa	80(%ebp),%xmm2
+.byte	102,15,56,0,211
+	pxor	%xmm5,%xmm2
+	movdqa	(%ebx,%ecx,1),%xmm4
+	movdqa	%xmm0,%xmm3
+.byte	102,15,56,0,193
+	addl	$16,%edx
+	pxor	%xmm2,%xmm0
+.byte	102,15,56,0,220
+	addl	$16,%ecx
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,193
+	andl	$48,%ecx
+	pxor	%xmm3,%xmm0
+	subl	$1,%eax
+.L000enc_entry:
+	movdqa	%xmm6,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm6,%xmm0
+	movdqa	-32(%ebp),%xmm5
+.byte	102,15,56,0,232
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm7,%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm5,%xmm3
+	movdqa	%xmm7,%xmm4
+.byte	102,15,56,0,224
+	pxor	%xmm5,%xmm4
+	movdqa	%xmm7,%xmm2
+.byte	102,15,56,0,211
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm7,%xmm3
+	movdqu	(%edx),%xmm5
+.byte	102,15,56,0,220
+	pxor	%xmm1,%xmm3
+	jnz	.L001enc_loop
+	movdqa	96(%ebp),%xmm4
+	movdqa	112(%ebp),%xmm0
+.byte	102,15,56,0,226
+	pxor	%xmm5,%xmm4
+.byte	102,15,56,0,195
+	movdqa	64(%ebx,%ecx,1),%xmm1
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,193
+	ret
+.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
+.type	_vpaes_decrypt_core,@function
+.align	16
+_vpaes_decrypt_core:
+	movl	240(%edx),%eax
+	leal	608(%ebp),%ebx
+	movdqa	%xmm6,%xmm1
+	movdqa	-64(%ebx),%xmm2
+	pandn	%xmm0,%xmm1
+	movl	%eax,%ecx
+	psrld	$4,%xmm1
+	movdqu	(%edx),%xmm5
+	shll	$4,%ecx
+	pand	%xmm6,%xmm0
+.byte	102,15,56,0,208
+	movdqa	-48(%ebx),%xmm0
+	xorl	$48,%ecx
+.byte	102,15,56,0,193
+	andl	$48,%ecx
+	pxor	%xmm5,%xmm2
+	movdqa	176(%ebp),%xmm5
+	pxor	%xmm2,%xmm0
+	addl	$16,%edx
+	leal	-352(%ebx,%ecx,1),%ecx
+	jmp	.L002dec_entry
+.align	16
+.L003dec_loop:
+	movdqa	-32(%ebx),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm0,%xmm4
+	movdqa	-16(%ebx),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+	addl	$16,%edx
+.byte	102,15,56,0,197
+	movdqa	(%ebx),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm0,%xmm4
+	movdqa	16(%ebx),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+	subl	$1,%eax
+.byte	102,15,56,0,197
+	movdqa	32(%ebx),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm0,%xmm4
+	movdqa	48(%ebx),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,197
+	movdqa	64(%ebx),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm0,%xmm4
+	movdqa	80(%ebx),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+.byte	102,15,58,15,237,12
+.L002dec_entry:
+	movdqa	%xmm6,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm6,%xmm0
+	movdqa	-32(%ebp),%xmm2
+.byte	102,15,56,0,208
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm7,%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+	movdqa	%xmm7,%xmm4
+.byte	102,15,56,0,224
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm7,%xmm2
+.byte	102,15,56,0,211
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm7,%xmm3
+.byte	102,15,56,0,220
+	pxor	%xmm1,%xmm3
+	movdqu	(%edx),%xmm0
+	jnz	.L003dec_loop
+	movdqa	96(%ebx),%xmm4
+.byte	102,15,56,0,226
+	pxor	%xmm0,%xmm4
+	movdqa	112(%ebx),%xmm0
+	movdqa	(%ecx),%xmm2
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+.byte	102,15,56,0,194
+	ret
+.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
+.type	_vpaes_schedule_core,@function
+.align	16
+_vpaes_schedule_core:
+	addl	(%esp),%ebp
+	movdqu	(%esi),%xmm0
+	movdqa	320(%ebp),%xmm2
+	movdqa	%xmm0,%xmm3
+	leal	(%ebp),%ebx
+	movdqa	%xmm2,4(%esp)
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm7
+	testl	%edi,%edi
+	jnz	.L004schedule_am_decrypting
+	movdqu	%xmm0,(%edx)
+	jmp	.L005schedule_go
+.L004schedule_am_decrypting:
+	movdqa	256(%ebp,%ecx,1),%xmm1
+.byte	102,15,56,0,217
+	movdqu	%xmm3,(%edx)
+	xorl	$48,%ecx
+.L005schedule_go:
+	cmpl	$192,%eax
+	ja	.L006schedule_256
+	je	.L007schedule_192
+.L008schedule_128:
+	movl	$10,%eax
+.L009loop_schedule_128:
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	jmp	.L009loop_schedule_128
+.align	16
+.L007schedule_192:
+	movdqu	8(%esi),%xmm0
+	call	_vpaes_schedule_transform
+	movdqa	%xmm0,%xmm6
+	pxor	%xmm4,%xmm4
+	movhlps	%xmm4,%xmm6
+	movl	$4,%eax
+.L011loop_schedule_192:
+	call	_vpaes_schedule_round
+.byte	102,15,58,15,198,8
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	.L011loop_schedule_192
+.align	16
+.L006schedule_256:
+	movdqu	16(%esi),%xmm0
+	call	_vpaes_schedule_transform
+	movl	$7,%eax
+.L012loop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	%xmm0,%xmm6
+	call	_vpaes_schedule_round
+	decl	%eax
+	jz	.L010schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	pshufd	$255,%xmm0,%xmm0
+	movdqa	%xmm7,20(%esp)
+	movdqa	%xmm6,%xmm7
+	call	.L_vpaes_schedule_low_round
+	movdqa	20(%esp),%xmm7
+	jmp	.L012loop_schedule_256
+.align	16
+.L010schedule_mangle_last:
+	leal	384(%ebp),%ebx
+	testl	%edi,%edi
+	jnz	.L013schedule_mangle_last_dec
+	movdqa	256(%ebp,%ecx,1),%xmm1
+.byte	102,15,56,0,193
+	leal	352(%ebp),%ebx
+	addl	$32,%edx
+.L013schedule_mangle_last_dec:
+	addl	$-16,%edx
+	pxor	336(%ebp),%xmm0
+	call	_vpaes_schedule_transform
+	movdqu	%xmm0,(%edx)
+	pxor	%xmm0,%xmm0
+	pxor	%xmm1,%xmm1
+	pxor	%xmm2,%xmm2
+	pxor	%xmm3,%xmm3
+	pxor	%xmm4,%xmm4
+	pxor	%xmm5,%xmm5
+	pxor	%xmm6,%xmm6
+	pxor	%xmm7,%xmm7
+	ret
+.size	_vpaes_schedule_core,.-_vpaes_schedule_core
+.type	_vpaes_schedule_192_smear,@function
+.align	16
+_vpaes_schedule_192_smear:
+	pshufd	$128,%xmm6,%xmm0
+	pxor	%xmm0,%xmm6
+	pshufd	$254,%xmm7,%xmm0
+	pxor	%xmm0,%xmm6
+	movdqa	%xmm6,%xmm0
+	pxor	%xmm1,%xmm1
+	movhlps	%xmm1,%xmm6
+	ret
+.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+.type	_vpaes_schedule_round,@function
+.align	16
+_vpaes_schedule_round:
+	movdqa	8(%esp),%xmm2
+	pxor	%xmm1,%xmm1
+.byte	102,15,58,15,202,15
+.byte	102,15,58,15,210,15
+	pxor	%xmm1,%xmm7
+	pshufd	$255,%xmm0,%xmm0
+.byte	102,15,58,15,192,1
+	movdqa	%xmm2,8(%esp)
+.L_vpaes_schedule_low_round:
+	movdqa	%xmm7,%xmm1
+	pslldq	$4,%xmm7
+	pxor	%xmm1,%xmm7
+	movdqa	%xmm7,%xmm1
+	pslldq	$8,%xmm7
+	pxor	%xmm1,%xmm7
+	pxor	336(%ebp),%xmm7
+	movdqa	-16(%ebp),%xmm4
+	movdqa	-48(%ebp),%xmm5
+	movdqa	%xmm4,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm4,%xmm0
+	movdqa	-32(%ebp),%xmm2
+.byte	102,15,56,0,208
+	pxor	%xmm1,%xmm0
+	movdqa	%xmm5,%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+	movdqa	%xmm5,%xmm4
+.byte	102,15,56,0,224
+	pxor	%xmm2,%xmm4
+	movdqa	%xmm5,%xmm2
+.byte	102,15,56,0,211
+	pxor	%xmm0,%xmm2
+	movdqa	%xmm5,%xmm3
+.byte	102,15,56,0,220
+	pxor	%xmm1,%xmm3
+	movdqa	32(%ebp),%xmm4
+.byte	102,15,56,0,226
+	movdqa	48(%ebp),%xmm0
+.byte	102,15,56,0,195
+	pxor	%xmm4,%xmm0
+	pxor	%xmm7,%xmm0
+	movdqa	%xmm0,%xmm7
+	ret
+.size	_vpaes_schedule_round,.-_vpaes_schedule_round
+.type	_vpaes_schedule_transform,@function
+.align	16
+_vpaes_schedule_transform:
+	movdqa	-16(%ebp),%xmm2
+	movdqa	%xmm2,%xmm1
+	pandn	%xmm0,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm2,%xmm0
+	movdqa	(%ebx),%xmm2
+.byte	102,15,56,0,208
+	movdqa	16(%ebx),%xmm0
+.byte	102,15,56,0,193
+	pxor	%xmm2,%xmm0
+	ret
+.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
+.type	_vpaes_schedule_mangle,@function
+.align	16
+_vpaes_schedule_mangle:
+	movdqa	%xmm0,%xmm4
+	movdqa	128(%ebp),%xmm5
+	testl	%edi,%edi
+	jnz	.L014schedule_mangle_dec
+	addl	$16,%edx
+	pxor	336(%ebp),%xmm4
+.byte	102,15,56,0,229
+	movdqa	%xmm4,%xmm3
+.byte	102,15,56,0,229
+	pxor	%xmm4,%xmm3
+.byte	102,15,56,0,229
+	pxor	%xmm4,%xmm3
+	jmp	.L015schedule_mangle_both
+.align	16
+.L014schedule_mangle_dec:
+	movdqa	-16(%ebp),%xmm2
+	leal	416(%ebp),%esi
+	movdqa	%xmm2,%xmm1
+	pandn	%xmm4,%xmm1
+	psrld	$4,%xmm1
+	pand	%xmm2,%xmm4
+	movdqa	(%esi),%xmm2
+.byte	102,15,56,0,212
+	movdqa	16(%esi),%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221
+	movdqa	32(%esi),%xmm2
+.byte	102,15,56,0,212
+	pxor	%xmm3,%xmm2
+	movdqa	48(%esi),%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221
+	movdqa	64(%esi),%xmm2
+.byte	102,15,56,0,212
+	pxor	%xmm3,%xmm2
+	movdqa	80(%esi),%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+.byte	102,15,56,0,221
+	movdqa	96(%esi),%xmm2
+.byte	102,15,56,0,212
+	pxor	%xmm3,%xmm2
+	movdqa	112(%esi),%xmm3
+.byte	102,15,56,0,217
+	pxor	%xmm2,%xmm3
+	addl	$-16,%edx
+.L015schedule_mangle_both:
+	movdqa	256(%ebp,%ecx,1),%xmm1
+.byte	102,15,56,0,217
+	addl	$-16,%ecx
+	andl	$48,%ecx
+	movdqu	%xmm3,(%edx)
+	ret
+.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+.globl	vpaes_set_encrypt_key
+.type	vpaes_set_encrypt_key,@function
+.align	16
+vpaes_set_encrypt_key:
+.L_vpaes_set_encrypt_key_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%eax
+	andl	$-16,%ebx
+	movl	28(%esp),%edx
+	xchgl	%esp,%ebx
+	movl	%ebx,48(%esp)
+	movl	%eax,%ebx
+	shrl	$5,%ebx
+	addl	$5,%ebx
+	movl	%ebx,240(%edx)
+	movl	$48,%ecx
+	movl	$0,%edi
+	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp
+	call	_vpaes_schedule_core
+.L016pic_point:
+	movl	48(%esp),%esp
+	xorl	%eax,%eax
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
+.globl	vpaes_set_decrypt_key
+.type	vpaes_set_decrypt_key,@function
+.align	16
+vpaes_set_decrypt_key:
+.L_vpaes_set_decrypt_key_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%eax
+	andl	$-16,%ebx
+	movl	28(%esp),%edx
+	xchgl	%esp,%ebx
+	movl	%ebx,48(%esp)
+	movl	%eax,%ebx
+	shrl	$5,%ebx
+	addl	$5,%ebx
+	movl	%ebx,240(%edx)
+	shll	$4,%ebx
+	leal	16(%edx,%ebx,1),%edx
+	movl	$1,%edi
+	movl	%eax,%ecx
+	shrl	$1,%ecx
+	andl	$32,%ecx
+	xorl	$32,%ecx
+	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp
+	call	_vpaes_schedule_core
+.L017pic_point:
+	movl	48(%esp),%esp
+	xorl	%eax,%eax
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
+.globl	vpaes_encrypt
+.type	vpaes_encrypt,@function
+.align	16
+vpaes_encrypt:
+.L_vpaes_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp
+	call	_vpaes_preheat
+.L018pic_point:
+	movl	20(%esp),%esi
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%edi
+	andl	$-16,%ebx
+	movl	28(%esp),%edx
+	xchgl	%esp,%ebx
+	movl	%ebx,48(%esp)
+	movdqu	(%esi),%xmm0
+	call	_vpaes_encrypt_core
+	movdqu	%xmm0,(%edi)
+	movl	48(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
+.globl	vpaes_decrypt
+.type	vpaes_decrypt,@function
+.align	16
+vpaes_decrypt:
+.L_vpaes_decrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp
+	call	_vpaes_preheat
+.L019pic_point:
+	movl	20(%esp),%esi
+	leal	-56(%esp),%ebx
+	movl	24(%esp),%edi
+	andl	$-16,%ebx
+	movl	28(%esp),%edx
+	xchgl	%esp,%ebx
+	movl	%ebx,48(%esp)
+	movdqu	(%esi),%xmm0
+	call	_vpaes_decrypt_core
+	movdqu	%xmm0,(%edi)
+	movl	48(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
+.globl	vpaes_cbc_encrypt
+.type	vpaes_cbc_encrypt,@function
+.align	16
+vpaes_cbc_encrypt:
+.L_vpaes_cbc_encrypt_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	32(%esp),%edx
+	subl	$16,%eax
+	jc	.L020cbc_abort
+	leal	-56(%esp),%ebx
+	movl	36(%esp),%ebp
+	andl	$-16,%ebx
+	movl	40(%esp),%ecx
+	xchgl	%esp,%ebx
+	movdqu	(%ebp),%xmm1
+	subl	%esi,%edi
+	movl	%ebx,48(%esp)
+	movl	%edi,(%esp)
+	movl	%edx,4(%esp)
+	movl	%ebp,8(%esp)
+	movl	%eax,%edi
+	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp
+	call	_vpaes_preheat
+.L021pic_point:
+	cmpl	$0,%ecx
+	je	.L022cbc_dec_loop
+	jmp	.L023cbc_enc_loop
+.align	16
+.L023cbc_enc_loop:
+	movdqu	(%esi),%xmm0
+	pxor	%xmm1,%xmm0
+	call	_vpaes_encrypt_core
+	movl	(%esp),%ebx
+	movl	4(%esp),%edx
+	movdqa	%xmm0,%xmm1
+	movdqu	%xmm0,(%ebx,%esi,1)
+	leal	16(%esi),%esi
+	subl	$16,%edi
+	jnc	.L023cbc_enc_loop
+	jmp	.L024cbc_done
+.align	16
+.L022cbc_dec_loop:
+	movdqu	(%esi),%xmm0
+	movdqa	%xmm1,16(%esp)
+	movdqa	%xmm0,32(%esp)
+	call	_vpaes_decrypt_core
+	movl	(%esp),%ebx
+	movl	4(%esp),%edx
+	pxor	16(%esp),%xmm0
+	movdqa	32(%esp),%xmm1
+	movdqu	%xmm0,(%ebx,%esi,1)
+	leal	16(%esi),%esi
+	subl	$16,%edi
+	jnc	.L022cbc_dec_loop
+.L024cbc_done:
+	movl	8(%esp),%ebx
+	movl	48(%esp),%esp
+	movdqu	%xmm1,(%ebx)
+.L020cbc_abort:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
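The pshufb-based lookup that vpaes builds everything from, sketched in
C with SSSE3 intrinsics; the table contents, the helper name and the
surrounding key schedule are omitted, so this is illustrative only:

    #include <tmmintrin.h> /* SSSE3 */

    /* Split each byte of x into nibbles and combine two 16-entry
     * in-register table lookups, mirroring the pand/pandn/psrld $4/
     * pshufb/pxor pattern in _vpaes_encrypt_core above. */
    static __m128i nibble_lookup(__m128i tab_lo, __m128i tab_hi,
                                 __m128i x)
    {
        const __m128i mask = _mm_set1_epi8(0x0f);
        __m128i lo = _mm_and_si128(x, mask);
        __m128i hi = _mm_and_si128(_mm_srli_epi32(x, 4), mask);
        return _mm_xor_si128(_mm_shuffle_epi8(tab_lo, lo),
                             _mm_shuffle_epi8(tab_hi, hi));
    }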
diff --git a/crypto/bf/asm/bf-586.s b/crypto/bf/asm/bf-586.s
new file mode 100644
index 0000000..aa718d4
--- /dev/null
+++ b/crypto/bf/asm/bf-586.s
@@ -0,0 +1,896 @@
+.file	"bf-586.s"
+.text
+.globl	BF_encrypt
+.type	BF_encrypt,@function
+.align	16
+BF_encrypt:
+.L_BF_encrypt_begin:
+
+	pushl	%ebp
+	pushl	%ebx
+	movl	12(%esp),%ebx
+	movl	16(%esp),%ebp
+	pushl	%esi
+	pushl	%edi
+
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+	xorl	%eax,%eax
+	movl	(%ebp),%ebx
+	xorl	%ecx,%ecx
+	xorl	%ebx,%edi
+
+
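+# Sixteen fully unrolled Blowfish rounds follow.  %ebp points at the
+# BF_KEY: the P-array at offset 0 (18 dwords) and the four S-boxes at
+# offsets 72, 1096, 2120 and 3144.  Each round computes
+# F(x) = ((S0[x>>24] + S1[(x>>16)&255]) ^ S2[(x>>8)&255]) + S3[x&255]
+# and XORs it into the other half of the block.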
+	movl	4(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	8(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	12(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	16(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	20(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	24(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	28(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	32(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	36(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	40(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	44(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	48(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	52(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	56(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	60(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	64(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+
+	movl	20(%esp),%eax
+	xorl	%ebx,%edi
+	movl	68(%ebp),%edx
+	xorl	%edx,%esi
+	movl	%edi,4(%eax)
+	movl	%esi,(%eax)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	BF_encrypt,.-.L_BF_encrypt_begin
+.globl	BF_decrypt
+.type	BF_decrypt,@function
+.align	16
+BF_decrypt:
+.L_BF_decrypt_begin:
+
+	pushl	%ebp
+	pushl	%ebx
+	movl	12(%esp),%ebx
+	movl	16(%esp),%ebp
+	pushl	%esi
+	pushl	%edi
+
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+	xorl	%eax,%eax
+	movl	68(%ebp),%ebx
+	xorl	%ecx,%ecx
+	xorl	%ebx,%edi
+
+
+	movl	64(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	60(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	56(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	52(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	48(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	44(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	40(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	36(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	32(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	28(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	24(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	20(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	16(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	12(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%edi
+
+
+	movl	8(%ebp),%edx
+	movl	%edi,%ebx
+	xorl	%edx,%esi
+	shrl	$16,%ebx
+	movl	%edi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+	xorl	%eax,%eax
+	xorl	%ebx,%esi
+
+
+	movl	4(%ebp),%edx
+	movl	%esi,%ebx
+	xorl	%edx,%edi
+	shrl	$16,%ebx
+	movl	%esi,%edx
+	movb	%bh,%al
+	andl	$255,%ebx
+	movb	%dh,%cl
+	andl	$255,%edx
+	movl	72(%ebp,%eax,4),%eax
+	movl	1096(%ebp,%ebx,4),%ebx
+	addl	%eax,%ebx
+	movl	2120(%ebp,%ecx,4),%eax
+	xorl	%eax,%ebx
+	movl	3144(%ebp,%edx,4),%edx
+	addl	%edx,%ebx
+
+	movl	20(%esp),%eax
+	xorl	%ebx,%edi
+	movl	(%ebp),%edx
+	xorl	%edx,%esi
+	movl	%edi,4(%eax)
+	movl	%esi,(%eax)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	BF_decrypt,.-.L_BF_decrypt_begin
+.globl	BF_cbc_encrypt
+.type	BF_cbc_encrypt,@function
+.align	16
+BF_cbc_encrypt:
+.L_BF_cbc_encrypt_begin:
+
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%ebp
+
+	movl	36(%esp),%ebx
+	movl	(%ebx),%esi
+	movl	4(%ebx),%edi
+	pushl	%edi
+	pushl	%esi
+	pushl	%edi
+	pushl	%esi
+	movl	%esp,%ebx
+	movl	36(%esp),%esi
+	movl	40(%esp),%edi
+
+	movl	56(%esp),%ecx
+
+	movl	48(%esp),%eax
+	pushl	%eax
+	pushl	%ebx
+	cmpl	$0,%ecx
+	jz	.L000decrypt
+	andl	$4294967288,%ebp
+	movl	8(%esp),%eax
+	movl	12(%esp),%ebx
+	jz	.L001encrypt_finish
+.L002encrypt_loop:
+	movl	(%esi),%ecx
+	movl	4(%esi),%edx
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+	call	.L_BF_encrypt_begin
+	movl	8(%esp),%eax
+	movl	12(%esp),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L002encrypt_loop
+.L001encrypt_finish:
+	movl	52(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L003finish
+	call	.L004PIC_point
+.L004PIC_point:
+	popl	%edx
+	leal	.L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx
+	movl	(%ecx,%ebp,4),%ebp
+	addl	%edx,%ebp
+	xorl	%ecx,%ecx
+	xorl	%edx,%edx
+	jmp	*%ebp
+.L006ej7:
+	movb	6(%esi),%dh
+	shll	$8,%edx
+.L007ej6:
+	movb	5(%esi),%dh
+.L008ej5:
+	movb	4(%esi),%dl
+.L009ej4:
+	movl	(%esi),%ecx
+	jmp	.L010ejend
+.L011ej3:
+	movb	2(%esi),%ch
+	shll	$8,%ecx
+.L012ej2:
+	movb	1(%esi),%ch
+.L013ej1:
+	movb	(%esi),%cl
+.L010ejend:
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+	call	.L_BF_encrypt_begin
+	movl	8(%esp),%eax
+	movl	12(%esp),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	jmp	.L003finish
+.L000decrypt:
+	andl	$4294967288,%ebp
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	jz	.L014decrypt_finish
+.L015decrypt_loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+	call	.L_BF_decrypt_begin
+	movl	8(%esp),%eax
+	movl	12(%esp),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	16(%esp),%ecx
+	movl	20(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%ecx,(%edi)
+	movl	%edx,4(%edi)
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L015decrypt_loop
+.L014decrypt_finish:
+	movl	52(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L003finish
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+	call	.L_BF_decrypt_begin
+	movl	8(%esp),%eax
+	movl	12(%esp),%ebx
+	bswap	%eax
+	bswap	%ebx
+	movl	16(%esp),%ecx
+	movl	20(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+.L016dj7:
+	rorl	$16,%edx
+	movb	%dl,6(%edi)
+	shrl	$16,%edx
+.L017dj6:
+	movb	%dh,5(%edi)
+.L018dj5:
+	movb	%dl,4(%edi)
+.L019dj4:
+	movl	%ecx,(%edi)
+	jmp	.L020djend
+.L021dj3:
+	rorl	$16,%ecx
+	movb	%cl,2(%edi)
+	shrl	$16,%ecx
+.L022dj2:
+	movb	%ch,1(%edi)
+.L023dj1:
+	movb	%cl,(%edi)
+.L020djend:
+	jmp	.L003finish
+.L003finish:
+	movl	60(%esp),%ecx
+	addl	$24,%esp
+	movl	%eax,(%ecx)
+	movl	%ebx,4(%ecx)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L005cbc_enc_jmp_table:
+.long	0
+.long	.L013ej1-.L004PIC_point
+.long	.L012ej2-.L004PIC_point
+.long	.L011ej3-.L004PIC_point
+.long	.L009ej4-.L004PIC_point
+.long	.L008ej5-.L004PIC_point
+.long	.L007ej6-.L004PIC_point
+.long	.L006ej7-.L004PIC_point
+.align	64
+.size	BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin
diff --git a/crypto/bn/asm/bn-586.s b/crypto/bn/asm/bn-586.s
new file mode 100644
index 0000000..fe873ce
--- /dev/null
+++ b/crypto/bn/asm/bn-586.s
@@ -0,0 +1,1384 @@
+.file	"crypto/bn/asm/bn-586.s"
+.text
+.globl	bn_mul_add_words
+.type	bn_mul_add_words,@function
+.align	16
+bn_mul_add_words:
+.L_bn_mul_add_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	xorl	%esi,%esi
+	movl	20(%esp),%edi
+	movl	28(%esp),%ecx
+	movl	24(%esp),%ebx
+	andl	$4294967288,%ecx
+	movl	32(%esp),%ebp
+	pushl	%ecx
+	jz	.L000maw_finish
+.align	16
+.L001maw_loop:
+
+	movl	(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+
+	movl	4(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	4(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+
+	movl	8(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	8(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+
+	movl	12(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	12(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+
+	movl	16(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	16(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+
+	movl	20(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	20(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+
+	movl	24(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	24(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+
+	movl	28(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	28(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,28(%edi)
+	movl	%edx,%esi
+
+	subl	$8,%ecx
+	leal	32(%ebx),%ebx
+	leal	32(%edi),%edi
+	jnz	.L001maw_loop
+.L000maw_finish:
+	movl	32(%esp),%ecx
+	andl	$7,%ecx
+	jnz	.L002maw_finish2
+	jmp	.L003maw_end
+.L002maw_finish2:
+
+	movl	(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	4(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	4(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	8(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	8(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	12(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	12(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	16(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	16(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	20(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	20(%edi),%eax
+	adcl	$0,%edx
+	decl	%ecx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+	jz	.L003maw_end
+
+	movl	24(%ebx),%eax
+	mull	%ebp
+	addl	%esi,%eax
+	adcl	$0,%edx
+	addl	24(%edi),%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+.L003maw_end:
+	movl	%esi,%eax
+	popl	%ecx
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin
+.globl	bn_mul_words
+.type	bn_mul_words,@function
+.align	16
+bn_mul_words:
+.L_bn_mul_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	xorl	%esi,%esi
+	movl	20(%esp),%edi
+	movl	24(%esp),%ebx
+	movl	28(%esp),%ebp
+	movl	32(%esp),%ecx
+	andl	$4294967288,%ebp
+	jz	.L004mw_finish
+.L005mw_loop:
+
+	movl	(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+
+	movl	4(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+
+	movl	8(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+
+	movl	12(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+
+	movl	16(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+
+	movl	20(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+
+	movl	24(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+
+	movl	28(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,28(%edi)
+	movl	%edx,%esi
+
+	addl	$32,%ebx
+	addl	$32,%edi
+	subl	$8,%ebp
+	jz	.L004mw_finish
+	jmp	.L005mw_loop
+.L004mw_finish:
+	movl	28(%esp),%ebp
+	andl	$7,%ebp
+	jnz	.L006mw_finish2
+	jmp	.L007mw_end
+.L006mw_finish2:
+
+	movl	(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	4(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,4(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	8(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,8(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	12(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,12(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	16(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	20(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,20(%edi)
+	movl	%edx,%esi
+	decl	%ebp
+	jz	.L007mw_end
+
+	movl	24(%ebx),%eax
+	mull	%ecx
+	addl	%esi,%eax
+	adcl	$0,%edx
+	movl	%eax,24(%edi)
+	movl	%edx,%esi
+.L007mw_end:
+	movl	%esi,%eax
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_words,.-.L_bn_mul_words_begin
+.globl	bn_sqr_words
+.type	bn_sqr_words,@function
+.align	16
+bn_sqr_words:
+.L_bn_sqr_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%ebx
+	andl	$4294967288,%ebx
+	jz	.L008sw_finish
+.L009sw_loop:
+
+	movl	(%edi),%eax
+	mull	%eax
+	movl	%eax,(%esi)
+	movl	%edx,4(%esi)
+
+	movl	4(%edi),%eax
+	mull	%eax
+	movl	%eax,8(%esi)
+	movl	%edx,12(%esi)
+
+	movl	8(%edi),%eax
+	mull	%eax
+	movl	%eax,16(%esi)
+	movl	%edx,20(%esi)
+
+	movl	12(%edi),%eax
+	mull	%eax
+	movl	%eax,24(%esi)
+	movl	%edx,28(%esi)
+
+	movl	16(%edi),%eax
+	mull	%eax
+	movl	%eax,32(%esi)
+	movl	%edx,36(%esi)
+
+	movl	20(%edi),%eax
+	mull	%eax
+	movl	%eax,40(%esi)
+	movl	%edx,44(%esi)
+
+	movl	24(%edi),%eax
+	mull	%eax
+	movl	%eax,48(%esi)
+	movl	%edx,52(%esi)
+
+	movl	28(%edi),%eax
+	mull	%eax
+	movl	%eax,56(%esi)
+	movl	%edx,60(%esi)
+
+	addl	$32,%edi
+	addl	$64,%esi
+	subl	$8,%ebx
+	jnz	.L009sw_loop
+.L008sw_finish:
+	movl	28(%esp),%ebx
+	andl	$7,%ebx
+	jz	.L010sw_end
+
+	movl	(%edi),%eax
+	mull	%eax
+	movl	%eax,(%esi)
+	decl	%ebx
+	movl	%edx,4(%esi)
+	jz	.L010sw_end
+
+	movl	4(%edi),%eax
+	mull	%eax
+	movl	%eax,8(%esi)
+	decl	%ebx
+	movl	%edx,12(%esi)
+	jz	.L010sw_end
+
+	movl	8(%edi),%eax
+	mull	%eax
+	movl	%eax,16(%esi)
+	decl	%ebx
+	movl	%edx,20(%esi)
+	jz	.L010sw_end
+
+	movl	12(%edi),%eax
+	mull	%eax
+	movl	%eax,24(%esi)
+	decl	%ebx
+	movl	%edx,28(%esi)
+	jz	.L010sw_end
+
+	movl	16(%edi),%eax
+	mull	%eax
+	movl	%eax,32(%esi)
+	decl	%ebx
+	movl	%edx,36(%esi)
+	jz	.L010sw_end
+
+	movl	20(%edi),%eax
+	mull	%eax
+	movl	%eax,40(%esi)
+	decl	%ebx
+	movl	%edx,44(%esi)
+	jz	.L010sw_end
+
+	movl	24(%edi),%eax
+	mull	%eax
+	movl	%eax,48(%esi)
+	movl	%edx,52(%esi)
+.L010sw_end:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sqr_words,.-.L_bn_sqr_words_begin
+.globl	bn_div_words
+.type	bn_div_words,@function
+.align	16
+bn_div_words:
+.L_bn_div_words_begin:
+	movl	4(%esp),%edx
+	movl	8(%esp),%eax
+	movl	12(%esp),%ecx
+	divl	%ecx
+	ret
+.size	bn_div_words,.-.L_bn_div_words_begin
+.globl	bn_add_words
+.type	bn_add_words,@function
+.align	16
+bn_add_words:
+.L_bn_add_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx
+	movl	24(%esp),%esi
+	movl	28(%esp),%edi
+	movl	32(%esp),%ebp
+	xorl	%eax,%eax
+	andl	$4294967288,%ebp
+	jz	.L011aw_finish
+.L012aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L012aw_loop
+.L011aw_finish:
+	movl	32(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L013aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,(%ebx)
+	jz	.L013aw_end
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L013aw_end
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L013aw_end
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L013aw_end
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L013aw_end
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L013aw_end
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	addl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	addl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+.L013aw_end:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_add_words,.-.L_bn_add_words_begin
+.globl	bn_sub_words
+.type	bn_sub_words,@function
+.align	16
+bn_sub_words:
+.L_bn_sub_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx
+	movl	24(%esp),%esi
+	movl	28(%esp),%edi
+	movl	32(%esp),%ebp
+	xorl	%eax,%eax
+	andl	$4294967288,%ebp
+	jz	.L014aw_finish
+.L015aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L015aw_loop
+.L014aw_finish:
+	movl	32(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L016aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,(%ebx)
+	jz	.L016aw_end
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L016aw_end
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L016aw_end
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L016aw_end
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L016aw_end
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L016aw_end
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+.L016aw_end:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sub_words,.-.L_bn_sub_words_begin
+.globl	bn_sub_part_words
+.type	bn_sub_part_words,@function
+.align	16
+bn_sub_part_words:
+.L_bn_sub_part_words_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+	movl	20(%esp),%ebx
+	movl	24(%esp),%esi
+	movl	28(%esp),%edi
+	movl	32(%esp),%ebp
+	xorl	%eax,%eax
+	andl	$4294967288,%ebp
+	jz	.L017aw_finish
+.L018aw_loop:
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+
+	movl	4(%esi),%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	8(%esi),%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	12(%esi),%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	16(%esi),%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	20(%esi),%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	24(%esi),%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	28(%esi),%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L018aw_loop
+.L017aw_finish:
+	movl	32(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+	decl	%ebp
+	jz	.L019aw_end
+
+	movl	(%esi),%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+	addl	$4,%esi
+	addl	$4,%edi
+	addl	$4,%ebx
+.L019aw_end:
+	cmpl	$0,36(%esp)
+	je	.L020pw_end
+	movl	36(%esp),%ebp
+	cmpl	$0,%ebp
+	je	.L020pw_end
+	jge	.L021pw_pos
+
+	movl	$0,%edx
+	subl	%ebp,%edx
+	movl	%edx,%ebp
+	andl	$4294967288,%ebp
+	jz	.L022pw_neg_finish
+.L023pw_neg_loop:
+
+	movl	$0,%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,(%ebx)
+
+	movl	$0,%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,4(%ebx)
+
+	movl	$0,%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,8(%ebx)
+
+	movl	$0,%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,12(%ebx)
+
+	movl	$0,%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,16(%ebx)
+
+	movl	$0,%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,20(%ebx)
+
+	movl	$0,%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+
+	movl	$0,%ecx
+	movl	28(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,28(%ebx)
+
+	addl	$32,%edi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L023pw_neg_loop
+.L022pw_neg_finish:
+	movl	36(%esp),%edx
+	movl	$0,%ebp
+	subl	%edx,%ebp
+	andl	$7,%ebp
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	4(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,4(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	8(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,8(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	12(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,12(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	16(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,16(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	20(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	decl	%ebp
+	movl	%ecx,20(%ebx)
+	jz	.L020pw_end
+
+	movl	$0,%ecx
+	movl	24(%edi),%edx
+	subl	%eax,%ecx
+	movl	$0,%eax
+	adcl	%eax,%eax
+	subl	%edx,%ecx
+	adcl	$0,%eax
+	movl	%ecx,24(%ebx)
+	jmp	.L020pw_end
+.L021pw_pos:
+	andl	$4294967288,%ebp
+	jz	.L024pw_pos_finish
+.L025pw_pos_loop:
+
+	movl	(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,(%ebx)
+	jnc	.L026pw_nc0
+
+	movl	4(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,4(%ebx)
+	jnc	.L027pw_nc1
+
+	movl	8(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,8(%ebx)
+	jnc	.L028pw_nc2
+
+	movl	12(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,12(%ebx)
+	jnc	.L029pw_nc3
+
+	movl	16(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,16(%ebx)
+	jnc	.L030pw_nc4
+
+	movl	20(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,20(%ebx)
+	jnc	.L031pw_nc5
+
+	movl	24(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,24(%ebx)
+	jnc	.L032pw_nc6
+
+	movl	28(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,28(%ebx)
+	jnc	.L033pw_nc7
+
+	addl	$32,%esi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L025pw_pos_loop
+.L024pw_pos_finish:
+	movl	36(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L020pw_end
+
+	movl	(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,(%ebx)
+	jnc	.L034pw_tail_nc0
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	4(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,4(%ebx)
+	jnc	.L035pw_tail_nc1
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	8(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,8(%ebx)
+	jnc	.L036pw_tail_nc2
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	12(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,12(%ebx)
+	jnc	.L037pw_tail_nc3
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	16(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,16(%ebx)
+	jnc	.L038pw_tail_nc4
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	20(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,20(%ebx)
+	jnc	.L039pw_tail_nc5
+	decl	%ebp
+	jz	.L020pw_end
+
+	movl	24(%esi),%ecx
+	subl	%eax,%ecx
+	movl	%ecx,24(%ebx)
+	jnc	.L040pw_tail_nc6
+	movl	$1,%eax
+	jmp	.L020pw_end
+.L041pw_nc_loop:
+	movl	(%esi),%ecx
+	movl	%ecx,(%ebx)
+.L026pw_nc0:
+	movl	4(%esi),%ecx
+	movl	%ecx,4(%ebx)
+.L027pw_nc1:
+	movl	8(%esi),%ecx
+	movl	%ecx,8(%ebx)
+.L028pw_nc2:
+	movl	12(%esi),%ecx
+	movl	%ecx,12(%ebx)
+.L029pw_nc3:
+	movl	16(%esi),%ecx
+	movl	%ecx,16(%ebx)
+.L030pw_nc4:
+	movl	20(%esi),%ecx
+	movl	%ecx,20(%ebx)
+.L031pw_nc5:
+	movl	24(%esi),%ecx
+	movl	%ecx,24(%ebx)
+.L032pw_nc6:
+	movl	28(%esi),%ecx
+	movl	%ecx,28(%ebx)
+.L033pw_nc7:
+
+	addl	$32,%esi
+	addl	$32,%ebx
+	subl	$8,%ebp
+	jnz	.L041pw_nc_loop
+	movl	36(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L042pw_nc_end
+	movl	(%esi),%ecx
+	movl	%ecx,(%ebx)
+.L034pw_tail_nc0:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	4(%esi),%ecx
+	movl	%ecx,4(%ebx)
+.L035pw_tail_nc1:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	8(%esi),%ecx
+	movl	%ecx,8(%ebx)
+.L036pw_tail_nc2:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	12(%esi),%ecx
+	movl	%ecx,12(%ebx)
+.L037pw_tail_nc3:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	16(%esi),%ecx
+	movl	%ecx,16(%ebx)
+.L038pw_tail_nc4:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	20(%esi),%ecx
+	movl	%ecx,20(%ebx)
+.L039pw_tail_nc5:
+	decl	%ebp
+	jz	.L042pw_nc_end
+	movl	24(%esi),%ecx
+	movl	%ecx,24(%ebx)
+.L040pw_tail_nc6:
+.L042pw_nc_end:
+	movl	$0,%eax
+.L020pw_end:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_sub_part_words,.-.L_bn_sub_part_words_begin
diff --git a/crypto/bn/asm/co-586.s b/crypto/bn/asm/co-586.s
new file mode 100644
index 0000000..3cb8073
--- /dev/null
+++ b/crypto/bn/asm/co-586.s
@@ -0,0 +1,1254 @@
+.file	"crypto/bn/asm/co-586.s"
+.text
+.globl	bn_mul_comba8
+.type	bn_mul_comba8,@function
+.align	16
+bn_mul_comba8:
+.L_bn_mul_comba8_begin:
+	pushl	%esi
+	movl	12(%esp),%esi
+	pushl	%edi
+	movl	20(%esp),%edi
+	pushl	%ebp
+	pushl	%ebx
+	xorl	%ebx,%ebx
+	movl	(%esi),%eax
+	xorl	%ecx,%ecx
+	movl	(%edi),%edx
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%eax)
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,4(%eax)
+	movl	8(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%eax)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,12(%eax)
+	movl	16(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%eax)
+	movl	20(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	12(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	16(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,20(%eax)
+	movl	24(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	16(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	12(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	16(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	20(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,24(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	16(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	20(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	24(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,28(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	24(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	16(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	12(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	24(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	28(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,32(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	24(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	16(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	16(%esi),%eax
+	adcl	%edx,%ecx
+	movl	20(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	12(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	28(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,36(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esi),%eax
+	adcl	%edx,%ebp
+	movl	20(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	16(%esi),%eax
+	adcl	%edx,%ebp
+	movl	24(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	12(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	16(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,40(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	24(%esi),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esi),%eax
+	adcl	%edx,%ebx
+	movl	24(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	16(%esi),%eax
+	adcl	%edx,%ebx
+	movl	28(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	20(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,44(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	24(%esi),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esi),%eax
+	adcl	%edx,%ecx
+	movl	28(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	24(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,48(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	24(%esi),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	28(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,52(%eax)
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+	movl	%ebp,56(%eax)
+
+
+	movl	%ebx,60(%eax)
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_mul_comba8,.-.L_bn_mul_comba8_begin
+.globl	bn_mul_comba4
+.type	bn_mul_comba4,@function
+.align	16
+bn_mul_comba4:
+.L_bn_mul_comba4_begin:
+	pushl	%esi
+	movl	12(%esp),%esi
+	pushl	%edi
+	movl	20(%esp),%edi
+	pushl	%ebp
+	pushl	%ebx
+	xorl	%ebx,%ebx
+	movl	(%esi),%eax
+	xorl	%ecx,%ecx
+	movl	(%edi),%edx
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%eax)
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	(%esi),%eax
+	adcl	%edx,%ebp
+	movl	4(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,4(%eax)
+	movl	8(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	4(%esi),%eax
+	adcl	%edx,%ebx
+	movl	4(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	(%esi),%eax
+	adcl	%edx,%ebx
+	movl	8(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%eax)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	8(%esi),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	4(%esi),%eax
+	adcl	%edx,%ecx
+	movl	8(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	(%esi),%eax
+	adcl	%edx,%ecx
+	movl	12(%edi),%edx
+	adcl	$0,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	movl	4(%edi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,12(%eax)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	8(%esi),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	4(%esi),%eax
+	adcl	%edx,%ebp
+	movl	12(%edi),%edx
+	adcl	$0,%ebx
+
+	mull	%edx
+	addl	%eax,%ecx
+	movl	20(%esp),%eax
+	adcl	%edx,%ebp
+	movl	8(%edi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%eax)
+	movl	12(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	8(%esi),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+
+	mull	%edx
+	addl	%eax,%ebp
+	movl	20(%esp),%eax
+	adcl	%edx,%ebx
+	movl	12(%edi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,20(%eax)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%ebx
+	movl	20(%esp),%eax
+	adcl	%edx,%ecx
+	adcl	$0,%ebp
+	movl	%ebx,24(%eax)
+
+
+	movl	%ecx,28(%eax)
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_mul_comba4,.-.L_bn_mul_comba4_begin
+.globl	bn_sqr_comba8
+.type	bn_sqr_comba8,@function
+.align	16
+bn_sqr_comba8:
+.L_bn_sqr_comba8_begin:
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	pushl	%ebx
+	movl	20(%esp),%edi
+	movl	24(%esp),%esi
+	xorl	%ebx,%ebx
+	xorl	%ecx,%ecx
+	movl	(%esi),%eax
+
+	xorl	%ebp,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%edi)
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,4(%edi)
+	movl	(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	4(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%edi)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	8(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	16(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,12(%edi)
+	movl	(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	12(%esi),%eax
+	adcl	$0,%ebx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%edi)
+	movl	20(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	16(%esi),%eax
+	adcl	$0,%ecx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	12(%esi),%eax
+	adcl	$0,%ecx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,20(%edi)
+	movl	(%esi),%edx
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	20(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	16(%esi),%eax
+	adcl	$0,%ebp
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	12(%esi),%eax
+	adcl	$0,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,24(%edi)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	24(%esi),%eax
+	adcl	$0,%ebx
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	20(%esi),%eax
+	adcl	$0,%ebx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	16(%esi),%eax
+	adcl	$0,%ebx
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	28(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,28(%edi)
+	movl	4(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	8(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	20(%esi),%eax
+	adcl	$0,%ecx
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	16(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	8(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,32(%edi)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%eax
+	adcl	$0,%ebp
+	movl	12(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	20(%esi),%eax
+	adcl	$0,%ebp
+	movl	16(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	28(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,36(%edi)
+	movl	12(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	24(%esi),%eax
+	adcl	$0,%ebx
+	movl	16(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	20(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	16(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,40(%edi)
+	movl	28(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	24(%esi),%eax
+	adcl	$0,%ecx
+	movl	20(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	28(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,44(%edi)
+	movl	20(%esi),%edx
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%eax
+	adcl	$0,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	24(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,48(%edi)
+	movl	28(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	28(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,52(%edi)
+
+
+	xorl	%ecx,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+	movl	%ebp,56(%edi)
+
+	movl	%ebx,60(%edi)
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_sqr_comba8,.-.L_bn_sqr_comba8_begin
+.globl	bn_sqr_comba4
+.type	bn_sqr_comba4,@function
+.align	16
+bn_sqr_comba4:
+.L_bn_sqr_comba4_begin:
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	pushl	%ebx
+	movl	20(%esp),%edi
+	movl	24(%esp),%esi
+	xorl	%ebx,%ebx
+	xorl	%ecx,%ecx
+	movl	(%esi),%eax
+
+	xorl	%ebp,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	(%esi),%edx
+	adcl	$0,%ebp
+	movl	%ebx,(%edi)
+	movl	4(%esi),%eax
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+	movl	%ecx,4(%edi)
+	movl	(%esi),%edx
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	4(%esi),%eax
+	adcl	$0,%ecx
+
+	mull	%eax
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	$0,%ecx
+	movl	%ebp,8(%edi)
+	movl	12(%esi),%eax
+
+
+	xorl	%ebp,%ebp
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	8(%esi),%eax
+	adcl	$0,%ebp
+	movl	4(%esi),%edx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebp
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	movl	12(%esi),%eax
+	adcl	$0,%ebp
+	movl	%ebx,12(%edi)
+	movl	4(%esi),%edx
+
+
+	xorl	%ebx,%ebx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ebx
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%eax
+	adcl	$0,%ebx
+
+	mull	%eax
+	addl	%eax,%ecx
+	adcl	%edx,%ebp
+	movl	8(%esi),%edx
+	adcl	$0,%ebx
+	movl	%ecx,16(%edi)
+	movl	12(%esi),%eax
+
+
+	xorl	%ecx,%ecx
+
+	mull	%edx
+	addl	%eax,%eax
+	adcl	%edx,%edx
+	adcl	$0,%ecx
+	addl	%eax,%ebp
+	adcl	%edx,%ebx
+	movl	12(%esi),%eax
+	adcl	$0,%ecx
+	movl	%ebp,20(%edi)
+
+
+	xorl	%ebp,%ebp
+
+	mull	%eax
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+	adcl	$0,%ebp
+	movl	%ebx,24(%edi)
+
+	movl	%ecx,28(%edi)
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
diff --git a/crypto/bn/asm/x86-gf2m.s b/crypto/bn/asm/x86-gf2m.s
new file mode 100644
index 0000000..f07843b
--- /dev/null
+++ b/crypto/bn/asm/x86-gf2m.s
@@ -0,0 +1,331 @@
+.file	"crypto/bn/asm/x86-gf2m.s"
+.text
+.type	_mul_1x1_mmx,@function
+.align	16
+_mul_1x1_mmx:
+	subl	$36,%esp
+	movl	%eax,%ecx
+	leal	(%eax,%eax,1),%edx
+	andl	$1073741823,%ecx
+	leal	(%edx,%edx,1),%ebp
+	movl	$0,(%esp)
+	andl	$2147483647,%edx
+	movd	%eax,%mm2
+	movd	%ebx,%mm3
+	movl	%ecx,4(%esp)
+	xorl	%edx,%ecx
+	pxor	%mm5,%mm5
+	pxor	%mm4,%mm4
+	movl	%edx,8(%esp)
+	xorl	%ebp,%edx
+	movl	%ecx,12(%esp)
+	pcmpgtd	%mm2,%mm5
+	paddd	%mm2,%mm2
+	xorl	%edx,%ecx
+	movl	%ebp,16(%esp)
+	xorl	%edx,%ebp
+	pand	%mm3,%mm5
+	pcmpgtd	%mm2,%mm4
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%ebp
+	psllq	$31,%mm5
+	pand	%mm3,%mm4
+	movl	%edx,24(%esp)
+	movl	$7,%esi
+	movl	%ebp,28(%esp)
+	movl	%esi,%ebp
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	movl	%ebp,%edi
+	psllq	$30,%mm4
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	movd	(%esp,%esi,4),%mm0
+	movl	%ebp,%esi
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	movd	(%esp,%edi,4),%mm2
+	movl	%ebp,%edi
+	psllq	$3,%mm2
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	pxor	%mm2,%mm0
+	movd	(%esp,%esi,4),%mm1
+	movl	%ebp,%esi
+	psllq	$6,%mm1
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	pxor	%mm1,%mm0
+	movd	(%esp,%edi,4),%mm2
+	movl	%ebp,%edi
+	psllq	$9,%mm2
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	pxor	%mm2,%mm0
+	movd	(%esp,%esi,4),%mm1
+	movl	%ebp,%esi
+	psllq	$12,%mm1
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	pxor	%mm1,%mm0
+	movd	(%esp,%edi,4),%mm2
+	movl	%ebp,%edi
+	psllq	$15,%mm2
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	pxor	%mm2,%mm0
+	movd	(%esp,%esi,4),%mm1
+	movl	%ebp,%esi
+	psllq	$18,%mm1
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	pxor	%mm1,%mm0
+	movd	(%esp,%edi,4),%mm2
+	movl	%ebp,%edi
+	psllq	$21,%mm2
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	pxor	%mm2,%mm0
+	movd	(%esp,%esi,4),%mm1
+	movl	%ebp,%esi
+	psllq	$24,%mm1
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	pxor	%mm1,%mm0
+	movd	(%esp,%edi,4),%mm2
+	pxor	%mm4,%mm0
+	psllq	$27,%mm2
+	pxor	%mm2,%mm0
+	movd	(%esp,%esi,4),%mm1
+	pxor	%mm5,%mm0
+	psllq	$30,%mm1
+	addl	$36,%esp
+	pxor	%mm1,%mm0
+	ret
+.size	_mul_1x1_mmx,.-_mul_1x1_mmx
+.type	_mul_1x1_ialu,@function
+.align	16
+_mul_1x1_ialu:
+	subl	$36,%esp
+	movl	%eax,%ecx
+	leal	(%eax,%eax,1),%edx
+	leal	(,%eax,4),%ebp
+	andl	$1073741823,%ecx
+	leal	(%eax,%eax,1),%edi
+	sarl	$31,%eax
+	movl	$0,(%esp)
+	andl	$2147483647,%edx
+	movl	%ecx,4(%esp)
+	xorl	%edx,%ecx
+	movl	%edx,8(%esp)
+	xorl	%ebp,%edx
+	movl	%ecx,12(%esp)
+	xorl	%edx,%ecx
+	movl	%ebp,16(%esp)
+	xorl	%edx,%ebp
+	movl	%ecx,20(%esp)
+	xorl	%ecx,%ebp
+	sarl	$31,%edi
+	andl	%ebx,%eax
+	movl	%edx,24(%esp)
+	andl	%ebx,%edi
+	movl	%ebp,28(%esp)
+	movl	%eax,%edx
+	shll	$31,%eax
+	movl	%edi,%ecx
+	shrl	$1,%edx
+	movl	$7,%esi
+	shll	$30,%edi
+	andl	%ebx,%esi
+	shrl	$2,%ecx
+	xorl	%edi,%eax
+	shrl	$3,%ebx
+	movl	$7,%edi
+	andl	%ebx,%edi
+	shrl	$3,%ebx
+	xorl	%ecx,%edx
+	xorl	(%esp,%esi,4),%eax
+	movl	$7,%esi
+	andl	%ebx,%esi
+	shrl	$3,%ebx
+	movl	(%esp,%edi,4),%ebp
+	movl	$7,%edi
+	movl	%ebp,%ecx
+	shll	$3,%ebp
+	andl	%ebx,%edi
+	shrl	$29,%ecx
+	xorl	%ebp,%eax
+	shrl	$3,%ebx
+	xorl	%ecx,%edx
+	movl	(%esp,%esi,4),%ecx
+	movl	$7,%esi
+	movl	%ecx,%ebp
+	shll	$6,%ecx
+	andl	%ebx,%esi
+	shrl	$26,%ebp
+	xorl	%ecx,%eax
+	shrl	$3,%ebx
+	xorl	%ebp,%edx
+	movl	(%esp,%edi,4),%ebp
+	movl	$7,%edi
+	movl	%ebp,%ecx
+	shll	$9,%ebp
+	andl	%ebx,%edi
+	shrl	$23,%ecx
+	xorl	%ebp,%eax
+	shrl	$3,%ebx
+	xorl	%ecx,%edx
+	movl	(%esp,%esi,4),%ecx
+	movl	$7,%esi
+	movl	%ecx,%ebp
+	shll	$12,%ecx
+	andl	%ebx,%esi
+	shrl	$20,%ebp
+	xorl	%ecx,%eax
+	shrl	$3,%ebx
+	xorl	%ebp,%edx
+	movl	(%esp,%edi,4),%ebp
+	movl	$7,%edi
+	movl	%ebp,%ecx
+	shll	$15,%ebp
+	andl	%ebx,%edi
+	shrl	$17,%ecx
+	xorl	%ebp,%eax
+	shrl	$3,%ebx
+	xorl	%ecx,%edx
+	movl	(%esp,%esi,4),%ecx
+	movl	$7,%esi
+	movl	%ecx,%ebp
+	shll	$18,%ecx
+	andl	%ebx,%esi
+	shrl	$14,%ebp
+	xorl	%ecx,%eax
+	shrl	$3,%ebx
+	xorl	%ebp,%edx
+	movl	(%esp,%edi,4),%ebp
+	movl	$7,%edi
+	movl	%ebp,%ecx
+	shll	$21,%ebp
+	andl	%ebx,%edi
+	shrl	$11,%ecx
+	xorl	%ebp,%eax
+	shrl	$3,%ebx
+	xorl	%ecx,%edx
+	movl	(%esp,%esi,4),%ecx
+	movl	$7,%esi
+	movl	%ecx,%ebp
+	shll	$24,%ecx
+	andl	%ebx,%esi
+	shrl	$8,%ebp
+	xorl	%ecx,%eax
+	shrl	$3,%ebx
+	xorl	%ebp,%edx
+	movl	(%esp,%edi,4),%ebp
+	movl	%ebp,%ecx
+	shll	$27,%ebp
+	movl	(%esp,%esi,4),%edi
+	shrl	$5,%ecx
+	movl	%edi,%esi
+	xorl	%ebp,%eax
+	shll	$30,%edi
+	xorl	%ecx,%edx
+	shrl	$2,%esi
+	xorl	%edi,%eax
+	xorl	%esi,%edx
+	addl	$36,%esp
+	ret
+.size	_mul_1x1_ialu,.-_mul_1x1_ialu
+.globl	bn_GF2m_mul_2x2
+.type	bn_GF2m_mul_2x2,@function
+.align	16
+bn_GF2m_mul_2x2:
+.L_bn_GF2m_mul_2x2_begin:
+	leal	OPENSSL_ia32cap_P,%edx
+	movl	(%edx),%eax
+	movl	4(%edx),%edx
+	testl	$8388608,%eax
+	jz	.L000ialu
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	24(%esp),%eax
+	movl	32(%esp),%ebx
+	call	_mul_1x1_mmx
+	movq	%mm0,%mm7
+	movl	28(%esp),%eax
+	movl	36(%esp),%ebx
+	call	_mul_1x1_mmx
+	movq	%mm0,%mm6
+	movl	24(%esp),%eax
+	movl	32(%esp),%ebx
+	xorl	28(%esp),%eax
+	xorl	36(%esp),%ebx
+	call	_mul_1x1_mmx
+	pxor	%mm7,%mm0
+	movl	20(%esp),%eax
+	pxor	%mm6,%mm0
+	movq	%mm0,%mm2
+	psllq	$32,%mm0
+	popl	%edi
+	psrlq	$32,%mm2
+	popl	%esi
+	pxor	%mm6,%mm0
+	popl	%ebx
+	pxor	%mm7,%mm2
+	movq	%mm0,(%eax)
+	popl	%ebp
+	movq	%mm2,8(%eax)
+	emms
+	ret
+.align	16
+.L000ialu:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	subl	$20,%esp
+	movl	44(%esp),%eax
+	movl	52(%esp),%ebx
+	call	_mul_1x1_ialu
+	movl	%eax,8(%esp)
+	movl	%edx,12(%esp)
+	movl	48(%esp),%eax
+	movl	56(%esp),%ebx
+	call	_mul_1x1_ialu
+	movl	%eax,(%esp)
+	movl	%edx,4(%esp)
+	movl	44(%esp),%eax
+	movl	52(%esp),%ebx
+	xorl	48(%esp),%eax
+	xorl	56(%esp),%ebx
+	call	_mul_1x1_ialu
+	movl	40(%esp),%ebp
+	movl	(%esp),%ebx
+	movl	4(%esp),%ecx
+	movl	8(%esp),%edi
+	movl	12(%esp),%esi
+	xorl	%edx,%eax
+	xorl	%ecx,%edx
+	xorl	%ebx,%eax
+	movl	%ebx,(%ebp)
+	xorl	%edi,%edx
+	movl	%esi,12(%ebp)
+	xorl	%esi,%eax
+	addl	$20,%esp
+	xorl	%esi,%edx
+	popl	%edi
+	xorl	%edx,%eax
+	popl	%esi
+	movl	%edx,8(%ebp)
+	popl	%ebx
+	movl	%eax,4(%ebp)
+	popl	%ebp
+	ret
+.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
+.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
+.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
+.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte	62,0
+.comm	OPENSSL_ia32cap_P,8,4
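
The bn_GF2m_mul_2x2 routine above is what -DOPENSSL_BN_ASM_GF2m (added to
x86_cflags) switches in: crypto/bn/bn_gf2m.c calls it in place of its C
fallback.  At run time it tests bit 23 of OPENSSL_ia32cap_P (8388608 ==
0x800000, the CPUID MMX flag) and takes the MMX path when available,
otherwise the integer-ALU path; both compute the 128-bit carry-less
product of two 64-bit GF(2)[x] polynomials with one level of Karatsuba,
i.e. three 32x32 multiplies instead of four.  For illustration only (not
part of the patch; clmul32 and the _ref suffix are made-up names), a
portable C model of the value the routine stores into r[0..3]:

    #include <stdint.h>

    /* carry-less 32x32 -> 64 multiply: XOR of shifted partial products */
    static uint64_t clmul32(uint32_t a, uint32_t b)
    {
        uint64_t r = 0;
        for (int i = 0; i < 32; i++)
            if (b & (1u << i))
                r ^= (uint64_t)a << i;
        return r;
    }

    /* r[0..3] (little-endian words) = (a1:a0) * (b1:b0) over GF(2) */
    void bn_GF2m_mul_2x2_ref(uint32_t r[4], uint32_t a1, uint32_t a0,
                             uint32_t b1, uint32_t b0)
    {
        uint64_t hi  = clmul32(a1, b1);
        uint64_t lo  = clmul32(a0, b0);
        /* Karatsuba: middle term from one multiply of the XORed halves */
        uint64_t mid = clmul32(a1 ^ a0, b1 ^ b0) ^ hi ^ lo;

        r[0] = (uint32_t)lo;
        r[1] = (uint32_t)(lo >> 32) ^ (uint32_t)mid;
        r[2] = (uint32_t)(mid >> 32) ^ (uint32_t)hi;
        r[3] = (uint32_t)(hi >> 32);
    }
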
diff --git a/crypto/bn/asm/x86-mont.s b/crypto/bn/asm/x86-mont.s
new file mode 100644
index 0000000..2bbb0e3
--- /dev/null
+++ b/crypto/bn/asm/x86-mont.s
@@ -0,0 +1,338 @@
+.file	"crypto/bn/asm/x86-mont.s"
+.text
+.globl	bn_mul_mont
+.type	bn_mul_mont,@function
+.align	16
+bn_mul_mont:
+.L_bn_mul_mont_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	xorl	%eax,%eax
+	movl	40(%esp),%edi
+	cmpl	$4,%edi
+	jl	.L000just_leave
+	leal	20(%esp),%esi
+	leal	24(%esp),%edx
+	movl	%esp,%ebp
+	addl	$2,%edi
+	negl	%edi
+	leal	-32(%esp,%edi,4),%esp
+	negl	%edi
+	movl	%esp,%eax
+	subl	%edx,%eax
+	andl	$2047,%eax
+	subl	%eax,%esp
+	xorl	%esp,%edx
+	andl	$2048,%edx
+	xorl	$2048,%edx
+	subl	%edx,%esp
+	andl	$-64,%esp
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	movl	16(%esi),%esi
+	movl	(%esi),%esi
+	movl	%eax,4(%esp)
+	movl	%ebx,8(%esp)
+	movl	%ecx,12(%esp)
+	movl	%edx,16(%esp)
+	movl	%esi,20(%esp)
+	leal	-3(%edi),%ebx
+	movl	%ebp,24(%esp)
+	movl	8(%esp),%esi
+	leal	1(%ebx),%ebp
+	movl	12(%esp),%edi
+	xorl	%ecx,%ecx
+	movl	%esi,%edx
+	andl	$1,%ebp
+	subl	%edi,%edx
+	leal	4(%edi,%ebx,4),%eax
+	orl	%edx,%ebp
+	movl	(%edi),%edi
+	jz	.L001bn_sqr_mont
+	movl	%eax,28(%esp)
+	movl	(%esi),%eax
+	xorl	%edx,%edx
+.align	16
+.L002mull:
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%eax,%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	movl	(%esi,%ecx,4),%eax
+	cmpl	%ebx,%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L002mull
+	movl	%edx,%ebp
+	mull	%edi
+	movl	20(%esp),%edi
+	addl	%ebp,%eax
+	movl	16(%esp),%esi
+	adcl	$0,%edx
+	imull	32(%esp),%edi
+	movl	%eax,32(%esp,%ebx,4)
+	xorl	%ecx,%ecx
+	movl	%edx,36(%esp,%ebx,4)
+	movl	%ecx,40(%esp,%ebx,4)
+	movl	(%esi),%eax
+	mull	%edi
+	addl	32(%esp),%eax
+	movl	4(%esi),%eax
+	adcl	$0,%edx
+	incl	%ecx
+	jmp	.L0032ndmadd
+.align	16
+.L0041stmadd:
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L0041stmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%eax
+	movl	20(%esp),%edi
+	adcl	$0,%edx
+	movl	16(%esp),%esi
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	imull	32(%esp),%edi
+	xorl	%ecx,%ecx
+	addl	36(%esp,%ebx,4),%edx
+	movl	%ebp,32(%esp,%ebx,4)
+	adcl	$0,%ecx
+	movl	(%esi),%eax
+	movl	%edx,36(%esp,%ebx,4)
+	movl	%ecx,40(%esp,%ebx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	movl	4(%esi),%eax
+	adcl	$0,%edx
+	movl	$1,%ecx
+.align	16
+.L0032ndmadd:
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,24(%esp,%ecx,4)
+	jl	.L0032ndmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ebx,4)
+	xorl	%eax,%eax
+	movl	12(%esp),%ecx
+	addl	36(%esp,%ebx,4),%edx
+	adcl	40(%esp,%ebx,4),%eax
+	leal	4(%ecx),%ecx
+	movl	%edx,32(%esp,%ebx,4)
+	cmpl	28(%esp),%ecx
+	movl	%eax,36(%esp,%ebx,4)
+	je	.L005common_tail
+	movl	(%ecx),%edi
+	movl	8(%esp),%esi
+	movl	%ecx,12(%esp)
+	xorl	%ecx,%ecx
+	xorl	%edx,%edx
+	movl	(%esi),%eax
+	jmp	.L0041stmadd
+.align	16
+.L001bn_sqr_mont:
+	movl	%ebx,(%esp)
+	movl	%ecx,12(%esp)
+	movl	%edi,%eax
+	mull	%edi
+	movl	%eax,32(%esp)
+	movl	%edx,%ebx
+	shrl	$1,%edx
+	andl	$1,%ebx
+	incl	%ecx
+.align	16
+.L006sqr:
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	leal	1(%ecx),%ecx
+	adcl	$0,%edx
+	leal	(%ebx,%eax,2),%ebp
+	shrl	$31,%eax
+	cmpl	(%esp),%ecx
+	movl	%eax,%ebx
+	movl	%ebp,28(%esp,%ecx,4)
+	jl	.L006sqr
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	movl	20(%esp),%edi
+	adcl	$0,%edx
+	movl	16(%esp),%esi
+	leal	(%ebx,%eax,2),%ebp
+	imull	32(%esp),%edi
+	shrl	$31,%eax
+	movl	%ebp,32(%esp,%ecx,4)
+	leal	(%eax,%edx,2),%ebp
+	movl	(%esi),%eax
+	shrl	$31,%edx
+	movl	%ebp,36(%esp,%ecx,4)
+	movl	%edx,40(%esp,%ecx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	movl	%ecx,%ebx
+	adcl	$0,%edx
+	movl	4(%esi),%eax
+	movl	$1,%ecx
+.align	16
+.L0073rdmadd:
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ecx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	4(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ecx,4)
+	movl	%edx,%ebp
+	mull	%edi
+	addl	36(%esp,%ecx,4),%ebp
+	leal	2(%ecx),%ecx
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	movl	(%esi,%ecx,4),%eax
+	adcl	$0,%edx
+	cmpl	%ebx,%ecx
+	movl	%ebp,24(%esp,%ecx,4)
+	jl	.L0073rdmadd
+	movl	%edx,%ebp
+	mull	%edi
+	addl	32(%esp,%ebx,4),%ebp
+	adcl	$0,%edx
+	addl	%eax,%ebp
+	adcl	$0,%edx
+	movl	%ebp,28(%esp,%ebx,4)
+	movl	12(%esp),%ecx
+	xorl	%eax,%eax
+	movl	8(%esp),%esi
+	addl	36(%esp,%ebx,4),%edx
+	adcl	40(%esp,%ebx,4),%eax
+	movl	%edx,32(%esp,%ebx,4)
+	cmpl	%ebx,%ecx
+	movl	%eax,36(%esp,%ebx,4)
+	je	.L005common_tail
+	movl	4(%esi,%ecx,4),%edi
+	leal	1(%ecx),%ecx
+	movl	%edi,%eax
+	movl	%ecx,12(%esp)
+	mull	%edi
+	addl	32(%esp,%ecx,4),%eax
+	adcl	$0,%edx
+	movl	%eax,32(%esp,%ecx,4)
+	xorl	%ebp,%ebp
+	cmpl	%ebx,%ecx
+	leal	1(%ecx),%ecx
+	je	.L008sqrlast
+	movl	%edx,%ebx
+	shrl	$1,%edx
+	andl	$1,%ebx
+.align	16
+.L009sqradd:
+	movl	(%esi,%ecx,4),%eax
+	movl	%edx,%ebp
+	mull	%edi
+	addl	%ebp,%eax
+	leal	(%eax,%eax,1),%ebp
+	adcl	$0,%edx
+	shrl	$31,%eax
+	addl	32(%esp,%ecx,4),%ebp
+	leal	1(%ecx),%ecx
+	adcl	$0,%eax
+	addl	%ebx,%ebp
+	adcl	$0,%eax
+	cmpl	(%esp),%ecx
+	movl	%ebp,28(%esp,%ecx,4)
+	movl	%eax,%ebx
+	jle	.L009sqradd
+	movl	%edx,%ebp
+	addl	%edx,%edx
+	shrl	$31,%ebp
+	addl	%ebx,%edx
+	adcl	$0,%ebp
+.L008sqrlast:
+	movl	20(%esp),%edi
+	movl	16(%esp),%esi
+	imull	32(%esp),%edi
+	addl	32(%esp,%ecx,4),%edx
+	movl	(%esi),%eax
+	adcl	$0,%ebp
+	movl	%edx,32(%esp,%ecx,4)
+	movl	%ebp,36(%esp,%ecx,4)
+	mull	%edi
+	addl	32(%esp),%eax
+	leal	-1(%ecx),%ebx
+	adcl	$0,%edx
+	movl	$1,%ecx
+	movl	4(%esi),%eax
+	jmp	.L0073rdmadd
+.align	16
+.L005common_tail:
+	movl	16(%esp),%ebp
+	movl	4(%esp),%edi
+	leal	32(%esp),%esi
+	movl	(%esi),%eax
+	movl	%ebx,%ecx
+	xorl	%edx,%edx
+.align	16
+.L010sub:
+	sbbl	(%ebp,%edx,4),%eax
+	movl	%eax,(%edi,%edx,4)
+	decl	%ecx
+	movl	4(%esi,%edx,4),%eax
+	leal	1(%edx),%edx
+	jge	.L010sub
+	sbbl	$0,%eax
+	andl	%eax,%esi
+	notl	%eax
+	movl	%edi,%ebp
+	andl	%eax,%ebp
+	orl	%ebp,%esi
+.align	16
+.L011copy:
+	movl	(%esi,%ebx,4),%eax
+	movl	%eax,(%edi,%ebx,4)
+	movl	%ecx,32(%esp,%ebx,4)
+	decl	%ebx
+	jge	.L011copy
+	movl	24(%esp),%esp
+	movl	$1,%eax
+.L000just_leave:
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	bn_mul_mont,.-.L_bn_mul_mont_begin
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+.byte	111,114,103,62,0
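
bn_mul_mont above is the -DOPENSSL_BN_ASM_MONT entry point: it computes
the Montgomery product rp = ap*bp*R^-1 mod np with R = 2^(32*num), takes
n0 as a pointer to -np^-1 mod 2^32, and returns 1 on success or 0 when
num < 4 (the .L000just_leave path), in which case the C code in
crypto/bn/bn_mont.c handles the multiplication itself.  A word-level C
model of that contract, for illustration only (bn_mul_mont_ref is a
made-up name; this is the textbook CIOS recurrence, not the asm's exact
schedule):

    #include <stdint.h>
    #include <string.h>

    int bn_mul_mont_ref(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
                        const uint32_t *np, const uint32_t *n0, int num)
    {
        if (num < 4)                 /* mirrors the asm's early exit */
            return 0;

        uint32_t t[num + 2];         /* C99 VLA; the asm builds this frame on its stack */
        memset(t, 0, sizeof(t));

        for (int i = 0; i < num; i++) {
            uint64_t c = 0;
            for (int j = 0; j < num; j++) {   /* t += ap * bp[i] */
                c += (uint64_t)ap[j] * bp[i] + t[j];
                t[j] = (uint32_t)c;
                c >>= 32;
            }
            c += t[num];
            t[num] = (uint32_t)c;
            t[num + 1] = (uint32_t)(c >> 32);

            uint32_t m = t[0] * n0[0];        /* chosen so t + np*m ends in a zero word */
            c = ((uint64_t)np[0] * m + t[0]) >> 32;
            for (int j = 1; j < num; j++) {   /* t = (t + np*m) >> 32 */
                c += (uint64_t)np[j] * m + t[j];
                t[j - 1] = (uint32_t)c;
                c >>= 32;
            }
            c += t[num];
            t[num - 1] = (uint32_t)c;
            t[num] = t[num + 1] + (uint32_t)(c >> 32);
            t[num + 1] = 0;
        }

        /* one conditional final subtraction: rp = t - np if t >= np, else t */
        uint64_t b = 0;
        for (int j = 0; j < num; j++) {
            uint64_t d = (uint64_t)t[j] - np[j] - b;
            rp[j] = (uint32_t)d;
            b = d >> 63;             /* borrow out of the word subtraction */
        }
        if (t[num] == 0 && b)        /* t was already < np: keep t */
            memcpy(rp, t, num * sizeof(uint32_t));
        return 1;
    }
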
diff --git a/crypto/des/asm/crypt586.s b/crypto/des/asm/crypt586.s
new file mode 100644
index 0000000..46c81c4
--- /dev/null
+++ b/crypto/des/asm/crypt586.s
@@ -0,0 +1,875 @@
+.file	"crypt586.s"
+.text
+.globl	fcrypt_body
+.type	fcrypt_body,@function
+.align	16
+fcrypt_body:
+.L_fcrypt_body_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+
+
+	xorl	%edi,%edi
+	xorl	%esi,%esi
+	leal	DES_SPtrans,%edx
+	pushl	%edx
+	movl	28(%esp),%ebp
+	pushl	$25
+.L000start:
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	4(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	8(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	12(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	16(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	20(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	24(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	28(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	32(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	36(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	40(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	44(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	48(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	52(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	56(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	60(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	64(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	68(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	72(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	76(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	80(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	84(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	88(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	92(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	96(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	100(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	104(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	108(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%esi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%esi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	112(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	116(%ebp),%ecx
+	xorl	%esi,%eax
+	xorl	%esi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%edi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%edi
+	movl	32(%esp),%ebp
+
+
+	movl	36(%esp),%eax
+	movl	%edi,%edx
+	shrl	$16,%edx
+	movl	40(%esp),%ecx
+	xorl	%edi,%edx
+	andl	%edx,%eax
+	andl	%ecx,%edx
+	movl	%eax,%ebx
+	shll	$16,%ebx
+	movl	%edx,%ecx
+	shll	$16,%ecx
+	xorl	%ebx,%eax
+	xorl	%ecx,%edx
+	movl	120(%ebp),%ebx
+	xorl	%ebx,%eax
+	movl	124(%ebp),%ecx
+	xorl	%edi,%eax
+	xorl	%edi,%edx
+	xorl	%ecx,%edx
+	andl	$0xfcfcfcfc,%eax
+	xorl	%ebx,%ebx
+	andl	$0xcfcfcfcf,%edx
+	xorl	%ecx,%ecx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	movl	4(%esp),%ebp
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	movl	0x600(%ebp,%ebx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x700(%ebp,%ecx,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x400(%ebp,%eax,1),%ebx
+	xorl	%ebx,%esi
+	movl	0x500(%ebp,%edx,1),%ebx
+	xorl	%ebx,%esi
+	movl	32(%esp),%ebp
+	movl	(%esp),%ebx
+	movl	%edi,%eax
+	decl	%ebx
+	movl	%esi,%edi
+	movl	%eax,%esi
+	movl	%ebx,(%esp)
+	jnz	.L000start
+
+
+	movl	28(%esp),%edx
+	rorl	$1,%edi
+	movl	%esi,%eax
+	xorl	%edi,%esi
+	andl	$0xaaaaaaaa,%esi
+	xorl	%esi,%eax
+	xorl	%esi,%edi
+
+	roll	$23,%eax
+	movl	%eax,%esi
+	xorl	%edi,%eax
+	andl	$0x03fc03fc,%eax
+	xorl	%eax,%esi
+	xorl	%eax,%edi
+
+	roll	$10,%esi
+	movl	%esi,%eax
+	xorl	%edi,%esi
+	andl	$0x33333333,%esi
+	xorl	%esi,%eax
+	xorl	%esi,%edi
+
+	roll	$18,%edi
+	movl	%edi,%esi
+	xorl	%eax,%edi
+	andl	$0xfff0000f,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%eax
+
+	roll	$12,%esi
+	movl	%esi,%edi
+	xorl	%eax,%esi
+	andl	$0xf0f0f0f0,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%eax
+
+	rorl	$4,%eax
+	movl	%eax,(%edx)
+	movl	%edi,4(%edx)
+	addl	$8,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	fcrypt_body,.-.L_fcrypt_body_begin
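
fcrypt_body above is the core of the traditional crypt(3) password hash;
it replaces crypto/des/fcrypt_b.c, which is why that file appears in
x86_exclude_files.  On the C side (cf. crypto/des/fcrypt.c) it is
declared as fcrypt_body(DES_LONG *out, DES_key_schedule *ks,
DES_LONG Eswap0, DES_LONG Eswap1): starting from an all-zero block (the
xorl %edi/%esi at entry) it runs 25 iterations (the pushl $25) of the
unrolled 16-round DES loop, with Eswap0/Eswap1 carrying the salt masks
that perturb the E-box expansion.  Each stanza applies those masks with
the classic masked-XOR bit swap before the subkey XOR and S-box lookups;
a minimal model of just that swap, for illustration (salt_swap is a
made-up name, and it assumes the mask selects only low-half bits, as
crypt's salt masks do):

    #include <stdint.h>

    /* Exchange bit j of R with bit j+16 for every bit j set in mask:
     * v is nonzero exactly where a selected pair differs, and XORing
     * v into both halves of R swaps exactly those pairs.  This is the
     * mask-and-shift sequence on %eax/%edx at the top of each stanza.
     */
    static uint32_t salt_swap(uint32_t R, uint32_t mask)
    {
        uint32_t v = (R ^ (R >> 16)) & mask;
        return R ^ v ^ (v << 16);
    }
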
diff --git a/crypto/des/asm/des-586.s b/crypto/des/asm/des-586.s
new file mode 100644
index 0000000..2fbd340
--- /dev/null
+++ b/crypto/des/asm/des-586.s
@@ -0,0 +1,1837 @@
+.file	"des-586.s"
+.text
+.globl	DES_SPtrans
+.type	_x86_DES_encrypt,@function
+.align	16
+_x86_DES_encrypt:
+	pushl	%ecx
+
+	movl	(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	4(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	8(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	12(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	16(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	20(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	24(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	28(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	32(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	36(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	40(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	44(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	48(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	52(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	56(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	60(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	64(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	68(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	72(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	76(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	80(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	84(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	88(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	92(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	96(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	100(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	104(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	108(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	112(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	116(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	120(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	124(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+	addl	$4,%esp
+	ret
+.size	_x86_DES_encrypt,.-_x86_DES_encrypt
+.type	_x86_DES_decrypt,@function
+.align	16
+_x86_DES_decrypt:
+	pushl	%ecx
+
+	movl	120(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	124(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	112(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	116(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	104(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	108(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	96(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	100(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	88(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	92(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	80(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	84(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	72(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	76(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	64(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	68(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	56(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	60(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	48(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	52(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	40(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	44(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	32(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	36(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	24(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	28(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	16(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	20(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+
+	movl	8(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	12(%ecx),%edx
+	xorl	%esi,%eax
+	xorl	%ecx,%ecx
+	xorl	%esi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%edi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%edi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%edi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%edi
+	xorl	0x700(%ebp,%ecx,1),%edi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%edi
+	xorl	0x500(%ebp,%edx,1),%edi
+
+	movl	(%ecx),%eax
+	xorl	%ebx,%ebx
+	movl	4(%ecx),%edx
+	xorl	%edi,%eax
+	xorl	%ecx,%ecx
+	xorl	%edi,%edx
+	andl	$0xfcfcfcfc,%eax
+	andl	$0xcfcfcfcf,%edx
+	movb	%al,%bl
+	movb	%ah,%cl
+	rorl	$4,%edx
+	xorl	(%ebp,%ebx,1),%esi
+	movb	%dl,%bl
+	xorl	0x200(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	shrl	$16,%eax
+	xorl	0x100(%ebp,%ebx,1),%esi
+	movb	%ah,%bl
+	shrl	$16,%edx
+	xorl	0x300(%ebp,%ecx,1),%esi
+	movb	%dh,%cl
+	andl	$0xff,%eax
+	andl	$0xff,%edx
+	xorl	0x600(%ebp,%ebx,1),%esi
+	xorl	0x700(%ebp,%ecx,1),%esi
+	movl	(%esp),%ecx
+	xorl	0x400(%ebp,%eax,1),%esi
+	xorl	0x500(%ebp,%edx,1),%esi
+	addl	$4,%esp
+	ret
+.size	_x86_DES_decrypt,.-_x86_DES_decrypt
+.globl	DES_encrypt1
+.type	DES_encrypt1,@function
+.align	16
+DES_encrypt1:
+.L_DES_encrypt1_begin:
+	pushl	%esi
+	pushl	%edi
+
+
+	movl	12(%esp),%esi
+	xorl	%ecx,%ecx
+	pushl	%ebx
+	pushl	%ebp
+	movl	(%esi),%eax
+	movl	28(%esp),%ebx
+	movl	4(%esi),%edi
+
+
+	roll	$4,%eax
+	movl	%eax,%esi
+	xorl	%edi,%eax
+	andl	$0xf0f0f0f0,%eax
+	xorl	%eax,%esi
+	xorl	%eax,%edi
+
+	roll	$20,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0xfff0000f,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$14,%eax
+	movl	%eax,%edi
+	xorl	%esi,%eax
+	andl	$0x33333333,%eax
+	xorl	%eax,%edi
+	xorl	%eax,%esi
+
+	roll	$22,%esi
+	movl	%esi,%eax
+	xorl	%edi,%esi
+	andl	$0x03fc03fc,%esi
+	xorl	%esi,%eax
+	xorl	%esi,%edi
+
+	roll	$9,%eax
+	movl	%eax,%esi
+	xorl	%edi,%eax
+	andl	$0xaaaaaaaa,%eax
+	xorl	%eax,%esi
+	xorl	%eax,%edi
+
+	roll	$1,%edi
+	call	.L000pic_point
+.L000pic_point:
+	popl	%ebp
+	leal	DES_SPtrans-.L000pic_point(%ebp),%ebp
+	movl	24(%esp),%ecx
+	cmpl	$0,%ebx
+	je	.L001decrypt
+	call	_x86_DES_encrypt
+	jmp	.L002done
+.L001decrypt:
+	call	_x86_DES_decrypt
+.L002done:
+
+
+	movl	20(%esp),%edx
+	rorl	$1,%esi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0xaaaaaaaa,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$23,%eax
+	movl	%eax,%edi
+	xorl	%esi,%eax
+	andl	$0x03fc03fc,%eax
+	xorl	%eax,%edi
+	xorl	%eax,%esi
+
+	roll	$10,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0x33333333,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$18,%esi
+	movl	%esi,%edi
+	xorl	%eax,%esi
+	andl	$0xfff0000f,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%eax
+
+	roll	$12,%edi
+	movl	%edi,%esi
+	xorl	%eax,%edi
+	andl	$0xf0f0f0f0,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%eax
+
+	rorl	$4,%eax
+	movl	%eax,(%edx)
+	movl	%esi,4(%edx)
+	popl	%ebp
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	ret
+.size	DES_encrypt1,.-.L_DES_encrypt1_begin
+.globl	DES_encrypt2
+.type	DES_encrypt2,@function
+.align	16
+DES_encrypt2:
+.L_DES_encrypt2_begin:
+	pushl	%esi
+	pushl	%edi
+
+
+	movl	12(%esp),%eax
+	xorl	%ecx,%ecx
+	pushl	%ebx
+	pushl	%ebp
+	movl	(%eax),%esi
+	movl	28(%esp),%ebx
+	roll	$3,%esi
+	movl	4(%eax),%edi
+	roll	$3,%edi
+	call	.L003pic_point
+.L003pic_point:
+	popl	%ebp
+	leal	DES_SPtrans-.L003pic_point(%ebp),%ebp
+	movl	24(%esp),%ecx
+	cmpl	$0,%ebx
+	je	.L004decrypt
+	call	_x86_DES_encrypt
+	jmp	.L005done
+.L004decrypt:
+	call	_x86_DES_decrypt
+.L005done:
+
+
+	rorl	$3,%edi
+	movl	20(%esp),%eax
+	rorl	$3,%esi
+	movl	%edi,(%eax)
+	movl	%esi,4(%eax)
+	popl	%ebp
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	ret
+.size	DES_encrypt2,.-.L_DES_encrypt2_begin
+.globl	DES_encrypt3
+.type	DES_encrypt3,@function
+.align	16
+DES_encrypt3:
+.L_DES_encrypt3_begin:
+	pushl	%ebx
+	movl	8(%esp),%ebx
+	pushl	%ebp
+	pushl	%esi
+	pushl	%edi
+
+
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+	subl	$12,%esp
+
+
+	roll	$4,%edi
+	movl	%edi,%edx
+	xorl	%esi,%edi
+	andl	$0xf0f0f0f0,%edi
+	xorl	%edi,%edx
+	xorl	%edi,%esi
+
+	roll	$20,%esi
+	movl	%esi,%edi
+	xorl	%edx,%esi
+	andl	$0xfff0000f,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%edx
+
+	roll	$14,%edi
+	movl	%edi,%esi
+	xorl	%edx,%edi
+	andl	$0x33333333,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%edx
+
+	roll	$22,%edx
+	movl	%edx,%edi
+	xorl	%esi,%edx
+	andl	$0x03fc03fc,%edx
+	xorl	%edx,%edi
+	xorl	%edx,%esi
+
+	roll	$9,%edi
+	movl	%edi,%edx
+	xorl	%esi,%edi
+	andl	$0xaaaaaaaa,%edi
+	xorl	%edi,%edx
+	xorl	%edi,%esi
+
+	rorl	$3,%edx
+	rorl	$2,%esi
+	movl	%esi,4(%ebx)
+	movl	36(%esp),%eax
+	movl	%edx,(%ebx)
+	movl	40(%esp),%edi
+	movl	44(%esp),%esi
+	movl	$1,8(%esp)
+	movl	%eax,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	movl	$0,8(%esp)
+	movl	%edi,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	movl	$1,8(%esp)
+	movl	%esi,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	addl	$12,%esp
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+
+
+	roll	$2,%esi
+	roll	$3,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0xaaaaaaaa,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$23,%eax
+	movl	%eax,%edi
+	xorl	%esi,%eax
+	andl	$0x03fc03fc,%eax
+	xorl	%eax,%edi
+	xorl	%eax,%esi
+
+	roll	$10,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0x33333333,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$18,%esi
+	movl	%esi,%edi
+	xorl	%eax,%esi
+	andl	$0xfff0000f,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%eax
+
+	roll	$12,%edi
+	movl	%edi,%esi
+	xorl	%eax,%edi
+	andl	$0xf0f0f0f0,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%eax
+
+	rorl	$4,%eax
+	movl	%eax,(%ebx)
+	movl	%esi,4(%ebx)
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	popl	%ebx
+	ret
+.size	DES_encrypt3,.-.L_DES_encrypt3_begin
+.globl	DES_decrypt3
+.type	DES_decrypt3,@function
+.align	16
+DES_decrypt3:
+.L_DES_decrypt3_begin:
+	pushl	%ebx
+	movl	8(%esp),%ebx
+	pushl	%ebp
+	pushl	%esi
+	pushl	%edi
+
+
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+	subl	$12,%esp
+
+
+	roll	$4,%edi
+	movl	%edi,%edx
+	xorl	%esi,%edi
+	andl	$0xf0f0f0f0,%edi
+	xorl	%edi,%edx
+	xorl	%edi,%esi
+
+	roll	$20,%esi
+	movl	%esi,%edi
+	xorl	%edx,%esi
+	andl	$0xfff0000f,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%edx
+
+	roll	$14,%edi
+	movl	%edi,%esi
+	xorl	%edx,%edi
+	andl	$0x33333333,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%edx
+
+	roll	$22,%edx
+	movl	%edx,%edi
+	xorl	%esi,%edx
+	andl	$0x03fc03fc,%edx
+	xorl	%edx,%edi
+	xorl	%edx,%esi
+
+	roll	$9,%edi
+	movl	%edi,%edx
+	xorl	%esi,%edi
+	andl	$0xaaaaaaaa,%edi
+	xorl	%edi,%edx
+	xorl	%edi,%esi
+
+	rorl	$3,%edx
+	rorl	$2,%esi
+	movl	%esi,4(%ebx)
+	movl	36(%esp),%esi
+	movl	%edx,(%ebx)
+	movl	40(%esp),%edi
+	movl	44(%esp),%eax
+	movl	$0,8(%esp)
+	movl	%eax,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	movl	$1,8(%esp)
+	movl	%edi,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	movl	$0,8(%esp)
+	movl	%esi,4(%esp)
+	movl	%ebx,(%esp)
+	call	.L_DES_encrypt2_begin
+	addl	$12,%esp
+	movl	(%ebx),%edi
+	movl	4(%ebx),%esi
+
+
+	roll	$2,%esi
+	roll	$3,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0xaaaaaaaa,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$23,%eax
+	movl	%eax,%edi
+	xorl	%esi,%eax
+	andl	$0x03fc03fc,%eax
+	xorl	%eax,%edi
+	xorl	%eax,%esi
+
+	roll	$10,%edi
+	movl	%edi,%eax
+	xorl	%esi,%edi
+	andl	$0x33333333,%edi
+	xorl	%edi,%eax
+	xorl	%edi,%esi
+
+	roll	$18,%esi
+	movl	%esi,%edi
+	xorl	%eax,%esi
+	andl	$0xfff0000f,%esi
+	xorl	%esi,%edi
+	xorl	%esi,%eax
+
+	roll	$12,%edi
+	movl	%edi,%esi
+	xorl	%eax,%edi
+	andl	$0xf0f0f0f0,%edi
+	xorl	%edi,%esi
+	xorl	%edi,%eax
+
+	rorl	$4,%eax
+	movl	%eax,(%ebx)
+	movl	%esi,4(%ebx)
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	popl	%ebx
+	ret
+.size	DES_decrypt3,.-.L_DES_decrypt3_begin
+.globl	DES_ncbc_encrypt
+.type	DES_ncbc_encrypt,@function
+.align	16
+DES_ncbc_encrypt:
+.L_DES_ncbc_encrypt_begin:
+
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%ebp
+
+	movl	36(%esp),%ebx
+	movl	(%ebx),%esi
+	movl	4(%ebx),%edi
+	pushl	%edi
+	pushl	%esi
+	pushl	%edi
+	pushl	%esi
+	movl	%esp,%ebx
+	movl	36(%esp),%esi
+	movl	40(%esp),%edi
+
+	movl	56(%esp),%ecx
+
+	pushl	%ecx
+
+	movl	52(%esp),%eax
+	pushl	%eax
+	pushl	%ebx
+	cmpl	$0,%ecx
+	jz	.L006decrypt
+	andl	$4294967288,%ebp
+	movl	12(%esp),%eax
+	movl	16(%esp),%ebx
+	jz	.L007encrypt_finish
+.L008encrypt_loop:
+	movl	(%esi),%ecx
+	movl	4(%esi),%edx
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	movl	%eax,12(%esp)
+	movl	%ebx,16(%esp)
+	call	.L_DES_encrypt1_begin
+	movl	12(%esp),%eax
+	movl	16(%esp),%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L008encrypt_loop
+.L007encrypt_finish:
+	movl	56(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L009finish
+	call	.L010PIC_point
+.L010PIC_point:
+	popl	%edx
+	leal	.L011cbc_enc_jmp_table-.L010PIC_point(%edx),%ecx
+	movl	(%ecx,%ebp,4),%ebp
+	addl	%edx,%ebp
+	xorl	%ecx,%ecx
+	xorl	%edx,%edx
+	jmp	*%ebp
+.L012ej7:
+	movb	6(%esi),%dh
+	shll	$8,%edx
+.L013ej6:
+	movb	5(%esi),%dh
+.L014ej5:
+	movb	4(%esi),%dl
+.L015ej4:
+	movl	(%esi),%ecx
+	jmp	.L016ejend
+.L017ej3:
+	movb	2(%esi),%ch
+	shll	$8,%ecx
+.L018ej2:
+	movb	1(%esi),%ch
+.L019ej1:
+	movb	(%esi),%cl
+.L016ejend:
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	movl	%eax,12(%esp)
+	movl	%ebx,16(%esp)
+	call	.L_DES_encrypt1_begin
+	movl	12(%esp),%eax
+	movl	16(%esp),%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	jmp	.L009finish
+.L006decrypt:
+	andl	$4294967288,%ebp
+	movl	20(%esp),%eax
+	movl	24(%esp),%ebx
+	jz	.L020decrypt_finish
+.L021decrypt_loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%eax,12(%esp)
+	movl	%ebx,16(%esp)
+	call	.L_DES_encrypt1_begin
+	movl	12(%esp),%eax
+	movl	16(%esp),%ebx
+	movl	20(%esp),%ecx
+	movl	24(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%ecx,(%edi)
+	movl	%edx,4(%edi)
+	movl	%eax,20(%esp)
+	movl	%ebx,24(%esp)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L021decrypt_loop
+.L020decrypt_finish:
+	movl	56(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L009finish
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%eax,12(%esp)
+	movl	%ebx,16(%esp)
+	call	.L_DES_encrypt1_begin
+	movl	12(%esp),%eax
+	movl	16(%esp),%ebx
+	movl	20(%esp),%ecx
+	movl	24(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+.L022dj7:
+	rorl	$16,%edx
+	movb	%dl,6(%edi)
+	shrl	$16,%edx
+.L023dj6:
+	movb	%dh,5(%edi)
+.L024dj5:
+	movb	%dl,4(%edi)
+.L025dj4:
+	movl	%ecx,(%edi)
+	jmp	.L026djend
+.L027dj3:
+	rorl	$16,%ecx
+	movb	%cl,2(%edi)
+	shll	$16,%ecx
+.L028dj2:
+	movb	%ch,1(%esi)
+.L029dj1:
+	movb	%cl,(%esi)
+.L026djend:
+	jmp	.L009finish
+.L009finish:
+	movl	64(%esp),%ecx
+	addl	$28,%esp
+	movl	%eax,(%ecx)
+	movl	%ebx,4(%ecx)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L011cbc_enc_jmp_table:
+.long	0
+.long	.L019ej1-.L010PIC_point
+.long	.L018ej2-.L010PIC_point
+.long	.L017ej3-.L010PIC_point
+.long	.L015ej4-.L010PIC_point
+.long	.L014ej5-.L010PIC_point
+.long	.L013ej6-.L010PIC_point
+.long	.L012ej7-.L010PIC_point
+.align	64
+.size	DES_ncbc_encrypt,.-.L_DES_ncbc_encrypt_begin
+.globl	DES_ede3_cbc_encrypt
+.type	DES_ede3_cbc_encrypt,@function
+.align	16
+DES_ede3_cbc_encrypt:
+.L_DES_ede3_cbc_encrypt_begin:
+
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	28(%esp),%ebp
+
+	movl	44(%esp),%ebx
+	movl	(%ebx),%esi
+	movl	4(%ebx),%edi
+	pushl	%edi
+	pushl	%esi
+	pushl	%edi
+	pushl	%esi
+	movl	%esp,%ebx
+	movl	36(%esp),%esi
+	movl	40(%esp),%edi
+
+	movl	64(%esp),%ecx
+
+	movl	56(%esp),%eax
+	pushl	%eax
+
+	movl	56(%esp),%eax
+	pushl	%eax
+
+	movl	56(%esp),%eax
+	pushl	%eax
+	pushl	%ebx
+	cmpl	$0,%ecx
+	jz	.L030decrypt
+	andl	$4294967288,%ebp
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	jz	.L031encrypt_finish
+.L032encrypt_loop:
+	movl	(%esi),%ecx
+	movl	4(%esi),%edx
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	call	.L_DES_encrypt3_begin
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L032encrypt_loop
+.L031encrypt_finish:
+	movl	60(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L033finish
+	call	.L034PIC_point
+.L034PIC_point:
+	popl	%edx
+	leal	.L035cbc_enc_jmp_table-.L034PIC_point(%edx),%ecx
+	movl	(%ecx,%ebp,4),%ebp
+	addl	%edx,%ebp
+	xorl	%ecx,%ecx
+	xorl	%edx,%edx
+	jmp	*%ebp
+.L036ej7:
+	movb	6(%esi),%dh
+	shll	$8,%edx
+.L037ej6:
+	movb	5(%esi),%dh
+.L038ej5:
+	movb	4(%esi),%dl
+.L039ej4:
+	movl	(%esi),%ecx
+	jmp	.L040ejend
+.L041ej3:
+	movb	2(%esi),%ch
+	shll	$8,%ecx
+.L042ej2:
+	movb	1(%esi),%ch
+.L043ej1:
+	movb	(%esi),%cl
+.L040ejend:
+	xorl	%ecx,%eax
+	xorl	%edx,%ebx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	call	.L_DES_encrypt3_begin
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	movl	%eax,(%edi)
+	movl	%ebx,4(%edi)
+	jmp	.L033finish
+.L030decrypt:
+	andl	$4294967288,%ebp
+	movl	24(%esp),%eax
+	movl	28(%esp),%ebx
+	jz	.L044decrypt_finish
+.L045decrypt_loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	call	.L_DES_decrypt3_begin
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	movl	24(%esp),%ecx
+	movl	28(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%ecx,(%edi)
+	movl	%edx,4(%edi)
+	movl	%eax,24(%esp)
+	movl	%ebx,28(%esp)
+	addl	$8,%esi
+	addl	$8,%edi
+	subl	$8,%ebp
+	jnz	.L045decrypt_loop
+.L044decrypt_finish:
+	movl	60(%esp),%ebp
+	andl	$7,%ebp
+	jz	.L033finish
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	call	.L_DES_decrypt3_begin
+	movl	16(%esp),%eax
+	movl	20(%esp),%ebx
+	movl	24(%esp),%ecx
+	movl	28(%esp),%edx
+	xorl	%eax,%ecx
+	xorl	%ebx,%edx
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+.L046dj7:
+	rorl	$16,%edx
+	movb	%dl,6(%edi)
+	shrl	$16,%edx
+.L047dj6:
+	movb	%dh,5(%edi)
+.L048dj5:
+	movb	%dl,4(%edi)
+.L049dj4:
+	movl	%ecx,(%edi)
+	jmp	.L050djend
+.L051dj3:
+	rorl	$16,%ecx
+	movb	%cl,2(%edi)
+	shll	$16,%ecx
+.L052dj2:
+	movb	%ch,1(%esi)
+.L053dj1:
+	movb	%cl,(%esi)
+.L050djend:
+	jmp	.L033finish
+.L033finish:
+	movl	76(%esp),%ecx
+	addl	$32,%esp
+	movl	%eax,(%ecx)
+	movl	%ebx,4(%ecx)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L035cbc_enc_jmp_table:
+.long	0
+.long	.L043ej1-.L034PIC_point
+.long	.L042ej2-.L034PIC_point
+.long	.L041ej3-.L034PIC_point
+.long	.L039ej4-.L034PIC_point
+.long	.L038ej5-.L034PIC_point
+.long	.L037ej6-.L034PIC_point
+.long	.L036ej7-.L034PIC_point
+.align	64
+.size	DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin
+.align	64
+DES_SPtrans:
+.long	34080768,524288,33554434,34080770
+.long	33554432,526338,524290,33554434
+.long	526338,34080768,34078720,2050
+.long	33556482,33554432,0,524290
+.long	524288,2,33556480,526336
+.long	34080770,34078720,2050,33556480
+.long	2,2048,526336,34078722
+.long	2048,33556482,34078722,0
+.long	0,34080770,33556480,524290
+.long	34080768,524288,2050,33556480
+.long	34078722,2048,526336,33554434
+.long	526338,2,33554434,34078720
+.long	34080770,526336,34078720,33556482
+.long	33554432,2050,524290,0
+.long	524288,33554432,33556482,34080768
+.long	2,34078722,2048,526338
+.long	1074823184,0,1081344,1074790400
+.long	1073741840,32784,1073774592,1081344
+.long	32768,1074790416,16,1073774592
+.long	1048592,1074823168,1074790400,16
+.long	1048576,1073774608,1074790416,32768
+.long	1081360,1073741824,0,1048592
+.long	1073774608,1081360,1074823168,1073741840
+.long	1073741824,1048576,32784,1074823184
+.long	1048592,1074823168,1073774592,1081360
+.long	1074823184,1048592,1073741840,0
+.long	1073741824,32784,1048576,1074790416
+.long	32768,1073741824,1081360,1073774608
+.long	1074823168,32768,0,1073741840
+.long	16,1074823184,1081344,1074790400
+.long	1074790416,1048576,32784,1073774592
+.long	1073774608,16,1074790400,1081344
+.long	67108865,67371264,256,67109121
+.long	262145,67108864,67109121,262400
+.long	67109120,262144,67371008,1
+.long	67371265,257,1,67371009
+.long	0,262145,67371264,256
+.long	257,67371265,262144,67108865
+.long	67371009,67109120,262401,67371008
+.long	262400,0,67108864,262401
+.long	67371264,256,1,262144
+.long	257,262145,67371008,67109121
+.long	0,67371264,262400,67371009
+.long	262145,67108864,67371265,1
+.long	262401,67108865,67108864,67371265
+.long	262144,67109120,67109121,262400
+.long	67109120,0,67371009,257
+.long	67108865,262401,256,67371008
+.long	4198408,268439552,8,272633864
+.long	0,272629760,268439560,4194312
+.long	272633856,268435464,268435456,4104
+.long	268435464,4198408,4194304,268435456
+.long	272629768,4198400,4096,8
+.long	4198400,268439560,272629760,4096
+.long	4104,0,4194312,272633856
+.long	268439552,272629768,272633864,4194304
+.long	272629768,4104,4194304,268435464
+.long	4198400,268439552,8,272629760
+.long	268439560,0,4096,4194312
+.long	0,272629768,272633856,4096
+.long	268435456,272633864,4198408,4194304
+.long	272633864,8,268439552,4198408
+.long	4194312,4198400,272629760,268439560
+.long	4104,268435456,268435464,272633856
+.long	134217728,65536,1024,134284320
+.long	134283296,134218752,66592,134283264
+.long	65536,32,134217760,66560
+.long	134218784,134283296,134284288,0
+.long	66560,134217728,65568,1056
+.long	134218752,66592,0,134217760
+.long	32,134218784,134284320,65568
+.long	134283264,1024,1056,134284288
+.long	134284288,134218784,65568,134283264
+.long	65536,32,134217760,134218752
+.long	134217728,66560,134284320,0
+.long	66592,134217728,1024,65568
+.long	134218784,1024,0,134284320
+.long	134283296,134284288,1056,65536
+.long	66560,134283296,134218752,1056
+.long	32,66592,134283264,134217760
+.long	2147483712,2097216,0,2149588992
+.long	2097216,8192,2147491904,2097152
+.long	8256,2149589056,2105344,2147483648
+.long	2147491840,2147483712,2149580800,2105408
+.long	2097152,2147491904,2149580864,0
+.long	8192,64,2149588992,2149580864
+.long	2149589056,2149580800,2147483648,8256
+.long	64,2105344,2105408,2147491840
+.long	8256,2147483648,2147491840,2105408
+.long	2149588992,2097216,0,2147491840
+.long	2147483648,8192,2149580864,2097152
+.long	2097216,2149589056,2105344,64
+.long	2149589056,2105344,2097152,2147491904
+.long	2147483712,2149580800,2105408,0
+.long	8192,2147483712,2147491904,2149588992
+.long	2149580800,8256,64,2149580864
+.long	16384,512,16777728,16777220
+.long	16794116,16388,16896,0
+.long	16777216,16777732,516,16793600
+.long	4,16794112,16793600,516
+.long	16777732,16384,16388,16794116
+.long	0,16777728,16777220,16896
+.long	16793604,16900,16794112,4
+.long	16900,16793604,512,16777216
+.long	16900,16793600,16793604,516
+.long	16384,512,16777216,16793604
+.long	16777732,16900,16896,0
+.long	512,16777220,4,16777728
+.long	0,16777732,16777728,16896
+.long	516,16384,16794116,16777216
+.long	16794112,4,16388,16794116
+.long	16777220,16794112,16793600,16388
+.long	545259648,545390592,131200,0
+.long	537001984,8388736,545259520,545390720
+.long	128,536870912,8519680,131200
+.long	8519808,537002112,536871040,545259520
+.long	131072,8519808,8388736,537001984
+.long	545390720,536871040,0,8519680
+.long	536870912,8388608,537002112,545259648
+.long	8388608,131072,545390592,128
+.long	8388608,131072,536871040,545390720
+.long	131200,536870912,0,8519680
+.long	545259648,537002112,537001984,8388736
+.long	545390592,128,8388736,537001984
+.long	545390720,8388608,545259520,536871040
+.long	8519680,131200,537002112,545259520
+.long	128,545390592,8519808,0
+.long	536870912,545259648,131072,8519808
diff --git a/crypto/md5/asm/md5-586.s b/crypto/md5/asm/md5-586.s
new file mode 100644
index 0000000..23e4de7
--- /dev/null
+++ b/crypto/md5/asm/md5-586.s
@@ -0,0 +1,679 @@
+.file	"crypto/md5/asm/md5-586.s"
+.text
+.globl	md5_block_asm_data_order
+.type	md5_block_asm_data_order,@function
+.align	16
+md5_block_asm_data_order:
+.L_md5_block_asm_data_order_begin:
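+# md5_block_asm_data_order(MD5_CTX *c, const void *p, size_t num), the MD5_ASM entry
+# point wired up via md5_locl.h; %ecx<<6 converts the block count into an end pointer.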
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%edi
+	movl	16(%esp),%esi
+	movl	20(%esp),%ecx
+	pushl	%ebp
+	shll	$6,%ecx
+	pushl	%ebx
+	addl	%esi,%ecx
+	subl	$64,%ecx
+	movl	(%edi),%eax
+	pushl	%ecx
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+.L000start:
+
+
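+# Round 1: F(b,c,d) = (b & c) | (~b & d), computed as ((c ^ d) & b) ^ d;
+# 16 steps with per-step rotations 7, 12, 17, 22.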
+	movl	%ecx,%edi
+	movl	(%esi),%ebp
+
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	3614090360(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	4(%esi),%ebp
+	addl	%ebx,%eax
+
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	3905402710(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	8(%esi),%ebp
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	606105819(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	12(%esi),%ebp
+	addl	%edx,%ecx
+
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	3250441966(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	16(%esi),%ebp
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	4118548399(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	20(%esi),%ebp
+	addl	%ebx,%eax
+
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	1200080426(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	24(%esi),%ebp
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2821735955(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	28(%esi),%ebp
+	addl	%edx,%ecx
+
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	4249261313(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	32(%esi),%ebp
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1770035416(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	36(%esi),%ebp
+	addl	%ebx,%eax
+
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	2336552879(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	40(%esi),%ebp
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	4294925233(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	44(%esi),%ebp
+	addl	%edx,%ecx
+
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	2304563134(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	48(%esi),%ebp
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1804603682(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	52(%esi),%ebp
+	addl	%ebx,%eax
+
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	4254626195(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	56(%esi),%ebp
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2792965006(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	60(%esi),%ebp
+	addl	%edx,%ecx
+
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	1236535329(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	4(%esi),%ebp
+	addl	%ecx,%ebx
+
+
+
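+# Round 2: G(b,c,d) = (b & d) | (c & ~d), computed as ((b ^ c) & d) ^ c;
+# rotations 5, 9, 14, 20.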
+	leal	4129170786(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	24(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+
+	leal	3225465664(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	44(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+
+	leal	643717713(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+
+	leal	3921069994(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+
+	leal	3593408605(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	40(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+
+	leal	38016083(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	60(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+
+	leal	3634488961(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	16(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+
+	leal	3889429448(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	36(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+
+	leal	568446438(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	56(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+
+	leal	3275163606(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	12(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+
+	leal	4107603335(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	32(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+
+	leal	1163531501(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	52(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+
+	leal	2850285829(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	8(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+
+	leal	4243563512(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	28(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+
+	leal	1735328473(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	48(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+
+	leal	2368359562(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+
+
+
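+# Round 3: H(b,c,d) = b ^ c ^ d; rotations 4, 11, 16, 23.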
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	4294588738(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	32(%esi),%ebp
+	movl	%ebx,%edi
+
+	leal	2272392833(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	1839030562(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	56(%esi),%ebp
+	movl	%edx,%edi
+
+	leal	4259657740(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	2763975236(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	16(%esi),%ebp
+	movl	%ebx,%edi
+
+	leal	1272893353(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	4139469664(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	40(%esi),%ebp
+	movl	%edx,%edi
+
+	leal	3200236656(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	681279174(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	(%esi),%ebp
+	movl	%ebx,%edi
+
+	leal	3936430074(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	3572445317(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	24(%esi),%ebp
+	movl	%edx,%edi
+
+	leal	76029189(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	3654602809(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	48(%esi),%ebp
+	movl	%ebx,%edi
+
+	leal	3873151461(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	530742520(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	8(%esi),%ebp
+	movl	%edx,%edi
+
+	leal	3299628645(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+
+
+
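+# Round 4: I(b,c,d) = c ^ (b | ~d), with %edi preloaded with -1; rotations 6, 10, 15, 21.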
+	xorl	%edx,%edi
+	orl	%ebx,%edi
+	leal	4096336452(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+
+	orl	%eax,%edi
+	leal	1126891415(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	56(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+
+	orl	%edx,%edi
+	leal	2878612391(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	20(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+
+	orl	%ecx,%edi
+	leal	4237533241(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	48(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+
+	orl	%ebx,%edi
+	leal	1700485571(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+
+	orl	%eax,%edi
+	leal	2399980690(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	40(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+
+	orl	%edx,%edi
+	leal	4293915773(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+
+	orl	%ecx,%edi
+	leal	2240044497(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	32(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+
+	orl	%ebx,%edi
+	leal	1873313359(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+
+	orl	%eax,%edi
+	leal	4264355552(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	24(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+
+	orl	%edx,%edi
+	leal	2734768916(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+
+	orl	%ecx,%edi
+	leal	1309151649(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	16(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+
+	orl	%ebx,%edi
+	leal	4149444226(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+
+	orl	%eax,%edi
+	leal	3174756917(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	8(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+
+	orl	%edx,%edi
+	leal	718787259(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+
+	orl	%ecx,%edi
+	leal	3951481745(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	24(%esp),%ebp
+	addl	%edi,%ebx
+	addl	$64,%esi
+	roll	$21,%ebx
+	movl	(%ebp),%edi
+	addl	%ecx,%ebx
+	addl	%edi,%eax
+	movl	4(%ebp),%edi
+	addl	%edi,%ebx
+	movl	8(%ebp),%edi
+	addl	%edi,%ecx
+	movl	12(%ebp),%edi
+	addl	%edi,%edx
+	movl	%eax,(%ebp)
+	movl	%ebx,4(%ebp)
+	movl	(%esp),%edi
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	cmpl	%esi,%edi
+	jae	.L000start
+	popl	%eax
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
diff --git a/crypto/modes/asm/ghash-x86.s b/crypto/modes/asm/ghash-x86.s
new file mode 100644
index 0000000..cb9ae20
--- /dev/null
+++ b/crypto/modes/asm/ghash-x86.s
@@ -0,0 +1,728 @@
+.file	"ghash-x86.s"
+.text
+.globl	gcm_gmult_4bit_x86
+.type	gcm_gmult_4bit_x86,@function
+.align	16
+gcm_gmult_4bit_x86:
+.L_gcm_gmult_4bit_x86_begin:
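+# gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]): Xi = Xi * H in GF(2^128)
+# via the 4-bit table method; 16(%esp) is filled with pre-shifted reduction constants.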
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	subl	$84,%esp
+	movl	104(%esp),%edi
+	movl	108(%esp),%esi
+	movl	(%edi),%ebp
+	movl	4(%edi),%edx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%ebx
+	movl	$0,16(%esp)
+	movl	$471859200,20(%esp)
+	movl	$943718400,24(%esp)
+	movl	$610271232,28(%esp)
+	movl	$1887436800,32(%esp)
+	movl	$1822425088,36(%esp)
+	movl	$1220542464,40(%esp)
+	movl	$1423966208,44(%esp)
+	movl	$3774873600,48(%esp)
+	movl	$4246732800,52(%esp)
+	movl	$3644850176,56(%esp)
+	movl	$3311403008,60(%esp)
+	movl	$2441084928,64(%esp)
+	movl	$2376073216,68(%esp)
+	movl	$2847932416,72(%esp)
+	movl	$3051356160,76(%esp)
+	movl	%ebp,(%esp)
+	movl	%edx,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%ebx,12(%esp)
+	shrl	$20,%ebx
+	andl	$240,%ebx
+	movl	4(%esi,%ebx,1),%ebp
+	movl	(%esi,%ebx,1),%edx
+	movl	12(%esi,%ebx,1),%ecx
+	movl	8(%esi,%ebx,1),%ebx
+	xorl	%eax,%eax
+	movl	$15,%edi
+	jmp	.L000x86_loop
+.align	16
+.L000x86_loop:
+	movb	%bl,%al
+	shrdl	$4,%ecx,%ebx
+	andb	$15,%al
+	shrdl	$4,%edx,%ecx
+	shrdl	$4,%ebp,%edx
+	shrl	$4,%ebp
+	xorl	16(%esp,%eax,4),%ebp
+	movb	(%esp,%edi,1),%al
+	andb	$240,%al
+	xorl	8(%esi,%eax,1),%ebx
+	xorl	12(%esi,%eax,1),%ecx
+	xorl	(%esi,%eax,1),%edx
+	xorl	4(%esi,%eax,1),%ebp
+	decl	%edi
+	js	.L001x86_break
+	movb	%bl,%al
+	shrdl	$4,%ecx,%ebx
+	andb	$15,%al
+	shrdl	$4,%edx,%ecx
+	shrdl	$4,%ebp,%edx
+	shrl	$4,%ebp
+	xorl	16(%esp,%eax,4),%ebp
+	movb	(%esp,%edi,1),%al
+	shlb	$4,%al
+	xorl	8(%esi,%eax,1),%ebx
+	xorl	12(%esi,%eax,1),%ecx
+	xorl	(%esi,%eax,1),%edx
+	xorl	4(%esi,%eax,1),%ebp
+	jmp	.L000x86_loop
+.align	16
+.L001x86_break:
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	bswap	%ebp
+	movl	104(%esp),%edi
+	movl	%ebx,12(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,4(%edi)
+	movl	%ebp,(%edi)
+	addl	$84,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
+.globl	gcm_ghash_4bit_x86
+.type	gcm_ghash_4bit_x86,@function
+.align	16
+gcm_ghash_4bit_x86:
+.L_gcm_ghash_4bit_x86_begin:
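+# gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len):
+# XOR each 16-byte block into Xi, then multiply by H as in gcm_gmult_4bit_x86.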
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	subl	$84,%esp
+	movl	104(%esp),%ebx
+	movl	108(%esp),%esi
+	movl	112(%esp),%edi
+	movl	116(%esp),%ecx
+	addl	%edi,%ecx
+	movl	%ecx,116(%esp)
+	movl	(%ebx),%ebp
+	movl	4(%ebx),%edx
+	movl	8(%ebx),%ecx
+	movl	12(%ebx),%ebx
+	movl	$0,16(%esp)
+	movl	$471859200,20(%esp)
+	movl	$943718400,24(%esp)
+	movl	$610271232,28(%esp)
+	movl	$1887436800,32(%esp)
+	movl	$1822425088,36(%esp)
+	movl	$1220542464,40(%esp)
+	movl	$1423966208,44(%esp)
+	movl	$3774873600,48(%esp)
+	movl	$4246732800,52(%esp)
+	movl	$3644850176,56(%esp)
+	movl	$3311403008,60(%esp)
+	movl	$2441084928,64(%esp)
+	movl	$2376073216,68(%esp)
+	movl	$2847932416,72(%esp)
+	movl	$3051356160,76(%esp)
+.align	16
+.L002x86_outer_loop:
+	xorl	12(%edi),%ebx
+	xorl	8(%edi),%ecx
+	xorl	4(%edi),%edx
+	xorl	(%edi),%ebp
+	movl	%ebx,12(%esp)
+	movl	%ecx,8(%esp)
+	movl	%edx,4(%esp)
+	movl	%ebp,(%esp)
+	shrl	$20,%ebx
+	andl	$240,%ebx
+	movl	4(%esi,%ebx,1),%ebp
+	movl	(%esi,%ebx,1),%edx
+	movl	12(%esi,%ebx,1),%ecx
+	movl	8(%esi,%ebx,1),%ebx
+	xorl	%eax,%eax
+	movl	$15,%edi
+	jmp	.L003x86_loop
+.align	16
+.L003x86_loop:
+	movb	%bl,%al
+	shrdl	$4,%ecx,%ebx
+	andb	$15,%al
+	shrdl	$4,%edx,%ecx
+	shrdl	$4,%ebp,%edx
+	shrl	$4,%ebp
+	xorl	16(%esp,%eax,4),%ebp
+	movb	(%esp,%edi,1),%al
+	andb	$240,%al
+	xorl	8(%esi,%eax,1),%ebx
+	xorl	12(%esi,%eax,1),%ecx
+	xorl	(%esi,%eax,1),%edx
+	xorl	4(%esi,%eax,1),%ebp
+	decl	%edi
+	js	.L004x86_break
+	movb	%bl,%al
+	shrdl	$4,%ecx,%ebx
+	andb	$15,%al
+	shrdl	$4,%edx,%ecx
+	shrdl	$4,%ebp,%edx
+	shrl	$4,%ebp
+	xorl	16(%esp,%eax,4),%ebp
+	movb	(%esp,%edi,1),%al
+	shlb	$4,%al
+	xorl	8(%esi,%eax,1),%ebx
+	xorl	12(%esi,%eax,1),%ecx
+	xorl	(%esi,%eax,1),%edx
+	xorl	4(%esi,%eax,1),%ebp
+	jmp	.L003x86_loop
+.align	16
+.L004x86_break:
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	bswap	%ebp
+	movl	112(%esp),%edi
+	leal	16(%edi),%edi
+	cmpl	116(%esp),%edi
+	movl	%edi,112(%esp)
+	jb	.L002x86_outer_loop
+	movl	104(%esp),%edi
+	movl	%ebx,12(%edi)
+	movl	%ecx,8(%edi)
+	movl	%edx,4(%edi)
+	movl	%ebp,(%edi)
+	addl	$84,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
+.type	_mmx_gmult_4bit_inner,@function
+.align	16
+_mmx_gmult_4bit_inner:
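+# Shared MMX multiply: expects %edi -> 16-byte Xi, %esi -> Htable, %eax -> .Lrem_4bit,
+# %ebx = byte 15 of Xi; returns Xi words 0-3 byte-swapped in %ebp, %edx, %ecx, %ebx.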
+	xorl	%ecx,%ecx
+	movl	%ebx,%edx
+	movb	%dl,%cl
+	shlb	$4,%cl
+	andl	$240,%edx
+	movq	8(%esi,%ecx,1),%mm0
+	movq	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	14(%edi),%cl
+	psllq	$60,%mm2
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	13(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	12(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	11(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	10(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	9(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	8(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	7(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	6(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	5(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	4(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	3(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	2(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	1(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	movb	(%edi),%cl
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movl	%ecx,%edx
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	shlb	$4,%cl
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%ecx,1),%mm0
+	psllq	$60,%mm2
+	andl	$240,%edx
+	pxor	(%eax,%ebp,8),%mm1
+	andl	$15,%ebx
+	pxor	(%esi,%ecx,1),%mm1
+	movd	%mm0,%ebp
+	pxor	%mm2,%mm0
+	psrlq	$4,%mm0
+	movq	%mm1,%mm2
+	psrlq	$4,%mm1
+	pxor	8(%esi,%edx,1),%mm0
+	psllq	$60,%mm2
+	pxor	(%eax,%ebx,8),%mm1
+	andl	$15,%ebp
+	pxor	(%esi,%edx,1),%mm1
+	movd	%mm0,%ebx
+	pxor	%mm2,%mm0
+	movl	4(%eax,%ebp,8),%edi
+	psrlq	$32,%mm0
+	movd	%mm1,%edx
+	psrlq	$32,%mm1
+	movd	%mm0,%ecx
+	movd	%mm1,%ebp
+	shll	$4,%edi
+	bswap	%ebx
+	bswap	%edx
+	bswap	%ecx
+	xorl	%edi,%ebp
+	bswap	%ebp
+	ret
+.size	_mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
+.globl	gcm_gmult_4bit_mmx
+.type	gcm_gmult_4bit_mmx,@function
+.align	16
+gcm_gmult_4bit_mmx:
+.L_gcm_gmult_4bit_mmx_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%edi
+	movl	24(%esp),%esi
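+# call/pop yields the PC so .Lrem_4bit can be addressed PIC-safely.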
+	call	.L005pic_point
+.L005pic_point:
+	popl	%eax
+	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
+	movzbl	15(%edi),%ebx
+	call	_mmx_gmult_4bit_inner
+	movl	20(%esp),%edi
+	emms
+	movl	%ebx,12(%edi)
+	movl	%edx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%ebp,(%edi)
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
+.globl	gcm_ghash_4bit_mmx
+.type	gcm_ghash_4bit_mmx,@function
+.align	16
+gcm_ghash_4bit_mmx:
+.L_gcm_ghash_4bit_mmx_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%ebp
+	movl	24(%esp),%esi
+	movl	28(%esp),%edi
+	movl	32(%esp),%ecx
+	call	.L006pic_point
+.L006pic_point:
+	popl	%eax
+	leal	.Lrem_4bit-.L006pic_point(%eax),%eax
+	addl	%edi,%ecx
+	movl	%ecx,32(%esp)
+	subl	$20,%esp
+	movl	12(%ebp),%ebx
+	movl	4(%ebp),%edx
+	movl	8(%ebp),%ecx
+	movl	(%ebp),%ebp
+	jmp	.L007mmx_outer_loop
+.align	16
+.L007mmx_outer_loop:
+	xorl	12(%edi),%ebx
+	xorl	4(%edi),%edx
+	xorl	8(%edi),%ecx
+	xorl	(%edi),%ebp
+	movl	%edi,48(%esp)
+	movl	%ebx,12(%esp)
+	movl	%edx,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%ebp,(%esp)
+	movl	%esp,%edi
+	shrl	$24,%ebx
+	call	_mmx_gmult_4bit_inner
+	movl	48(%esp),%edi
+	leal	16(%edi),%edi
+	cmpl	52(%esp),%edi
+	jb	.L007mmx_outer_loop
+	movl	40(%esp),%edi
+	emms
+	movl	%ebx,12(%edi)
+	movl	%edx,4(%edi)
+	movl	%ecx,8(%edi)
+	movl	%ebp,(%edi)
+	addl	$20,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.align	64
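+# rem_4bit: reduction constants for the GHASH polynomial x^128 + x^7 + x^2 + x + 1.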
+.Lrem_4bit:
+.long	0,0,0,29491200,0,58982400,0,38141952
+.long	0,117964800,0,113901568,0,76283904,0,88997888
+.long	0,235929600,0,265420800,0,227803136,0,206962688
+.long	0,152567808,0,148504576,0,177995776,0,190709760
+.align	64
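+# rem_8bit: 256-entry reduction table emitted by the generator; it appears
+# unreferenced by the routines in this file.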
+.L008rem_8bit:
+.value	0,450,900,582,1800,1738,1164,1358
+.value	3600,4050,3476,3158,2328,2266,2716,2910
+.value	7200,7650,8100,7782,6952,6890,6316,6510
+.value	4656,5106,4532,4214,5432,5370,5820,6014
+.value	14400,14722,15300,14854,16200,16010,15564,15630
+.value	13904,14226,13780,13334,12632,12442,13020,13086
+.value	9312,9634,10212,9766,9064,8874,8428,8494
+.value	10864,11186,10740,10294,11640,11450,12028,12094
+.value	28800,28994,29444,29382,30600,30282,29708,30158
+.value	32400,32594,32020,31958,31128,30810,31260,31710
+.value	27808,28002,28452,28390,27560,27242,26668,27118
+.value	25264,25458,24884,24822,26040,25722,26172,26622
+.value	18624,18690,19268,19078,20424,19978,19532,19854
+.value	18128,18194,17748,17558,16856,16410,16988,17310
+.value	21728,21794,22372,22182,21480,21034,20588,20910
+.value	23280,23346,22900,22710,24056,23610,24188,24510
+.value	57600,57538,57988,58182,58888,59338,58764,58446
+.value	61200,61138,60564,60758,59416,59866,60316,59998
+.value	64800,64738,65188,65382,64040,64490,63916,63598
+.value	62256,62194,61620,61814,62520,62970,63420,63102
+.value	55616,55426,56004,56070,56904,57226,56780,56334
+.value	55120,54930,54484,54550,53336,53658,54236,53790
+.value	50528,50338,50916,50982,49768,50090,49644,49198
+.value	52080,51890,51444,51510,52344,52666,53244,52798
+.value	37248,36930,37380,37830,38536,38730,38156,38094
+.value	40848,40530,39956,40406,39064,39258,39708,39646
+.value	36256,35938,36388,36838,35496,35690,35116,35054
+.value	33712,33394,32820,33270,33976,34170,34620,34558
+.value	43456,43010,43588,43910,44744,44810,44364,44174
+.value	42960,42514,42068,42390,41176,41242,41820,41630
+.value	46560,46114,46692,47014,45800,45866,45420,45230
+.value	48112,47666,47220,47542,48376,48442,49020,48830
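+# ASCII tag: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"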
+.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
+.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+.byte	0
diff --git a/crypto/sha/asm/sha1-586.s b/crypto/sha/asm/sha1-586.s
new file mode 100644
index 0000000..e77f654
--- /dev/null
+++ b/crypto/sha/asm/sha1-586.s
@@ -0,0 +1,1380 @@
+.file	"sha1-586.s"
+.text
+.globl	sha1_block_data_order
+.type	sha1_block_data_order,@function
+.align	16
+sha1_block_data_order:
+.L_sha1_block_data_order_begin:
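+# sha1_block_data_order(SHA_CTX *c, const void *p, size_t num): each 64-byte block
+# is byte-swapped into a 16-word ring buffer on the stack.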
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%ebp
+	movl	24(%esp),%esi
+	movl	28(%esp),%eax
+	subl	$76,%esp
+	shll	$6,%eax
+	addl	%esi,%eax
+	movl	%eax,104(%esp)
+	movl	16(%ebp),%edi
+	jmp	.L000loop
+.align	16
+.L000loop:
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%edx,12(%esp)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,16(%esp)
+	movl	%ebx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%edx,28(%esp)
+	movl	32(%esi),%eax
+	movl	36(%esi),%ebx
+	movl	40(%esi),%ecx
+	movl	44(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,40(%esp)
+	movl	%edx,44(%esp)
+	movl	48(%esi),%eax
+	movl	52(%esi),%ebx
+	movl	56(%esi),%ecx
+	movl	60(%esi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	movl	%eax,48(%esp)
+	movl	%ebx,52(%esp)
+	movl	%ecx,56(%esp)
+	movl	%edx,60(%esp)
+	movl	%esi,100(%esp)
+	movl	(%ebp),%eax
+	movl	4(%ebp),%ebx
+	movl	8(%ebp),%ecx
+	movl	12(%ebp),%edx
+
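+# Rounds 0-15: e += rol(a,5) + Ch(b,c,d) + W[i] + 0x5A827999,
+# with Ch(b,c,d) = (b & c) | (~b & d) computed as ((c ^ d) & b) ^ d.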
+	movl	%ecx,%esi
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	4(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	8(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	12(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	addl	%ecx,%ebp
+
+	movl	%edi,%ebx
+	movl	%ebp,%ecx
+	roll	$5,%ebp
+	xorl	%esi,%ebx
+	addl	%eax,%ebp
+	movl	16(%esp),%eax
+	andl	%edx,%ebx
+	rorl	$2,%edx
+	xorl	%esi,%ebx
+	leal	1518500249(%ebp,%eax,1),%ebp
+	addl	%ebx,%ebp
+
+	movl	%edx,%eax
+	movl	%ebp,%ebx
+	roll	$5,%ebp
+	xorl	%edi,%eax
+	addl	%esi,%ebp
+	movl	20(%esp),%esi
+	andl	%ecx,%eax
+	rorl	$2,%ecx
+	xorl	%edi,%eax
+	leal	1518500249(%ebp,%esi,1),%ebp
+	addl	%eax,%ebp
+
+	movl	%ecx,%esi
+	movl	%ebp,%eax
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	24(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	28(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	32(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	36(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	addl	%ecx,%ebp
+
+	movl	%edi,%ebx
+	movl	%ebp,%ecx
+	roll	$5,%ebp
+	xorl	%esi,%ebx
+	addl	%eax,%ebp
+	movl	40(%esp),%eax
+	andl	%edx,%ebx
+	rorl	$2,%edx
+	xorl	%esi,%ebx
+	leal	1518500249(%ebp,%eax,1),%ebp
+	addl	%ebx,%ebp
+
+	movl	%edx,%eax
+	movl	%ebp,%ebx
+	roll	$5,%ebp
+	xorl	%edi,%eax
+	addl	%esi,%ebp
+	movl	44(%esp),%esi
+	andl	%ecx,%eax
+	rorl	$2,%ecx
+	xorl	%edi,%eax
+	leal	1518500249(%ebp,%esi,1),%ebp
+	addl	%eax,%ebp
+
+	movl	%ecx,%esi
+	movl	%ebp,%eax
+	roll	$5,%ebp
+	xorl	%edx,%esi
+	addl	%edi,%ebp
+	movl	48(%esp),%edi
+	andl	%ebx,%esi
+	rorl	$2,%ebx
+	xorl	%edx,%esi
+	leal	1518500249(%ebp,%edi,1),%ebp
+	addl	%esi,%ebp
+
+	movl	%ebx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ecx,%edi
+	addl	%edx,%ebp
+	movl	52(%esp),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	xorl	%ecx,%edi
+	leal	1518500249(%ebp,%edx,1),%ebp
+	addl	%edi,%ebp
+
+	movl	%eax,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%edx
+	addl	%ecx,%ebp
+	movl	56(%esp),%ecx
+	andl	%esi,%edx
+	rorl	$2,%esi
+	xorl	%ebx,%edx
+	leal	1518500249(%ebp,%ecx,1),%ebp
+	addl	%edx,%ebp
+
+	movl	%esi,%ecx
+	movl	%ebp,%edx
+	roll	$5,%ebp
+	xorl	%eax,%ecx
+	addl	%ebx,%ebp
+	movl	60(%esp),%ebx
+	andl	%edi,%ecx
+	rorl	$2,%edi
+	xorl	%eax,%ecx
+	leal	1518500249(%ebp,%ebx,1),%ebp
+	movl	(%esp),%ebx
+	addl	%ebp,%ecx
+
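+# Rounds 16-19: same Ch round, but W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
+# is now computed in place in the 16-word ring.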
+	movl	%edi,%ebp
+	xorl	8(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	32(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	52(%esp),%ebx
+	roll	$1,%ebx
+	xorl	%esi,%ebp
+	addl	%ebp,%eax
+	movl	%ecx,%ebp
+	rorl	$2,%edx
+	movl	%ebx,(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%ebx,%eax,1),%ebx
+	movl	4(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	12(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	36(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	56(%esp),%eax
+	roll	$1,%eax
+	xorl	%edi,%ebp
+	addl	%ebp,%esi
+	movl	%ebx,%ebp
+	rorl	$2,%ecx
+	movl	%eax,4(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	8(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	40(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	60(%esp),%esi
+	roll	$1,%esi
+	xorl	%edx,%ebp
+	addl	%ebp,%edi
+	movl	%eax,%ebp
+	rorl	$2,%ebx
+	movl	%esi,8(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%esi,%edi,1),%esi
+	movl	12(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	44(%esp),%edi
+	andl	%eax,%ebp
+	xorl	(%esp),%edi
+	roll	$1,%edi
+	xorl	%ecx,%ebp
+	addl	%ebp,%edx
+	movl	%esi,%ebp
+	rorl	$2,%eax
+	movl	%edi,12(%esp)
+	roll	$5,%ebp
+	leal	1518500249(%edi,%edx,1),%edi
+	movl	16(%esp),%edx
+	addl	%ebp,%edi
+
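+# Rounds 20-39: F = Parity(b,c,d) = b ^ c ^ d, K = 0x6ED9EBA1.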
+	movl	%esi,%ebp
+	xorl	24(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,16(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	20(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,20(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	24(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,24(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	28(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	16(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,28(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	32(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	20(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,32(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	36(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,36(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	40(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	48(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,40(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	44(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,44(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	48(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,48(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	52(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	40(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,52(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	56(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	44(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,56(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	60(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,60(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	8(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	52(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	4(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	56(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,4(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	8(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	40(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	60(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,8(%esp)
+	leal	1859775393(%ebx,%eax,1),%ebx
+	movl	12(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	44(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,12(%esp)
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	16(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	4(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,16(%esp)
+	leal	1859775393(%esi,%edi,1),%esi
+	movl	20(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	52(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	8(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,20(%esp)
+	leal	1859775393(%edi,%edx,1),%edi
+	movl	24(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	32(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	56(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	12(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,24(%esp)
+	leal	1859775393(%edx,%ecx,1),%edx
+	movl	28(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	36(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	60(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	16(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,28(%esp)
+	leal	1859775393(%ecx,%ebx,1),%ecx
+	movl	32(%esp),%ebx
+	addl	%ebp,%ecx
+
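+# Rounds 40-59: F = Maj(b,c,d), K = 0x8F1BBCDC; Maj is split into the two
+# disjoint terms ((c ^ d) & b) and (c & d), which are added separately.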
+	movl	%edi,%ebp
+	xorl	40(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	20(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,32(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	36(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	44(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	4(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	24(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,36(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	40(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	48(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	8(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	28(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,40(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	44(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	52(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	12(%esp),%edi
+	andl	%eax,%ebp
+	xorl	32(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,44(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	48(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	56(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	16(%esp),%edx
+	andl	%esi,%ebp
+	xorl	36(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,48(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	52(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	60(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	20(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	40(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,52(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	56(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	24(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	44(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,56(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	60(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	4(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	28(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	48(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,60(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	8(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	32(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	52(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	4(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	12(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	36(%esp),%edi
+	andl	%eax,%ebp
+	xorl	56(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,4(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	8(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	16(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	40(%esp),%edx
+	andl	%esi,%ebp
+	xorl	60(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,8(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	12(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	20(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	44(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,12(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	16(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	24(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	48(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	4(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,16(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	20(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	28(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	52(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	8(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,20(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	24(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ecx,%ebp
+	xorl	32(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	56(%esp),%esi
+	andl	%ebx,%ebp
+	xorl	12(%esp),%esi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	rorl	$2,%ebx
+	movl	%eax,%edi
+	roll	$5,%edi
+	movl	%esi,24(%esp)
+	leal	2400959708(%esi,%ebp,1),%esi
+	movl	%ecx,%ebp
+	addl	%edi,%esi
+	andl	%edx,%ebp
+	movl	28(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%ebx,%ebp
+	xorl	36(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	60(%esp),%edi
+	andl	%eax,%ebp
+	xorl	16(%esp),%edi
+	roll	$1,%edi
+	addl	%edx,%ebp
+	rorl	$2,%eax
+	movl	%esi,%edx
+	roll	$5,%edx
+	movl	%edi,28(%esp)
+	leal	2400959708(%edi,%ebp,1),%edi
+	movl	%ebx,%ebp
+	addl	%edx,%edi
+	andl	%ecx,%ebp
+	movl	32(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%eax,%ebp
+	xorl	40(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	(%esp),%edx
+	andl	%esi,%ebp
+	xorl	20(%esp),%edx
+	roll	$1,%edx
+	addl	%ecx,%ebp
+	rorl	$2,%esi
+	movl	%edi,%ecx
+	roll	$5,%ecx
+	movl	%edx,32(%esp)
+	leal	2400959708(%edx,%ebp,1),%edx
+	movl	%eax,%ebp
+	addl	%ecx,%edx
+	andl	%ebx,%ebp
+	movl	36(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%esi,%ebp
+	xorl	44(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	4(%esp),%ecx
+	andl	%edi,%ebp
+	xorl	24(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebx,%ebp
+	rorl	$2,%edi
+	movl	%edx,%ebx
+	roll	$5,%ebx
+	movl	%ecx,36(%esp)
+	leal	2400959708(%ecx,%ebp,1),%ecx
+	movl	%esi,%ebp
+	addl	%ebx,%ecx
+	andl	%eax,%ebp
+	movl	40(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edi,%ebp
+	xorl	48(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	8(%esp),%ebx
+	andl	%edx,%ebp
+	xorl	28(%esp),%ebx
+	roll	$1,%ebx
+	addl	%eax,%ebp
+	rorl	$2,%edx
+	movl	%ecx,%eax
+	roll	$5,%eax
+	movl	%ebx,40(%esp)
+	leal	2400959708(%ebx,%ebp,1),%ebx
+	movl	%edi,%ebp
+	addl	%eax,%ebx
+	andl	%esi,%ebp
+	movl	44(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%edx,%ebp
+	xorl	52(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	12(%esp),%eax
+	andl	%ecx,%ebp
+	xorl	32(%esp),%eax
+	roll	$1,%eax
+	addl	%esi,%ebp
+	rorl	$2,%ecx
+	movl	%ebx,%esi
+	roll	$5,%esi
+	movl	%eax,44(%esp)
+	leal	2400959708(%eax,%ebp,1),%eax
+	movl	%edx,%ebp
+	addl	%esi,%eax
+	andl	%edi,%ebp
+	movl	48(%esp),%esi
+	addl	%ebp,%eax
+
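+# Rounds 60-79: F = Parity again, K = 0xCA62C1D6.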
+	movl	%ebx,%ebp
+	xorl	56(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	36(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,48(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	52(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	60(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	40(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,52(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	56(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	24(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	44(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,56(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	60(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	4(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,60(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	8(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	4(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	12(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	56(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,4(%esp)
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	8(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	16(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	60(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,8(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	12(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	20(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,12(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	16(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	24(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	48(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,16(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	20(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	28(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,20(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	24(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	32(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,24(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	28(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	36(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	16(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	movl	%eax,28(%esp)
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	32(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	40(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	20(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	movl	%esi,32(%esp)
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	36(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	44(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	movl	%edi,36(%esp)
+	leal	3395469782(%edi,%edx,1),%edi
+	movl	40(%esp),%edx
+	addl	%ebp,%edi
+
+	movl	%esi,%ebp
+	xorl	48(%esp),%edx
+	xorl	%eax,%ebp
+	xorl	8(%esp),%edx
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edx
+	roll	$1,%edx
+	addl	%ebp,%ecx
+	rorl	$2,%esi
+	movl	%edi,%ebp
+	roll	$5,%ebp
+	movl	%edx,40(%esp)
+	leal	3395469782(%edx,%ecx,1),%edx
+	movl	44(%esp),%ecx
+	addl	%ebp,%edx
+
+	movl	%edi,%ebp
+	xorl	52(%esp),%ecx
+	xorl	%esi,%ebp
+	xorl	12(%esp),%ecx
+	xorl	%eax,%ebp
+	xorl	32(%esp),%ecx
+	roll	$1,%ecx
+	addl	%ebp,%ebx
+	rorl	$2,%edi
+	movl	%edx,%ebp
+	roll	$5,%ebp
+	movl	%ecx,44(%esp)
+	leal	3395469782(%ecx,%ebx,1),%ecx
+	movl	48(%esp),%ebx
+	addl	%ebp,%ecx
+
+	movl	%edx,%ebp
+	xorl	56(%esp),%ebx
+	xorl	%edi,%ebp
+	xorl	16(%esp),%ebx
+	xorl	%esi,%ebp
+	xorl	36(%esp),%ebx
+	roll	$1,%ebx
+	addl	%ebp,%eax
+	rorl	$2,%edx
+	movl	%ecx,%ebp
+	roll	$5,%ebp
+	movl	%ebx,48(%esp)
+	leal	3395469782(%ebx,%eax,1),%ebx
+	movl	52(%esp),%eax
+	addl	%ebp,%ebx
+
+	movl	%ecx,%ebp
+	xorl	60(%esp),%eax
+	xorl	%edx,%ebp
+	xorl	20(%esp),%eax
+	xorl	%edi,%ebp
+	xorl	40(%esp),%eax
+	roll	$1,%eax
+	addl	%ebp,%esi
+	rorl	$2,%ecx
+	movl	%ebx,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	56(%esp),%esi
+	addl	%ebp,%eax
+
+	movl	%ebx,%ebp
+	xorl	(%esp),%esi
+	xorl	%ecx,%ebp
+	xorl	24(%esp),%esi
+	xorl	%edx,%ebp
+	xorl	44(%esp),%esi
+	roll	$1,%esi
+	addl	%ebp,%edi
+	rorl	$2,%ebx
+	movl	%eax,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%esi,%edi,1),%esi
+	movl	60(%esp),%edi
+	addl	%ebp,%esi
+
+	movl	%eax,%ebp
+	xorl	4(%esp),%edi
+	xorl	%ebx,%ebp
+	xorl	28(%esp),%edi
+	xorl	%ecx,%ebp
+	xorl	48(%esp),%edi
+	roll	$1,%edi
+	addl	%ebp,%edx
+	rorl	$2,%eax
+	movl	%esi,%ebp
+	roll	$5,%ebp
+	leal	3395469782(%edi,%edx,1),%edi
+	addl	%ebp,%edi
+	movl	96(%esp),%ebp
+	movl	100(%esp),%edx
+	addl	(%ebp),%edi
+	addl	4(%ebp),%esi
+	addl	8(%ebp),%eax
+	addl	12(%ebp),%ebx
+	addl	16(%ebp),%ecx
+	movl	%edi,(%ebp)
+	addl	$64,%edx
+	movl	%esi,4(%ebp)
+	cmpl	104(%esp),%edx
+	movl	%eax,8(%ebp)
+	movl	%ecx,%edi
+	movl	%ebx,12(%ebp)
+	movl	%edx,%esi
+	movl	%ecx,16(%ebp)
+	jb	.L000loop
+	addl	$76,%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.size	sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+.byte	102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
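
Note on the generated code above: rounds 60-79 of SHA-1 use the parity function F(b,c,d) = b ^ c ^ d and the round constant 0xCA62C1D6, which appears in the `leal` instructions in its decimal form 3395469782. The `xorl`/`roll $1` chains against `(%esp)` offsets update the 16-word message schedule, kept as a circular buffer on the stack: w[i & 15] = ROTL1(w[(i-3) & 15] ^ w[(i-8) & 15] ^ w[(i-14) & 15] ^ w[i & 15]). A minimal C sketch of one such round follows; it is illustrative only, and the helper names (`rotl32`, `sha1_round_60_79`) are hypothetical, not part of this patch:

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, int n) {
        return (x << n) | (x >> (32 - n));
    }

    /* One SHA-1 round in the 60..79 range, mirroring the asm above:
     * s[0..4] = a,b,c,d,e; w[] is the stack-resident 16-word schedule. */
    static void sha1_round_60_79(uint32_t s[5], uint32_t w[16], int i)
    {
        uint32_t f  = s[1] ^ s[2] ^ s[3];             /* the xorl chain on %ebp */
        uint32_t wi = rotl32(w[(i - 3) & 15] ^ w[(i - 8) & 15] ^
                             w[(i - 14) & 15] ^ w[i & 15], 1);  /* roll $1 */
        w[i & 15] = wi;
        uint32_t t = rotl32(s[0], 5) + f + s[4] + wi + 0xCA62C1D6u;  /* leal + addl */
        s[4] = s[3];
        s[3] = s[2];
        s[2] = rotl32(s[1], 30);                      /* rorl $2 */
        s[1] = s[0];
        s[0] = t;
    }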
diff --git a/crypto/sha/asm/sha256-586.s b/crypto/sha/asm/sha256-586.s
new file mode 100644
index 0000000..77a8951
--- /dev/null
+++ b/crypto/sha/asm/sha256-586.s
@@ -0,0 +1,258 @@
+.file	"sha512-586.s"
+.text
+.globl	sha256_block_data_order
+.type	sha256_block_data_order,@function
+.align	16
+sha256_block_data_order:
+.L_sha256_block_data_order_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	%esp,%ebx
+	call	.L000pic_point
+.L000pic_point:
+	popl	%ebp
+	leal	.L001K256-.L000pic_point(%ebp),%ebp
+	subl	$16,%esp
+	andl	$-64,%esp
+	shll	$6,%eax
+	addl	%edi,%eax
+	movl	%esi,(%esp)
+	movl	%edi,4(%esp)
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+.align	16
+.L002loop:
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	16(%edi),%eax
+	movl	20(%edi),%ebx
+	movl	24(%edi),%ecx
+	movl	28(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	32(%edi),%eax
+	movl	36(%edi),%ebx
+	movl	40(%edi),%ecx
+	movl	44(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	48(%edi),%eax
+	movl	52(%edi),%ebx
+	movl	56(%edi),%ecx
+	movl	60(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	addl	$64,%edi
+	subl	$32,%esp
+	movl	%edi,100(%esp)
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edi
+	movl	%ebx,4(%esp)
+	movl	%ecx,8(%esp)
+	movl	%edi,12(%esp)
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edi
+	movl	%ebx,20(%esp)
+	movl	%ecx,24(%esp)
+	movl	%edi,28(%esp)
+.align	16
+.L00300_15:
+	movl	92(%esp),%ebx
+	movl	%edx,%ecx
+	rorl	$14,%ecx
+	movl	20(%esp),%esi
+	xorl	%edx,%ecx
+	rorl	$5,%ecx
+	xorl	%edx,%ecx
+	rorl	$6,%ecx
+	movl	24(%esp),%edi
+	addl	%ecx,%ebx
+	xorl	%edi,%esi
+	movl	%edx,16(%esp)
+	movl	%eax,%ecx
+	andl	%edx,%esi
+	movl	12(%esp),%edx
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	addl	%esi,%ebx
+	rorl	$9,%ecx
+	addl	28(%esp),%ebx
+	xorl	%eax,%ecx
+	rorl	$11,%ecx
+	movl	4(%esp),%esi
+	xorl	%eax,%ecx
+	rorl	$2,%ecx
+	addl	%ebx,%edx
+	movl	8(%esp),%edi
+	addl	%ecx,%ebx
+	movl	%eax,(%esp)
+	movl	%eax,%ecx
+	subl	$4,%esp
+	orl	%esi,%eax
+	andl	%esi,%ecx
+	andl	%edi,%eax
+	movl	(%ebp),%esi
+	orl	%ecx,%eax
+	addl	$4,%ebp
+	addl	%ebx,%eax
+	addl	%esi,%edx
+	addl	%esi,%eax
+	cmpl	$3248222580,%esi
+	jne	.L00300_15
+	movl	152(%esp),%ebx
+.align	16
+.L00416_63:
+	movl	%ebx,%esi
+	movl	100(%esp),%ecx
+	rorl	$11,%esi
+	movl	%ecx,%edi
+	xorl	%ebx,%esi
+	rorl	$7,%esi
+	shrl	$3,%ebx
+	rorl	$2,%edi
+	xorl	%esi,%ebx
+	xorl	%ecx,%edi
+	rorl	$17,%edi
+	shrl	$10,%ecx
+	addl	156(%esp),%ebx
+	xorl	%ecx,%edi
+	addl	120(%esp),%ebx
+	movl	%edx,%ecx
+	addl	%edi,%ebx
+	rorl	$14,%ecx
+	movl	20(%esp),%esi
+	xorl	%edx,%ecx
+	rorl	$5,%ecx
+	movl	%ebx,92(%esp)
+	xorl	%edx,%ecx
+	rorl	$6,%ecx
+	movl	24(%esp),%edi
+	addl	%ecx,%ebx
+	xorl	%edi,%esi
+	movl	%edx,16(%esp)
+	movl	%eax,%ecx
+	andl	%edx,%esi
+	movl	12(%esp),%edx
+	xorl	%edi,%esi
+	movl	%eax,%edi
+	addl	%esi,%ebx
+	rorl	$9,%ecx
+	addl	28(%esp),%ebx
+	xorl	%eax,%ecx
+	rorl	$11,%ecx
+	movl	4(%esp),%esi
+	xorl	%eax,%ecx
+	rorl	$2,%ecx
+	addl	%ebx,%edx
+	movl	8(%esp),%edi
+	addl	%ecx,%ebx
+	movl	%eax,(%esp)
+	movl	%eax,%ecx
+	subl	$4,%esp
+	orl	%esi,%eax
+	andl	%esi,%ecx
+	andl	%edi,%eax
+	movl	(%ebp),%esi
+	orl	%ecx,%eax
+	addl	$4,%ebp
+	addl	%ebx,%eax
+	movl	152(%esp),%ebx
+	addl	%esi,%edx
+	addl	%esi,%eax
+	cmpl	$3329325298,%esi
+	jne	.L00416_63
+	movl	352(%esp),%esi
+	movl	4(%esp),%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edi
+	addl	(%esi),%eax
+	addl	4(%esi),%ebx
+	addl	8(%esi),%ecx
+	addl	12(%esi),%edi
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	movl	%ecx,8(%esi)
+	movl	%edi,12(%esi)
+	movl	20(%esp),%eax
+	movl	24(%esp),%ebx
+	movl	28(%esp),%ecx
+	movl	356(%esp),%edi
+	addl	16(%esi),%edx
+	addl	20(%esi),%eax
+	addl	24(%esi),%ebx
+	addl	28(%esi),%ecx
+	movl	%edx,16(%esi)
+	movl	%eax,20(%esi)
+	movl	%ebx,24(%esi)
+	movl	%ecx,28(%esi)
+	addl	$352,%esp
+	subl	$256,%ebp
+	cmpl	8(%esp),%edi
+	jb	.L002loop
+	movl	12(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L001K256:
+.long	1116352408,1899447441,3049323471,3921009573
+.long	961987163,1508970993,2453635748,2870763221
+.long	3624381080,310598401,607225278,1426881987
+.long	1925078388,2162078206,2614888103,3248222580
+.long	3835390401,4022224774,264347078,604807628
+.long	770255983,1249150122,1555081692,1996064986
+.long	2554220882,2821834349,2952996808,3210313671
+.long	3336571891,3584528711,113926993,338241895
+.long	666307205,773529912,1294757372,1396182291
+.long	1695183700,1986661051,2177026350,2456956037
+.long	2730485921,2820302411,3259730800,3345764771
+.long	3516065817,3600352804,4094571909,275423344
+.long	430227734,506948616,659060556,883997877
+.long	958139571,1322822218,1537002063,1747873779
+.long	1955562222,2024104815,2227730452,2361852424
+.long	2428436474,2756734187,3204031479,3329325298
+.size	sha256_block_data_order,.-.L_sha256_block_data_order_begin
+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte	62,0
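
Two remarks on sha256-586.s above. First, the `.file "sha512-586.s"` directive at the top appears to be expected generator output (sha256-586.pl initializes perlasm with the sha512 module name), not a mislabeled file. Second, the round loops carry no counter: each iteration reads its constant from the `.L001K256` table and advances %ebp, and the loop exits when the constant just consumed equals K[15] (3248222580 = 0xC19BF174) or K[63] (3329325298 = 0xC67178F2); that is what the `cmpl` before each `jne` tests. The big-sigma functions are computed with a nested rotate-xor trick that needs no scratch copy per rotate, while Ch and Maj use the standard and/or/xor identities. A hedged C sketch of these equivalences (helper names are illustrative, not from the patch):

    #include <stdint.h>

    static inline uint32_t rotr32(uint32_t x, int n) {
        return (x >> n) | (x << (32 - n));
    }

    /* The nested form used by the asm:
     * (((e ror 14) ^ e) ror 5) ^ e) ror 6 == ROTR6 ^ ROTR11 ^ ROTR25 == Sigma1(e). */
    static uint32_t Sigma1(uint32_t e) {
        return rotr32(rotr32(rotr32(e, 14) ^ e, 5) ^ e, 6);
    }

    /* Likewise rorl $9 / $11 / $2 gives Sigma0(a) = ROTR2 ^ ROTR13 ^ ROTR22. */
    static uint32_t Sigma0(uint32_t a) {
        return rotr32(rotr32(rotr32(a, 9) ^ a, 11) ^ a, 2);
    }

    /* Counter-free round loop in the style of .L00300_15: */
    static void rounds_0_15(const uint32_t *K /* -> the K256 table */)
    {
        uint32_t k;
        do {
            k = *K++;               /* movl (%ebp),%esi; addl $4,%ebp */
            /* ... round body using Sigma1/Sigma0, Ch, Maj ... */
        } while (k != 0xC19BF174u); /* cmpl $3248222580,%esi; jne .L00300_15 */
    }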
diff --git a/crypto/sha/asm/sha512-586.s b/crypto/sha/asm/sha512-586.s
new file mode 100644
index 0000000..4b806f3
--- /dev/null
+++ b/crypto/sha/asm/sha512-586.s
@@ -0,0 +1,563 @@
+.file	"sha512-586.s"
+.text
+.globl	sha512_block_data_order
+.type	sha512_block_data_order,@function
+.align	16
+sha512_block_data_order:
+.L_sha512_block_data_order_begin:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%esi
+	movl	24(%esp),%edi
+	movl	28(%esp),%eax
+	movl	%esp,%ebx
+	call	.L000pic_point
+.L000pic_point:
+	popl	%ebp
+	leal	.L001K512-.L000pic_point(%ebp),%ebp
+	subl	$16,%esp
+	andl	$-64,%esp
+	shll	$7,%eax
+	addl	%edi,%eax
+	movl	%esi,(%esp)
+	movl	%edi,4(%esp)
+	movl	%eax,8(%esp)
+	movl	%ebx,12(%esp)
+.align	16
+.L002loop_x86:
+	movl	(%edi),%eax
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	16(%edi),%eax
+	movl	20(%edi),%ebx
+	movl	24(%edi),%ecx
+	movl	28(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	32(%edi),%eax
+	movl	36(%edi),%ebx
+	movl	40(%edi),%ecx
+	movl	44(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	48(%edi),%eax
+	movl	52(%edi),%ebx
+	movl	56(%edi),%ecx
+	movl	60(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	64(%edi),%eax
+	movl	68(%edi),%ebx
+	movl	72(%edi),%ecx
+	movl	76(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	80(%edi),%eax
+	movl	84(%edi),%ebx
+	movl	88(%edi),%ecx
+	movl	92(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	96(%edi),%eax
+	movl	100(%edi),%ebx
+	movl	104(%edi),%ecx
+	movl	108(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	movl	112(%edi),%eax
+	movl	116(%edi),%ebx
+	movl	120(%edi),%ecx
+	movl	124(%edi),%edx
+	bswap	%eax
+	bswap	%ebx
+	bswap	%ecx
+	bswap	%edx
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	addl	$128,%edi
+	subl	$72,%esp
+	movl	%edi,204(%esp)
+	leal	8(%esp),%edi
+	movl	$16,%ecx
+.long	2784229001
+.align	16
+.L00300_15_x86:
+	movl	40(%esp),%ecx
+	movl	44(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$9,%ecx
+	movl	%edx,%edi
+	shrl	$9,%edx
+	movl	%ecx,%ebx
+	shll	$14,%esi
+	movl	%edx,%eax
+	shll	$14,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%eax
+	shll	$4,%esi
+	xorl	%edx,%ebx
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$4,%ecx
+	xorl	%edi,%eax
+	shrl	$4,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	48(%esp),%ecx
+	movl	52(%esp),%edx
+	movl	56(%esp),%esi
+	movl	60(%esp),%edi
+	addl	64(%esp),%eax
+	adcl	68(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	andl	40(%esp),%ecx
+	andl	44(%esp),%edx
+	addl	192(%esp),%eax
+	adcl	196(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	movl	(%ebp),%esi
+	movl	4(%ebp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	32(%esp),%ecx
+	movl	36(%esp),%edx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,%esi
+	shrl	$2,%ecx
+	movl	%edx,%edi
+	shrl	$2,%edx
+	movl	%ecx,%ebx
+	shll	$4,%esi
+	movl	%edx,%eax
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%ebx
+	shll	$21,%esi
+	xorl	%edx,%eax
+	shll	$21,%edi
+	xorl	%esi,%eax
+	shrl	$21,%ecx
+	xorl	%edi,%ebx
+	shrl	$21,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	andl	24(%esp),%ecx
+	andl	28(%esp),%edx
+	andl	8(%esp),%esi
+	andl	12(%esp),%edi
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movb	(%ebp),%dl
+	subl	$8,%esp
+	leal	8(%ebp),%ebp
+	cmpb	$148,%dl
+	jne	.L00300_15_x86
+.align	16
+.L00416_79_x86:
+	movl	312(%esp),%ecx
+	movl	316(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$1,%ecx
+	movl	%edx,%edi
+	shrl	$1,%edx
+	movl	%ecx,%eax
+	shll	$24,%esi
+	movl	%edx,%ebx
+	shll	$24,%edi
+	xorl	%esi,%ebx
+	shrl	$6,%ecx
+	xorl	%edi,%eax
+	shrl	$6,%edx
+	xorl	%ecx,%eax
+	shll	$7,%esi
+	xorl	%edx,%ebx
+	shll	$1,%edi
+	xorl	%esi,%ebx
+	shrl	$1,%ecx
+	xorl	%edi,%eax
+	shrl	$1,%edx
+	xorl	%ecx,%eax
+	shll	$6,%edi
+	xorl	%edx,%ebx
+	xorl	%edi,%eax
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movl	208(%esp),%ecx
+	movl	212(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$6,%ecx
+	movl	%edx,%edi
+	shrl	$6,%edx
+	movl	%ecx,%eax
+	shll	$3,%esi
+	movl	%edx,%ebx
+	shll	$3,%edi
+	xorl	%esi,%eax
+	shrl	$13,%ecx
+	xorl	%edi,%ebx
+	shrl	$13,%edx
+	xorl	%ecx,%eax
+	shll	$10,%esi
+	xorl	%edx,%ebx
+	shll	$10,%edi
+	xorl	%esi,%ebx
+	shrl	$10,%ecx
+	xorl	%edi,%eax
+	shrl	$10,%edx
+	xorl	%ecx,%ebx
+	shll	$13,%edi
+	xorl	%edx,%eax
+	xorl	%edi,%eax
+	movl	320(%esp),%ecx
+	movl	324(%esp),%edx
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	movl	248(%esp),%esi
+	movl	252(%esp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,192(%esp)
+	movl	%ebx,196(%esp)
+	movl	40(%esp),%ecx
+	movl	44(%esp),%edx
+	movl	%ecx,%esi
+	shrl	$9,%ecx
+	movl	%edx,%edi
+	shrl	$9,%edx
+	movl	%ecx,%ebx
+	shll	$14,%esi
+	movl	%edx,%eax
+	shll	$14,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%eax
+	shll	$4,%esi
+	xorl	%edx,%ebx
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$4,%ecx
+	xorl	%edi,%eax
+	shrl	$4,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	48(%esp),%ecx
+	movl	52(%esp),%edx
+	movl	56(%esp),%esi
+	movl	60(%esp),%edi
+	addl	64(%esp),%eax
+	adcl	68(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	andl	40(%esp),%ecx
+	andl	44(%esp),%edx
+	addl	192(%esp),%eax
+	adcl	196(%esp),%ebx
+	xorl	%esi,%ecx
+	xorl	%edi,%edx
+	movl	(%ebp),%esi
+	movl	4(%ebp),%edi
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	32(%esp),%ecx
+	movl	36(%esp),%edx
+	addl	%esi,%eax
+	adcl	%edi,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	%eax,32(%esp)
+	movl	%ebx,36(%esp)
+	movl	%ecx,%esi
+	shrl	$2,%ecx
+	movl	%edx,%edi
+	shrl	$2,%edx
+	movl	%ecx,%ebx
+	shll	$4,%esi
+	movl	%edx,%eax
+	shll	$4,%edi
+	xorl	%esi,%ebx
+	shrl	$5,%ecx
+	xorl	%edi,%eax
+	shrl	$5,%edx
+	xorl	%ecx,%ebx
+	shll	$21,%esi
+	xorl	%edx,%eax
+	shll	$21,%edi
+	xorl	%esi,%eax
+	shrl	$21,%ecx
+	xorl	%edi,%ebx
+	shrl	$21,%edx
+	xorl	%ecx,%eax
+	shll	$5,%esi
+	xorl	%edx,%ebx
+	shll	$5,%edi
+	xorl	%esi,%eax
+	xorl	%edi,%ebx
+	movl	8(%esp),%ecx
+	movl	12(%esp),%edx
+	movl	16(%esp),%esi
+	movl	20(%esp),%edi
+	addl	(%esp),%eax
+	adcl	4(%esp),%ebx
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	andl	24(%esp),%ecx
+	andl	28(%esp),%edx
+	andl	8(%esp),%esi
+	andl	12(%esp),%edi
+	orl	%esi,%ecx
+	orl	%edi,%edx
+	addl	%ecx,%eax
+	adcl	%edx,%ebx
+	movl	%eax,(%esp)
+	movl	%ebx,4(%esp)
+	movb	(%ebp),%dl
+	subl	$8,%esp
+	leal	8(%ebp),%ebp
+	cmpb	$23,%dl
+	jne	.L00416_79_x86
+	movl	840(%esp),%esi
+	movl	844(%esp),%edi
+	movl	(%esi),%eax
+	movl	4(%esi),%ebx
+	movl	8(%esi),%ecx
+	movl	12(%esi),%edx
+	addl	8(%esp),%eax
+	adcl	12(%esp),%ebx
+	movl	%eax,(%esi)
+	movl	%ebx,4(%esi)
+	addl	16(%esp),%ecx
+	adcl	20(%esp),%edx
+	movl	%ecx,8(%esi)
+	movl	%edx,12(%esi)
+	movl	16(%esi),%eax
+	movl	20(%esi),%ebx
+	movl	24(%esi),%ecx
+	movl	28(%esi),%edx
+	addl	24(%esp),%eax
+	adcl	28(%esp),%ebx
+	movl	%eax,16(%esi)
+	movl	%ebx,20(%esi)
+	addl	32(%esp),%ecx
+	adcl	36(%esp),%edx
+	movl	%ecx,24(%esi)
+	movl	%edx,28(%esi)
+	movl	32(%esi),%eax
+	movl	36(%esi),%ebx
+	movl	40(%esi),%ecx
+	movl	44(%esi),%edx
+	addl	40(%esp),%eax
+	adcl	44(%esp),%ebx
+	movl	%eax,32(%esi)
+	movl	%ebx,36(%esi)
+	addl	48(%esp),%ecx
+	adcl	52(%esp),%edx
+	movl	%ecx,40(%esi)
+	movl	%edx,44(%esi)
+	movl	48(%esi),%eax
+	movl	52(%esi),%ebx
+	movl	56(%esi),%ecx
+	movl	60(%esi),%edx
+	addl	56(%esp),%eax
+	adcl	60(%esp),%ebx
+	movl	%eax,48(%esi)
+	movl	%ebx,52(%esi)
+	addl	64(%esp),%ecx
+	adcl	68(%esp),%edx
+	movl	%ecx,56(%esi)
+	movl	%edx,60(%esi)
+	addl	$840,%esp
+	subl	$640,%ebp
+	cmpl	8(%esp),%edi
+	jb	.L002loop_x86
+	movl	12(%esp),%esp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.align	64
+.L001K512:
+.long	3609767458,1116352408
+.long	602891725,1899447441
+.long	3964484399,3049323471
+.long	2173295548,3921009573
+.long	4081628472,961987163
+.long	3053834265,1508970993
+.long	2937671579,2453635748
+.long	3664609560,2870763221
+.long	2734883394,3624381080
+.long	1164996542,310598401
+.long	1323610764,607225278
+.long	3590304994,1426881987
+.long	4068182383,1925078388
+.long	991336113,2162078206
+.long	633803317,2614888103
+.long	3479774868,3248222580
+.long	2666613458,3835390401
+.long	944711139,4022224774
+.long	2341262773,264347078
+.long	2007800933,604807628
+.long	1495990901,770255983
+.long	1856431235,1249150122
+.long	3175218132,1555081692
+.long	2198950837,1996064986
+.long	3999719339,2554220882
+.long	766784016,2821834349
+.long	2566594879,2952996808
+.long	3203337956,3210313671
+.long	1034457026,3336571891
+.long	2466948901,3584528711
+.long	3758326383,113926993
+.long	168717936,338241895
+.long	1188179964,666307205
+.long	1546045734,773529912
+.long	1522805485,1294757372
+.long	2643833823,1396182291
+.long	2343527390,1695183700
+.long	1014477480,1986661051
+.long	1206759142,2177026350
+.long	344077627,2456956037
+.long	1290863460,2730485921
+.long	3158454273,2820302411
+.long	3505952657,3259730800
+.long	106217008,3345764771
+.long	3606008344,3516065817
+.long	1432725776,3600352804
+.long	1467031594,4094571909
+.long	851169720,275423344
+.long	3100823752,430227734
+.long	1363258195,506948616
+.long	3750685593,659060556
+.long	3785050280,883997877
+.long	3318307427,958139571
+.long	3812723403,1322822218
+.long	2003034995,1537002063
+.long	3602036899,1747873779
+.long	1575990012,1955562222
+.long	1125592928,2024104815
+.long	2716904306,2227730452
+.long	442776044,2361852424
+.long	593698344,2428436474
+.long	3733110249,2756734187
+.long	2999351573,3204031479
+.long	3815920427,3329325298
+.long	3928383900,3391569614
+.long	566280711,3515267271
+.long	3454069534,3940187606
+.long	4000239992,4118630271
+.long	1914138554,116418474
+.long	2731055270,174292421
+.long	3203993006,289380356
+.long	320620315,460393269
+.long	587496836,685471733
+.long	1086792851,852142971
+.long	365543100,1017036298
+.long	2618297676,1126000580
+.long	3409855158,1288033470
+.long	4234509866,1501505948
+.long	987167468,1607167915
+.long	1246189591,1816402316
+.size	sha512_block_data_order,.-.L_sha512_block_data_order_begin
+.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte	62,0
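
sha512-586.s above is the pure integer x86 path (labels suffixed `_x86`): each 64-bit quantity lives in a 32-bit register pair, so 64-bit additions become `addl`/`adcl` pairs and the 64-bit rotates of the SHA-512 sigma functions become the long `shrl`/`shll`/`xorl` ladders. The `.long 2784229001` after `movl $16,%ecx` is data executed as code; its bytes appear to decode to `mov %esi,%esi; rep movsd`, copying the eight 64-bit state words onto the stack. As in the SHA-256 file, the loops are counter-free: `cmpb $148,%dl` and `cmpb $23,%dl` test the low byte of the constant just consumed against the low bytes of K[15] (0x...cf692694) and K[79] (0x...4a475817). A minimal C sketch of the register-pair arithmetic, under those assumptions (the `u64pair` type and helpers are hypothetical):

    #include <stdint.h>

    /* A 64-bit value as the lo/hi 32-bit register pair used by the asm. */
    typedef struct { uint32_t lo, hi; } u64pair;

    /* addl/adcl: 64-bit add with an explicit carry out of the low half. */
    static u64pair add64(u64pair a, u64pair b)
    {
        u64pair r;
        r.lo = a.lo + b.lo;
        r.hi = a.hi + b.hi + (r.lo < a.lo);   /* adcl picks up the carry */
        return r;
    }

    /* 64-bit ROTR by n (0 < n < 32) via the shrl/shll/xorl ladder;
     * for n >= 32 the asm swaps the roles of the two halves first. */
    static u64pair rotr64(u64pair x, int n)
    {
        u64pair r;
        r.lo = (x.lo >> n) | (x.hi << (32 - n));
        r.hi = (x.hi >> n) | (x.lo << (32 - n));
        return r;
    }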
diff --git a/import_openssl.sh b/import_openssl.sh
index e4e46c7..0630769 100755
--- a/import_openssl.sh
+++ b/import_openssl.sh
@@ -148,6 +148,23 @@
   CC=true perl crypto/sha/asm/sha1-mips.pl o32     > crypto/sha/asm/sha1-mips.s
   CC=true perl crypto/sha/asm/sha512-mips.pl o32   > crypto/sha/asm/sha256-mips.s
 
+  # Generate x86 asm
+  perl crypto/aes/asm/aes-586.pl      elf  > crypto/aes/asm/aes-586.s
+  perl crypto/aes/asm/vpaes-x86.pl    elf  > crypto/aes/asm/vpaes-x86.s
+  perl crypto/aes/asm/aesni-x86.pl    elf  > crypto/aes/asm/aesni-x86.s
+  perl crypto/bn/asm/bn-586.pl        elf  > crypto/bn/asm/bn-586.s
+  perl crypto/bn/asm/co-586.pl        elf  > crypto/bn/asm/co-586.s
+  perl crypto/bn/asm/x86-mont.pl      elf  > crypto/bn/asm/x86-mont.s
+  perl crypto/bn/asm/x86-gf2m.pl      elf  > crypto/bn/asm/x86-gf2m.s
+  perl crypto/modes/asm/ghash-x86.pl  elf  > crypto/modes/asm/ghash-x86.s
+  perl crypto/sha/asm/sha1-586.pl     elf  > crypto/sha/asm/sha1-586.s
+  perl crypto/sha/asm/sha256-586.pl   elf  > crypto/sha/asm/sha256-586.s
+  perl crypto/sha/asm/sha512-586.pl   elf  > crypto/sha/asm/sha512-586.s
+  perl crypto/md5/asm/md5-586.pl      elf  > crypto/md5/asm/md5-586.s
+  perl crypto/des/asm/des-586.pl      elf  > crypto/des/asm/des-586.s
+  perl crypto/des/asm/crypt586.pl     elf  > crypto/des/asm/crypt586.s
+  perl crypto/bf/asm/bf-586.pl        elf  > crypto/bf/asm/bf-586.s
+
   # Setup android.testssl directory
   mkdir android.testssl
   cat test/testssl | \