am 75b7f959: am 2b5a3d1f: am 0f2dac0f: resolved conflicts for merge of 69c63910 to jb-mr2-dev-plus-aosp
* commit '75b7f959caa48362bbc4d6f3592e66d312380d1b':
Add support for TLS_FALLBACK_SCSV
diff --git a/Android.mk b/Android.mk
index 46af985..5fbcfc6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -7,11 +7,8 @@
# These makefiles are here instead of being Android.mk files in the
# respective crypto, ssl, and apps directories so
# that import_openssl.sh import won't remove them.
-ifeq ($(strip $(TARGET_IS_64_BIT)),true)
- include $(LOCAL_PATH)/build-config-64.mk
-else
- include $(LOCAL_PATH)/build-config-32.mk
-endif
+include $(LOCAL_PATH)/build-config-64.mk
+include $(LOCAL_PATH)/build-config-32.mk
include $(LOCAL_PATH)/Crypto.mk
include $(LOCAL_PATH)/Ssl.mk
include $(LOCAL_PATH)/Apps.mk
diff --git a/Apps-config-host.mk b/Apps-config-host.mk
new file mode 100644
index 0000000..5c1604e
--- /dev/null
+++ b/Apps-config-host.mk
@@ -0,0 +1,119 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+# This script will append to the following variables:
+#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
+
+
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Apps-config-host.mk
+
+common_cflags := \
+ -DMONOLITH \
+
+common_src_files := \
+ apps/app_rand.c \
+ apps/apps.c \
+ apps/asn1pars.c \
+ apps/ca.c \
+ apps/ciphers.c \
+ apps/cms.c \
+ apps/crl.c \
+ apps/crl2p7.c \
+ apps/dgst.c \
+ apps/dh.c \
+ apps/dhparam.c \
+ apps/dsa.c \
+ apps/dsaparam.c \
+ apps/ec.c \
+ apps/ecparam.c \
+ apps/enc.c \
+ apps/engine.c \
+ apps/errstr.c \
+ apps/gendh.c \
+ apps/gendsa.c \
+ apps/genpkey.c \
+ apps/genrsa.c \
+ apps/nseq.c \
+ apps/ocsp.c \
+ apps/openssl.c \
+ apps/passwd.c \
+ apps/pkcs12.c \
+ apps/pkcs7.c \
+ apps/pkcs8.c \
+ apps/pkey.c \
+ apps/pkeyparam.c \
+ apps/pkeyutl.c \
+ apps/prime.c \
+ apps/rand.c \
+ apps/req.c \
+ apps/rsa.c \
+ apps/rsautl.c \
+ apps/s_cb.c \
+ apps/s_client.c \
+ apps/s_server.c \
+ apps/s_socket.c \
+ apps/s_time.c \
+ apps/sess_id.c \
+ apps/smime.c \
+ apps/speed.c \
+ apps/spkac.c \
+ apps/srp.c \
+ apps/verify.c \
+ apps/version.c \
+ apps/x509.c \
+
+common_c_includes := \
+ external/openssl/. \
+ external/openssl/include \
+
+arm_cflags :=
+
+arm_src_files :=
+
+arm_exclude_files :=
+
+arm64_cflags :=
+
+arm64_src_files :=
+
+arm64_exclude_files :=
+
+x86_cflags :=
+
+x86_src_files :=
+
+x86_exclude_files :=
+
+x86_64_cflags :=
+
+x86_64_src_files :=
+
+x86_64_exclude_files :=
+
+mips_cflags :=
+
+mips_src_files :=
+
+mips_exclude_files :=
+
+
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes)
+
+ifeq ($(HOST_OS),linux)
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files))
+else
+$(warning Unknown host OS $(HOST_OS))
+LOCAL_SRC_FILES += $(common_src_files)
+endif
diff --git a/Apps-config-target.mk b/Apps-config-target.mk
new file mode 100644
index 0000000..0c567d4
--- /dev/null
+++ b/Apps-config-target.mk
@@ -0,0 +1,124 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+# This script will append to the following variables:
+#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
+
+
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Apps-config-target.mk
+
+common_cflags := \
+ -DMONOLITH \
+
+common_src_files := \
+ apps/app_rand.c \
+ apps/apps.c \
+ apps/asn1pars.c \
+ apps/ca.c \
+ apps/ciphers.c \
+ apps/cms.c \
+ apps/crl.c \
+ apps/crl2p7.c \
+ apps/dgst.c \
+ apps/dh.c \
+ apps/dhparam.c \
+ apps/dsa.c \
+ apps/dsaparam.c \
+ apps/ec.c \
+ apps/ecparam.c \
+ apps/enc.c \
+ apps/engine.c \
+ apps/errstr.c \
+ apps/gendh.c \
+ apps/gendsa.c \
+ apps/genpkey.c \
+ apps/genrsa.c \
+ apps/nseq.c \
+ apps/ocsp.c \
+ apps/openssl.c \
+ apps/passwd.c \
+ apps/pkcs12.c \
+ apps/pkcs7.c \
+ apps/pkcs8.c \
+ apps/pkey.c \
+ apps/pkeyparam.c \
+ apps/pkeyutl.c \
+ apps/prime.c \
+ apps/rand.c \
+ apps/req.c \
+ apps/rsa.c \
+ apps/rsautl.c \
+ apps/s_cb.c \
+ apps/s_client.c \
+ apps/s_server.c \
+ apps/s_socket.c \
+ apps/s_time.c \
+ apps/sess_id.c \
+ apps/smime.c \
+ apps/speed.c \
+ apps/spkac.c \
+ apps/srp.c \
+ apps/verify.c \
+ apps/version.c \
+ apps/x509.c \
+
+common_c_includes := \
+ external/openssl/. \
+ external/openssl/include \
+
+arm_cflags :=
+
+arm_src_files :=
+
+arm_exclude_files :=
+
+arm64_cflags :=
+
+arm64_src_files :=
+
+arm64_exclude_files :=
+
+x86_cflags :=
+
+x86_src_files :=
+
+x86_exclude_files :=
+
+x86_64_cflags :=
+
+x86_64_src_files :=
+
+x86_64_exclude_files :=
+
+mips_cflags :=
+
+mips_src_files :=
+
+mips_exclude_files :=
+
+
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes)
+
+LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files))
+LOCAL_CFLAGS_arm += $(arm_cflags)
+
+LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files))
+LOCAL_CFLAGS_arm64 += $(arm64_cflags)
+
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+
+LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files))
+LOCAL_CFLAGS_mips += $(mips_cflags)
diff --git a/Apps-config.mk b/Apps-config.mk
deleted file mode 100644
index bc67db2..0000000
--- a/Apps-config.mk
+++ /dev/null
@@ -1,140 +0,0 @@
-# Auto-generated - DO NOT EDIT!
-# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
-#
-# Before including this file, the local Android.mk must define the following
-# variables:
-#
-# local_c_flags
-# local_c_includes
-# local_additional_dependencies
-#
-# This script will define the following variables:
-#
-# target_c_flags
-# target_c_includes
-# target_src_files
-#
-# host_c_flags
-# host_c_includes
-# host_src_files
-#
-
-# Ensure these are empty.
-unknown_arch_c_flags :=
-unknown_arch_src_files :=
-unknown_arch_exclude_files :=
-
-
-common_c_flags := \
- -DMONOLITH \
-
-common_src_files := \
- apps/app_rand.c \
- apps/apps.c \
- apps/asn1pars.c \
- apps/ca.c \
- apps/ciphers.c \
- apps/cms.c \
- apps/crl.c \
- apps/crl2p7.c \
- apps/dgst.c \
- apps/dh.c \
- apps/dhparam.c \
- apps/dsa.c \
- apps/dsaparam.c \
- apps/ec.c \
- apps/ecparam.c \
- apps/enc.c \
- apps/engine.c \
- apps/errstr.c \
- apps/gendh.c \
- apps/gendsa.c \
- apps/genpkey.c \
- apps/genrsa.c \
- apps/nseq.c \
- apps/ocsp.c \
- apps/openssl.c \
- apps/passwd.c \
- apps/pkcs12.c \
- apps/pkcs7.c \
- apps/pkcs8.c \
- apps/pkey.c \
- apps/pkeyparam.c \
- apps/pkeyutl.c \
- apps/prime.c \
- apps/rand.c \
- apps/req.c \
- apps/rsa.c \
- apps/rsautl.c \
- apps/s_cb.c \
- apps/s_client.c \
- apps/s_server.c \
- apps/s_socket.c \
- apps/s_time.c \
- apps/sess_id.c \
- apps/smime.c \
- apps/speed.c \
- apps/spkac.c \
- apps/srp.c \
- apps/verify.c \
- apps/version.c \
- apps/x509.c \
-
-common_c_includes := \
- . \
- include \
-
-arm_c_flags :=
-
-arm_src_files :=
-
-arm_exclude_files :=
-
-arm64_c_flags :=
-
-arm64_src_files :=
-
-arm64_exclude_files :=
-
-x86_c_flags :=
-
-x86_src_files :=
-
-x86_exclude_files :=
-
-x86_64_c_flags :=
-
-x86_64_src_files :=
-
-x86_64_exclude_files :=
-
-mips_c_flags :=
-
-mips_src_files :=
-
-mips_exclude_files :=
-
-target_arch := $(TARGET_ARCH)
-ifeq ($(target_arch)-$(TARGET_HAS_BIGENDIAN),mips-true)
-target_arch := unknown_arch
-endif
-
-target_c_flags := $(common_c_flags) $($(target_arch)_c_flags) $(local_c_flags)
-target_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-target_src_files := $(common_src_files) $($(target_arch)_src_files)
-target_src_files := $(filter-out $($(target_arch)_exclude_files), $(target_src_files))
-
-ifeq ($(HOST_OS)-$(HOST_ARCH),linux-x86)
-host_arch := x86
-else
-host_arch := unknown_arch
-endif
-
-host_c_flags := $(common_c_flags) $($(host_arch)_c_flags) $(local_c_flags)
-host_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-host_src_files := $(common_src_files) $($(host_arch)_src_files)
-host_src_files := $(filter-out $($(host_arch)_exclude_files), $(host_src_files))
-
-local_additional_dependencies += $(LOCAL_PATH)/Apps-config.mk
-
diff --git a/Apps.mk b/Apps.mk
index 0acf046..b2d871c 100644
--- a/Apps.mk
+++ b/Apps.mk
@@ -1,33 +1,25 @@
# Copyright 2006 The Android Open Source Project
-LOCAL_PATH:= $(call my-dir)
-
-local_c_includes :=
-local_c_flags :=
-
-local_additional_dependencies := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Apps.mk
-
-include $(LOCAL_PATH)/Apps-config.mk
+LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
-LOCAL_MODULE:= openssl
+LOCAL_MODULE := openssl
+LOCAL_MULTILIB := both
+LOCAL_MODULE_STEM_32 := openssl
+LOCAL_MODULE_STEM_64 := openssl64
LOCAL_CLANG := true
LOCAL_MODULE_TAGS := optional
-LOCAL_SRC_FILES := $(target_src_files)
LOCAL_SHARED_LIBRARIES := libssl libcrypto
-LOCAL_C_INCLUDES := $(target_c_includes)
-LOCAL_CFLAGS := $(target_c_flags)
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+include $(LOCAL_PATH)/Apps-config-target.mk
include $(LOCAL_PATH)/android-config.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Apps.mk
include $(BUILD_EXECUTABLE)
include $(CLEAR_VARS)
-LOCAL_MODULE:= openssl
+LOCAL_MODULE := openssl
LOCAL_MODULE_TAGS := optional
-LOCAL_SRC_FILES := $(host_src_files)
LOCAL_SHARED_LIBRARIES := libssl-host libcrypto-host
-LOCAL_C_INCLUDES := $(host_c_includes)
-LOCAL_CFLAGS := $(host_c_flags)
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+include $(LOCAL_PATH)/Apps-config-host.mk
include $(LOCAL_PATH)/android-config.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Apps.mk
include $(BUILD_HOST_EXECUTABLE)
diff --git a/Crypto-config.mk b/Crypto-config-host.mk
similarity index 89%
rename from Crypto-config.mk
rename to Crypto-config-host.mk
index b5b9a5f..5b64379 100644
--- a/Crypto-config.mk
+++ b/Crypto-config-host.mk
@@ -1,32 +1,21 @@
# Auto-generated - DO NOT EDIT!
# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
#
-# Before including this file, the local Android.mk must define the following
-# variables:
+# This script will append to the following variables:
#
-# local_c_flags
-# local_c_includes
-# local_additional_dependencies
-#
-# This script will define the following variables:
-#
-# target_c_flags
-# target_c_includes
-# target_src_files
-#
-# host_c_flags
-# host_c_includes
-# host_src_files
-#
-
-# Ensure these are empty.
-unknown_arch_c_flags :=
-unknown_arch_src_files :=
-unknown_arch_exclude_files :=
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
-common_c_flags := \
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-host.mk
+
+common_cflags := \
-DNO_WINDOWS_BRAINDEATH \
common_src_files := \
@@ -343,7 +332,6 @@
crypto/evp/m_md5.c \
crypto/evp/m_mdc2.c \
crypto/evp/m_null.c \
- crypto/evp/m_ripemd.c \
crypto/evp/m_sha1.c \
crypto/evp/m_sigver.c \
crypto/evp/m_wp.c \
@@ -449,8 +437,6 @@
crypto/rc4/rc4_enc.c \
crypto/rc4/rc4_skey.c \
crypto/rc4/rc4_utl.c \
- crypto/ripemd/rmd_dgst.c \
- crypto/ripemd/rmd_one.c \
crypto/rsa/rsa_ameth.c \
crypto/rsa/rsa_asn1.c \
crypto/rsa/rsa_chk.c \
@@ -546,43 +532,63 @@
crypto/x509v3/v3err.c \
common_c_includes := \
- . \
- crypto \
- crypto/asn1 \
- crypto/evp \
- crypto/modes \
- include \
- include/openssl \
+ external/openssl/. \
+ external/openssl/crypto \
+ external/openssl/crypto/asn1 \
+ external/openssl/crypto/evp \
+ external/openssl/crypto/modes \
+ external/openssl/include \
+ external/openssl/include/openssl \
-arm_c_flags := \
+arm_cflags := \
-DAES_ASM \
+ -DBSAES_ASM \
+ -DDES_UNROLL \
-DGHASH_ASM \
-DOPENSSL_BN_ASM_GF2m \
-DOPENSSL_BN_ASM_MONT \
+ -DOPENSSL_CPUID_OBJ \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
arm_src_files := \
crypto/aes/asm/aes-armv4.S \
+ crypto/aes/asm/aesv8-armx.S \
+ crypto/aes/asm/bsaes-armv7.S \
+ crypto/armcap.c \
+ crypto/armv4cpuid.S \
crypto/bn/asm/armv4-gf2m.S \
crypto/bn/asm/armv4-mont.S \
crypto/modes/asm/ghash-armv4.S \
+ crypto/modes/asm/ghashv8-armx.S \
crypto/sha/asm/sha1-armv4-large.S \
crypto/sha/asm/sha256-armv4.S \
crypto/sha/asm/sha512-armv4.S \
arm_exclude_files := \
crypto/aes/aes_core.c \
+ crypto/mem_clr.c \
-arm64_c_flags := \
- -DOPENSSL_NO_ASM \
+arm64_cflags := \
+ -DDES_UNROLL \
+ -DOPENSSL_CPUID_OBJ \
+ -DSHA1_ASM \
+ -DSHA256_ASM \
+ -DSHA512_ASM \
-arm64_src_files :=
+arm64_src_files := \
+ crypto/aes/asm/aesv8-armx-64.S \
+ crypto/arm64cpuid.S \
+ crypto/armcap.c \
+ crypto/modes/asm/ghashv8-armx-64.S \
+ crypto/sha/asm/sha1-armv8.S \
+ crypto/sha/asm/sha256-armv8.S \
+ crypto/sha/asm/sha512-armv8.S \
arm64_exclude_files :=
-x86_c_flags := \
+x86_cflags := \
-DAES_ASM \
-DDES_PTR \
-DDES_RISC1 \
@@ -593,9 +599,13 @@
-DOPENSSL_BN_ASM_MONT \
-DOPENSSL_BN_ASM_PART_WORDS \
-DOPENSSL_CPUID_OBJ \
+ -DOPENSSL_IA32_SSE2 \
+ -DRC4_INDEX \
+ -DRMD160_ASM \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
+ -DVPAES_ASM \
x86_src_files := \
crypto/aes/asm/aes-586.S \
@@ -624,19 +634,21 @@
crypto/des/fcrypt_b.c \
crypto/mem_clr.c \
-x86_64_c_flags := \
+x86_64_cflags := \
-DAES_ASM \
- -DDES_PTR \
- -DDES_RISC1 \
+ -DBSAES_ASM \
-DDES_UNROLL \
-DGHASH_ASM \
-DMD5_ASM \
-DOPENSSL_BN_ASM_GF2m \
-DOPENSSL_BN_ASM_MONT \
+ -DOPENSSL_BN_ASM_MONT5 \
-DOPENSSL_CPUID_OBJ \
+ -DOPENSSL_IA32_SSE2 \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
+ -DVPAES_ASM \
x86_64_src_files := \
crypto/aes/asm/aes-x86_64.S \
@@ -666,7 +678,7 @@
crypto/rc4/rc4_enc.c \
crypto/rc4/rc4_skey.c \
-mips_c_flags := \
+mips_cflags := \
-DAES_ASM \
-DOPENSSL_BN_ASM_MONT \
-DSHA1_ASM \
@@ -683,26 +695,16 @@
crypto/aes/aes_core.c \
crypto/bn/bn_asm.c \
-target_arch := $(TARGET_ARCH)
-ifeq ($(target_arch)-$(TARGET_HAS_BIGENDIAN),mips-true)
-target_arch := unknown_arch
-endif
-target_c_flags := $(common_c_flags) $($(target_arch)_c_flags) $(local_c_flags)
-target_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-target_src_files := $(common_src_files) $($(target_arch)_src_files)
-target_src_files := $(filter-out $($(target_arch)_exclude_files), $(target_src_files))
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes)
-ifeq ($(HOST_OS)-$(HOST_ARCH),linux-x86)
-host_arch := x86
+ifeq ($(HOST_OS),linux)
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files))
else
-host_arch := unknown_arch
+$(warning Unknown host OS $(HOST_OS))
+LOCAL_SRC_FILES += $(common_src_files)
endif
-
-host_c_flags := $(common_c_flags) $($(host_arch)_c_flags) $(local_c_flags)
-host_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-host_src_files := $(common_src_files) $($(host_arch)_src_files)
-host_src_files := $(filter-out $($(host_arch)_exclude_files), $(host_src_files))
-
-local_additional_dependencies += $(LOCAL_PATH)/Crypto-config.mk
-
diff --git a/Crypto-config.mk b/Crypto-config-target.mk
similarity index 87%
copy from Crypto-config.mk
copy to Crypto-config-target.mk
index b5b9a5f..920ca64 100644
--- a/Crypto-config.mk
+++ b/Crypto-config-target.mk
@@ -1,32 +1,21 @@
# Auto-generated - DO NOT EDIT!
# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
#
-# Before including this file, the local Android.mk must define the following
-# variables:
+# This script will append to the following variables:
#
-# local_c_flags
-# local_c_includes
-# local_additional_dependencies
-#
-# This script will define the following variables:
-#
-# target_c_flags
-# target_c_includes
-# target_src_files
-#
-# host_c_flags
-# host_c_includes
-# host_src_files
-#
-
-# Ensure these are empty.
-unknown_arch_c_flags :=
-unknown_arch_src_files :=
-unknown_arch_exclude_files :=
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
-common_c_flags := \
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-target.mk
+
+common_cflags := \
-DNO_WINDOWS_BRAINDEATH \
common_src_files := \
@@ -343,7 +332,6 @@
crypto/evp/m_md5.c \
crypto/evp/m_mdc2.c \
crypto/evp/m_null.c \
- crypto/evp/m_ripemd.c \
crypto/evp/m_sha1.c \
crypto/evp/m_sigver.c \
crypto/evp/m_wp.c \
@@ -449,8 +437,6 @@
crypto/rc4/rc4_enc.c \
crypto/rc4/rc4_skey.c \
crypto/rc4/rc4_utl.c \
- crypto/ripemd/rmd_dgst.c \
- crypto/ripemd/rmd_one.c \
crypto/rsa/rsa_ameth.c \
crypto/rsa/rsa_asn1.c \
crypto/rsa/rsa_chk.c \
@@ -546,43 +532,63 @@
crypto/x509v3/v3err.c \
common_c_includes := \
- . \
- crypto \
- crypto/asn1 \
- crypto/evp \
- crypto/modes \
- include \
- include/openssl \
+ external/openssl/. \
+ external/openssl/crypto \
+ external/openssl/crypto/asn1 \
+ external/openssl/crypto/evp \
+ external/openssl/crypto/modes \
+ external/openssl/include \
+ external/openssl/include/openssl \
-arm_c_flags := \
+arm_cflags := \
-DAES_ASM \
+ -DBSAES_ASM \
+ -DDES_UNROLL \
-DGHASH_ASM \
-DOPENSSL_BN_ASM_GF2m \
-DOPENSSL_BN_ASM_MONT \
+ -DOPENSSL_CPUID_OBJ \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
arm_src_files := \
crypto/aes/asm/aes-armv4.S \
+ crypto/aes/asm/aesv8-armx.S \
+ crypto/aes/asm/bsaes-armv7.S \
+ crypto/armcap.c \
+ crypto/armv4cpuid.S \
crypto/bn/asm/armv4-gf2m.S \
crypto/bn/asm/armv4-mont.S \
crypto/modes/asm/ghash-armv4.S \
+ crypto/modes/asm/ghashv8-armx.S \
crypto/sha/asm/sha1-armv4-large.S \
crypto/sha/asm/sha256-armv4.S \
crypto/sha/asm/sha512-armv4.S \
arm_exclude_files := \
crypto/aes/aes_core.c \
+ crypto/mem_clr.c \
-arm64_c_flags := \
- -DOPENSSL_NO_ASM \
+arm64_cflags := \
+ -DDES_UNROLL \
+ -DOPENSSL_CPUID_OBJ \
+ -DSHA1_ASM \
+ -DSHA256_ASM \
+ -DSHA512_ASM \
-arm64_src_files :=
+arm64_src_files := \
+ crypto/aes/asm/aesv8-armx-64.S \
+ crypto/arm64cpuid.S \
+ crypto/armcap.c \
+ crypto/modes/asm/ghashv8-armx-64.S \
+ crypto/sha/asm/sha1-armv8.S \
+ crypto/sha/asm/sha256-armv8.S \
+ crypto/sha/asm/sha512-armv8.S \
arm64_exclude_files :=
-x86_c_flags := \
+x86_cflags := \
-DAES_ASM \
-DDES_PTR \
-DDES_RISC1 \
@@ -593,9 +599,13 @@
-DOPENSSL_BN_ASM_MONT \
-DOPENSSL_BN_ASM_PART_WORDS \
-DOPENSSL_CPUID_OBJ \
+ -DOPENSSL_IA32_SSE2 \
+ -DRC4_INDEX \
+ -DRMD160_ASM \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
+ -DVPAES_ASM \
x86_src_files := \
crypto/aes/asm/aes-586.S \
@@ -624,19 +634,21 @@
crypto/des/fcrypt_b.c \
crypto/mem_clr.c \
-x86_64_c_flags := \
+x86_64_cflags := \
-DAES_ASM \
- -DDES_PTR \
- -DDES_RISC1 \
+ -DBSAES_ASM \
-DDES_UNROLL \
-DGHASH_ASM \
-DMD5_ASM \
-DOPENSSL_BN_ASM_GF2m \
-DOPENSSL_BN_ASM_MONT \
+ -DOPENSSL_BN_ASM_MONT5 \
-DOPENSSL_CPUID_OBJ \
+ -DOPENSSL_IA32_SSE2 \
-DSHA1_ASM \
-DSHA256_ASM \
-DSHA512_ASM \
+ -DVPAES_ASM \
x86_64_src_files := \
crypto/aes/asm/aes-x86_64.S \
@@ -666,7 +678,7 @@
crypto/rc4/rc4_enc.c \
crypto/rc4/rc4_skey.c \
-mips_c_flags := \
+mips_cflags := \
-DAES_ASM \
-DOPENSSL_BN_ASM_MONT \
-DSHA1_ASM \
@@ -683,26 +695,21 @@
crypto/aes/aes_core.c \
crypto/bn/bn_asm.c \
-target_arch := $(TARGET_ARCH)
-ifeq ($(target_arch)-$(TARGET_HAS_BIGENDIAN),mips-true)
-target_arch := unknown_arch
-endif
-target_c_flags := $(common_c_flags) $($(target_arch)_c_flags) $(local_c_flags)
-target_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-target_src_files := $(common_src_files) $($(target_arch)_src_files)
-target_src_files := $(filter-out $($(target_arch)_exclude_files), $(target_src_files))
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes)
-ifeq ($(HOST_OS)-$(HOST_ARCH),linux-x86)
-host_arch := x86
-else
-host_arch := unknown_arch
-endif
+LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files))
+LOCAL_CFLAGS_arm += $(arm_cflags)
-host_c_flags := $(common_c_flags) $($(host_arch)_c_flags) $(local_c_flags)
-host_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-host_src_files := $(common_src_files) $($(host_arch)_src_files)
-host_src_files := $(filter-out $($(host_arch)_exclude_files), $(host_src_files))
+LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files))
+LOCAL_CFLAGS_arm64 += $(arm64_cflags)
-local_additional_dependencies += $(LOCAL_PATH)/Crypto-config.mk
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+
+LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files))
+LOCAL_CFLAGS_mips += $(mips_cflags)
diff --git a/Crypto-config-trusty.mk b/Crypto-config-trusty.mk
new file mode 100644
index 0000000..5991598
--- /dev/null
+++ b/Crypto-config-trusty.mk
@@ -0,0 +1,262 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+# This script will append to the following variables:
+#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
+
+
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Crypto-config-trusty.mk
+
+common_cflags := \
+ -DGETPID_IS_MEANINGLESS \
+ -DNO_WINDOWS_BRAINDEATH \
+
+common_src_files := \
+ Crypto-config.mk \
+ crypto/aes/aes_cbc.c \
+ crypto/aes/aes_misc.c \
+ crypto/asn1/a_bitstr.c \
+ crypto/asn1/a_d2i_fp.c \
+ crypto/asn1/a_int.c \
+ crypto/asn1/a_object.c \
+ crypto/asn1/a_octet.c \
+ crypto/asn1/a_type.c \
+ crypto/asn1/ameth_lib.c \
+ crypto/asn1/asn1_lib.c \
+ crypto/asn1/asn_pack.c \
+ crypto/asn1/d2i_pr.c \
+ crypto/asn1/f_int.c \
+ crypto/asn1/i2d_pr.c \
+ crypto/asn1/p8_pkey.c \
+ crypto/asn1/t_pkey.c \
+ crypto/asn1/t_x509.c \
+ crypto/asn1/tasn_dec.c \
+ crypto/asn1/tasn_enc.c \
+ crypto/asn1/tasn_fre.c \
+ crypto/asn1/tasn_new.c \
+ crypto/asn1/tasn_typ.c \
+ crypto/asn1/tasn_utl.c \
+ crypto/asn1/x_algor.c \
+ crypto/asn1/x_attrib.c \
+ crypto/asn1/x_bignum.c \
+ crypto/asn1/x_long.c \
+ crypto/asn1/x_pubkey.c \
+ crypto/asn1/x_sig.c \
+ crypto/bio/b_print.c \
+ crypto/bio/bio_lib.c \
+ crypto/bio/bss_mem.c \
+ crypto/bn/bn_add.c \
+ crypto/bn/bn_asm.c \
+ crypto/bn/bn_blind.c \
+ crypto/bn/bn_ctx.c \
+ crypto/bn/bn_div.c \
+ crypto/bn/bn_exp.c \
+ crypto/bn/bn_exp2.c \
+ crypto/bn/bn_gcd.c \
+ crypto/bn/bn_gf2m.c \
+ crypto/bn/bn_kron.c \
+ crypto/bn/bn_lib.c \
+ crypto/bn/bn_mod.c \
+ crypto/bn/bn_mont.c \
+ crypto/bn/bn_mul.c \
+ crypto/bn/bn_nist.c \
+ crypto/bn/bn_prime.c \
+ crypto/bn/bn_print.c \
+ crypto/bn/bn_rand.c \
+ crypto/bn/bn_recp.c \
+ crypto/bn/bn_shift.c \
+ crypto/bn/bn_sqr.c \
+ crypto/bn/bn_sqrt.c \
+ crypto/bn/bn_word.c \
+ crypto/buffer/buf_str.c \
+ crypto/buffer/buffer.c \
+ crypto/cmac/cm_ameth.c \
+ crypto/cmac/cm_pmeth.c \
+ crypto/cmac/cmac.c \
+ crypto/cryptlib.c \
+ crypto/dh/dh_ameth.c \
+ crypto/dh/dh_asn1.c \
+ crypto/dh/dh_check.c \
+ crypto/dh/dh_gen.c \
+ crypto/dh/dh_key.c \
+ crypto/dh/dh_lib.c \
+ crypto/dh/dh_pmeth.c \
+ crypto/dsa/dsa_ameth.c \
+ crypto/dsa/dsa_asn1.c \
+ crypto/dsa/dsa_gen.c \
+ crypto/dsa/dsa_key.c \
+ crypto/dsa/dsa_lib.c \
+ crypto/dsa/dsa_ossl.c \
+ crypto/dsa/dsa_pmeth.c \
+ crypto/dsa/dsa_sign.c \
+ crypto/dsa/dsa_vrf.c \
+ crypto/ec/ec2_mult.c \
+ crypto/ec/ec2_oct.c \
+ crypto/ec/ec2_smpl.c \
+ crypto/ec/ec_ameth.c \
+ crypto/ec/ec_asn1.c \
+ crypto/ec/ec_curve.c \
+ crypto/ec/ec_cvt.c \
+ crypto/ec/ec_key.c \
+ crypto/ec/ec_lib.c \
+ crypto/ec/ec_mult.c \
+ crypto/ec/ec_oct.c \
+ crypto/ec/ec_pmeth.c \
+ crypto/ec/ec_print.c \
+ crypto/ec/eck_prn.c \
+ crypto/ec/ecp_mont.c \
+ crypto/ec/ecp_nist.c \
+ crypto/ec/ecp_oct.c \
+ crypto/ec/ecp_smpl.c \
+ crypto/ecdh/ech_key.c \
+ crypto/ecdh/ech_lib.c \
+ crypto/ecdh/ech_ossl.c \
+ crypto/ecdsa/ecs_asn1.c \
+ crypto/ecdsa/ecs_lib.c \
+ crypto/ecdsa/ecs_ossl.c \
+ crypto/ecdsa/ecs_sign.c \
+ crypto/ecdsa/ecs_vrf.c \
+ crypto/engine/eng_init.c \
+ crypto/engine/eng_lib.c \
+ crypto/engine/eng_table.c \
+ crypto/engine/tb_asnmth.c \
+ crypto/engine/tb_cipher.c \
+ crypto/engine/tb_dh.c \
+ crypto/engine/tb_digest.c \
+ crypto/engine/tb_dsa.c \
+ crypto/engine/tb_ecdh.c \
+ crypto/engine/tb_ecdsa.c \
+ crypto/engine/tb_pkmeth.c \
+ crypto/engine/tb_rand.c \
+ crypto/engine/tb_rsa.c \
+ crypto/err/err.c \
+ crypto/evp/digest.c \
+ crypto/evp/e_aes.c \
+ crypto/evp/evp_enc.c \
+ crypto/evp/evp_lib.c \
+ crypto/evp/evp_pkey.c \
+ crypto/evp/m_sha1.c \
+ crypto/evp/m_sigver.c \
+ crypto/evp/names.c \
+ crypto/evp/p_lib.c \
+ crypto/evp/pmeth_fn.c \
+ crypto/evp/pmeth_gn.c \
+ crypto/evp/pmeth_lib.c \
+ crypto/ex_data.c \
+ crypto/hmac/hm_ameth.c \
+ crypto/hmac/hm_pmeth.c \
+ crypto/hmac/hmac.c \
+ crypto/lhash/lhash.c \
+ crypto/mem.c \
+ crypto/mem_clr.c \
+ crypto/mem_dbg.c \
+ crypto/modes/cbc128.c \
+ crypto/modes/ctr128.c \
+ crypto/objects/o_names.c \
+ crypto/objects/obj_dat.c \
+ crypto/objects/obj_xref.c \
+ crypto/pkcs7/pk7_lib.c \
+ crypto/rand/md_rand.c \
+ crypto/rand/rand_lib.c \
+ crypto/rsa/rsa_ameth.c \
+ crypto/rsa/rsa_asn1.c \
+ crypto/rsa/rsa_chk.c \
+ crypto/rsa/rsa_crpt.c \
+ crypto/rsa/rsa_eay.c \
+ crypto/rsa/rsa_gen.c \
+ crypto/rsa/rsa_lib.c \
+ crypto/rsa/rsa_none.c \
+ crypto/rsa/rsa_oaep.c \
+ crypto/rsa/rsa_pk1.c \
+ crypto/rsa/rsa_pmeth.c \
+ crypto/rsa/rsa_pss.c \
+ crypto/rsa/rsa_saos.c \
+ crypto/rsa/rsa_sign.c \
+ crypto/rsa/rsa_ssl.c \
+ crypto/rsa/rsa_x931.c \
+ crypto/sha/sha1_one.c \
+ crypto/sha/sha1dgst.c \
+ crypto/sha/sha256.c \
+ crypto/sha/sha512.c \
+ crypto/stack/stack.c \
+ crypto/x509/x_all.c \
+ crypto/x509v3/v3_utl.c \
+
+common_c_includes := \
+ external/openssl/. \
+ external/openssl/crypto \
+ external/openssl/crypto/asn1 \
+ external/openssl/crypto/evp \
+ external/openssl/crypto/modes \
+ external/openssl/include \
+ external/openssl/include/openssl \
+
+arm_cflags := \
+ -DAES_ASM \
+ -DGHASH_ASM \
+ -DOPENSSL_BN_ASM_GF2m \
+ -DOPENSSL_BN_ASM_MONT \
+ -DSHA1_ASM \
+ -DSHA256_ASM \
+ -DSHA512_ASM \
+
+arm_src_files := \
+ crypto/aes/asm/aes-armv4.S \
+ crypto/bn/asm/armv4-gf2m.S \
+ crypto/bn/asm/armv4-mont.S \
+ crypto/sha/asm/sha1-armv4-large.S \
+ crypto/sha/asm/sha256-armv4.S \
+ crypto/sha/asm/sha512-armv4.S \
+
+arm_exclude_files :=
+
+arm64_cflags :=
+
+arm64_src_files :=
+
+arm64_exclude_files :=
+
+x86_cflags :=
+
+x86_src_files :=
+
+x86_exclude_files :=
+
+x86_64_cflags :=
+
+x86_64_src_files :=
+
+x86_64_exclude_files :=
+
+mips_cflags :=
+
+mips_src_files :=
+
+mips_exclude_files :=
+
+
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes)
+
+LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files))
+LOCAL_CFLAGS_arm += $(arm_cflags)
+
+LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files))
+LOCAL_CFLAGS_arm64 += $(arm64_cflags)
+
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+
+LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files))
+LOCAL_CFLAGS_mips += $(mips_cflags)
diff --git a/Crypto.mk b/Crypto.mk
index bcfd141..c0cef25 100644
--- a/Crypto.mk
+++ b/Crypto.mk
@@ -1,39 +1,26 @@
-local_c_flags :=
-
-local_c_includes := $(log_c_includes)
-
-local_additional_dependencies := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk
-
-include $(LOCAL_PATH)/Crypto-config.mk
-
#######################################
# target static library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
-
LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
# The static library should be used in only unbundled apps
# and we don't have clang in unbundled build yet.
-ifeq ($(TARGET_ARCH),arm64)
-$(info TODOArm64: $(LOCAL_PATH)/Android.mk : Use NDK once it is available for Arm64)
-else
LOCAL_SDK_VERSION := 9
-endif
-LOCAL_SRC_FILES += $(target_src_files)
-LOCAL_CFLAGS += $(target_c_flags)
-LOCAL_C_INCLUDES += $(target_c_includes)
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libcrypto_static
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libcrypto_static
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk
+include $(LOCAL_PATH)/Crypto-config-target.mk
+include $(LOCAL_PATH)/android-config.mk
+
+# Replace cflags with static-specific cflags so we dont build in libdl deps
+LOCAL_CFLAGS_32 := $(openssl_cflags_static_32)
+LOCAL_CFLAGS_64 := $(openssl_cflags_static_64)
include $(BUILD_STATIC_LIBRARY)
#######################################
# target shared library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
-
LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
# If we're building an unbundled build, don't try to use clang since it's not
@@ -41,44 +28,46 @@
# in the NDK.
ifeq (,$(TARGET_BUILD_APPS))
LOCAL_CLANG := true
+ifeq ($(HOST_OS), darwin)
+LOCAL_ASFLAGS += -no-integrated-as
+LOCAL_CFLAGS += -no-integrated-as
+endif
else
LOCAL_SDK_VERSION := 9
endif
LOCAL_LDFLAGS += -ldl
-LOCAL_SRC_FILES += $(target_src_files)
-LOCAL_CFLAGS += $(target_c_flags)
-LOCAL_C_INCLUDES += $(target_c_includes)
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libcrypto
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libcrypto
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk
+include $(LOCAL_PATH)/Crypto-config-target.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_SHARED_LIBRARY)
#######################################
# host shared library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
-LOCAL_SRC_FILES += $(host_src_files)
-LOCAL_CFLAGS += $(host_c_flags) -DPURIFY
-LOCAL_C_INCLUDES += $(host_c_includes)
+LOCAL_CFLAGS += -DPURIFY
LOCAL_LDLIBS += -ldl
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libcrypto-host
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libcrypto-host
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk
+LOCAL_MULTILIB := both
+include $(LOCAL_PATH)/Crypto-config-host.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_HOST_SHARED_LIBRARY)
########################################
# host static library, which is used by some SDK tools.
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
LOCAL_SHARED_LIBRARIES := $(log_shared_libraries)
-LOCAL_SRC_FILES += $(host_src_files)
-LOCAL_CFLAGS += $(host_c_flags) -DPURIFY
-LOCAL_C_INCLUDES += $(host_c_includes)
+LOCAL_CFLAGS += -DPURIFY
LOCAL_LDLIBS += -ldl
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libcrypto_static
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libcrypto_static
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Crypto.mk
+include $(LOCAL_PATH)/Crypto-config-host.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/Ssl-config-host.mk b/Ssl-config-host.mk
new file mode 100644
index 0000000..57ea377
--- /dev/null
+++ b/Ssl-config-host.mk
@@ -0,0 +1,113 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+# This script will append to the following variables:
+#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
+
+
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Ssl-config-host.mk
+
+common_cflags :=
+
+common_src_files := \
+ ssl/bio_ssl.c \
+ ssl/d1_both.c \
+ ssl/d1_enc.c \
+ ssl/d1_lib.c \
+ ssl/d1_pkt.c \
+ ssl/d1_srtp.c \
+ ssl/kssl.c \
+ ssl/s23_clnt.c \
+ ssl/s23_lib.c \
+ ssl/s23_meth.c \
+ ssl/s23_pkt.c \
+ ssl/s23_srvr.c \
+ ssl/s2_clnt.c \
+ ssl/s2_enc.c \
+ ssl/s2_lib.c \
+ ssl/s2_meth.c \
+ ssl/s2_pkt.c \
+ ssl/s2_srvr.c \
+ ssl/s3_both.c \
+ ssl/s3_cbc.c \
+ ssl/s3_clnt.c \
+ ssl/s3_enc.c \
+ ssl/s3_lib.c \
+ ssl/s3_meth.c \
+ ssl/s3_pkt.c \
+ ssl/s3_srvr.c \
+ ssl/ssl_algs.c \
+ ssl/ssl_asn1.c \
+ ssl/ssl_cert.c \
+ ssl/ssl_ciph.c \
+ ssl/ssl_err.c \
+ ssl/ssl_err2.c \
+ ssl/ssl_lib.c \
+ ssl/ssl_rsa.c \
+ ssl/ssl_sess.c \
+ ssl/ssl_stat.c \
+ ssl/ssl_txt.c \
+ ssl/t1_clnt.c \
+ ssl/t1_enc.c \
+ ssl/t1_lib.c \
+ ssl/t1_meth.c \
+ ssl/t1_reneg.c \
+ ssl/t1_srvr.c \
+ ssl/tls_srp.c \
+
+common_c_includes := \
+ external/openssl/. \
+ external/openssl/crypto \
+ external/openssl/include \
+
+arm_cflags :=
+
+arm_src_files :=
+
+arm_exclude_files :=
+
+arm64_cflags :=
+
+arm64_src_files :=
+
+arm64_exclude_files :=
+
+x86_cflags :=
+
+x86_src_files :=
+
+x86_exclude_files :=
+
+x86_64_cflags :=
+
+x86_64_src_files :=
+
+x86_64_exclude_files :=
+
+mips_cflags :=
+
+mips_src_files :=
+
+mips_exclude_files :=
+
+
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes) $(local_c_includes)
+
+ifeq ($(HOST_OS),linux)
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files), $(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files), $(common_src_files) $(x86_64_src_files))
+else
+$(warning Unknown host OS $(HOST_OS))
+LOCAL_SRC_FILES += $(common_src_files)
+endif
diff --git a/Ssl-config-target.mk b/Ssl-config-target.mk
new file mode 100644
index 0000000..b39e329
--- /dev/null
+++ b/Ssl-config-target.mk
@@ -0,0 +1,118 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+# This script will append to the following variables:
+#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_$(TARGET_ARCH)
+# LOCAL_SRC_FILES_$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_$(TARGET_ARCH)
+# LOCAL_CFLAGS_$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
+
+
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Ssl-config-target.mk
+
+common_cflags :=
+
+common_src_files := \
+ ssl/bio_ssl.c \
+ ssl/d1_both.c \
+ ssl/d1_enc.c \
+ ssl/d1_lib.c \
+ ssl/d1_pkt.c \
+ ssl/d1_srtp.c \
+ ssl/kssl.c \
+ ssl/s23_clnt.c \
+ ssl/s23_lib.c \
+ ssl/s23_meth.c \
+ ssl/s23_pkt.c \
+ ssl/s23_srvr.c \
+ ssl/s2_clnt.c \
+ ssl/s2_enc.c \
+ ssl/s2_lib.c \
+ ssl/s2_meth.c \
+ ssl/s2_pkt.c \
+ ssl/s2_srvr.c \
+ ssl/s3_both.c \
+ ssl/s3_cbc.c \
+ ssl/s3_clnt.c \
+ ssl/s3_enc.c \
+ ssl/s3_lib.c \
+ ssl/s3_meth.c \
+ ssl/s3_pkt.c \
+ ssl/s3_srvr.c \
+ ssl/ssl_algs.c \
+ ssl/ssl_asn1.c \
+ ssl/ssl_cert.c \
+ ssl/ssl_ciph.c \
+ ssl/ssl_err.c \
+ ssl/ssl_err2.c \
+ ssl/ssl_lib.c \
+ ssl/ssl_rsa.c \
+ ssl/ssl_sess.c \
+ ssl/ssl_stat.c \
+ ssl/ssl_txt.c \
+ ssl/t1_clnt.c \
+ ssl/t1_enc.c \
+ ssl/t1_lib.c \
+ ssl/t1_meth.c \
+ ssl/t1_reneg.c \
+ ssl/t1_srvr.c \
+ ssl/tls_srp.c \
+
+common_c_includes := \
+ external/openssl/. \
+ external/openssl/crypto \
+ external/openssl/include \
+
+arm_cflags :=
+
+arm_src_files :=
+
+arm_exclude_files :=
+
+arm64_cflags :=
+
+arm64_src_files :=
+
+arm64_exclude_files :=
+
+x86_cflags :=
+
+x86_src_files :=
+
+x86_exclude_files :=
+
+x86_64_cflags :=
+
+x86_64_src_files :=
+
+x86_64_exclude_files :=
+
+mips_cflags :=
+
+mips_src_files :=
+
+mips_exclude_files :=
+
+
+LOCAL_CFLAGS += $(common_cflags)
+LOCAL_C_INCLUDES += $(common_c_includes)
+
+LOCAL_SRC_FILES_arm += $(filter-out $(arm_exclude_files),$(common_src_files) $(arm_src_files))
+LOCAL_CFLAGS_arm += $(arm_cflags)
+
+LOCAL_SRC_FILES_arm64 += $(filter-out $(arm64_exclude_files),$(common_src_files) $(arm64_src_files))
+LOCAL_CFLAGS_arm64 += $(arm64_cflags)
+
+LOCAL_SRC_FILES_x86 += $(filter-out $(x86_exclude_files),$(common_src_files) $(x86_src_files))
+LOCAL_CFLAGS_x86 += $(x86_cflags)
+
+LOCAL_SRC_FILES_x86_64 += $(filter-out $(x86_64_exclude_files),$(common_src_files) $(x86_64_src_files))
+LOCAL_CFLAGS_x86_64 += $(x86_64_cflags)
+
+LOCAL_SRC_FILES_mips += $(filter-out $(mips_exclude_files),$(common_src_files) $(mips_src_files))
+LOCAL_CFLAGS_mips += $(mips_cflags)
diff --git a/Ssl-config.mk b/Ssl-config.mk
deleted file mode 100644
index b07c100..0000000
--- a/Ssl-config.mk
+++ /dev/null
@@ -1,134 +0,0 @@
-# Auto-generated - DO NOT EDIT!
-# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
-#
-# Before including this file, the local Android.mk must define the following
-# variables:
-#
-# local_c_flags
-# local_c_includes
-# local_additional_dependencies
-#
-# This script will define the following variables:
-#
-# target_c_flags
-# target_c_includes
-# target_src_files
-#
-# host_c_flags
-# host_c_includes
-# host_src_files
-#
-
-# Ensure these are empty.
-unknown_arch_c_flags :=
-unknown_arch_src_files :=
-unknown_arch_exclude_files :=
-
-
-common_c_flags :=
-
-common_src_files := \
- ssl/bio_ssl.c \
- ssl/d1_both.c \
- ssl/d1_enc.c \
- ssl/d1_lib.c \
- ssl/d1_pkt.c \
- ssl/d1_srtp.c \
- ssl/kssl.c \
- ssl/s23_clnt.c \
- ssl/s23_lib.c \
- ssl/s23_meth.c \
- ssl/s23_pkt.c \
- ssl/s23_srvr.c \
- ssl/s2_clnt.c \
- ssl/s2_enc.c \
- ssl/s2_lib.c \
- ssl/s2_meth.c \
- ssl/s2_pkt.c \
- ssl/s2_srvr.c \
- ssl/s3_both.c \
- ssl/s3_cbc.c \
- ssl/s3_clnt.c \
- ssl/s3_enc.c \
- ssl/s3_lib.c \
- ssl/s3_meth.c \
- ssl/s3_pkt.c \
- ssl/s3_srvr.c \
- ssl/ssl_algs.c \
- ssl/ssl_asn1.c \
- ssl/ssl_cert.c \
- ssl/ssl_ciph.c \
- ssl/ssl_err.c \
- ssl/ssl_err2.c \
- ssl/ssl_lib.c \
- ssl/ssl_rsa.c \
- ssl/ssl_sess.c \
- ssl/ssl_stat.c \
- ssl/ssl_txt.c \
- ssl/t1_clnt.c \
- ssl/t1_enc.c \
- ssl/t1_lib.c \
- ssl/t1_meth.c \
- ssl/t1_reneg.c \
- ssl/t1_srvr.c \
- ssl/tls_srp.c \
-
-common_c_includes := \
- . \
- crypto \
- include \
-
-arm_c_flags :=
-
-arm_src_files :=
-
-arm_exclude_files :=
-
-arm64_c_flags :=
-
-arm64_src_files :=
-
-arm64_exclude_files :=
-
-x86_c_flags :=
-
-x86_src_files :=
-
-x86_exclude_files :=
-
-x86_64_c_flags :=
-
-x86_64_src_files :=
-
-x86_64_exclude_files :=
-
-mips_c_flags :=
-
-mips_src_files :=
-
-mips_exclude_files :=
-
-target_arch := $(TARGET_ARCH)
-ifeq ($(target_arch)-$(TARGET_HAS_BIGENDIAN),mips-true)
-target_arch := unknown_arch
-endif
-
-target_c_flags := $(common_c_flags) $($(target_arch)_c_flags) $(local_c_flags)
-target_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-target_src_files := $(common_src_files) $($(target_arch)_src_files)
-target_src_files := $(filter-out $($(target_arch)_exclude_files), $(target_src_files))
-
-ifeq ($(HOST_OS)-$(HOST_ARCH),linux-x86)
-host_arch := x86
-else
-host_arch := unknown_arch
-endif
-
-host_c_flags := $(common_c_flags) $($(host_arch)_c_flags) $(local_c_flags)
-host_c_includes := $(addprefix external/openssl/,$(common_c_includes)) $(local_c_includes)
-host_src_files := $(common_src_files) $($(host_arch)_src_files)
-host_src_files := $(filter-out $($(host_arch)_exclude_files), $(host_src_files))
-
-local_additional_dependencies += $(LOCAL_PATH)/Ssl-config.mk
-
diff --git a/Ssl.mk b/Ssl.mk
index 4d1da48..22a61e0 100644
--- a/Ssl.mk
+++ b/Ssl.mk
@@ -1,37 +1,25 @@
-local_c_flags :=
-
-local_c_includes := $(log_c_includes)
-
-local_additional_dependencies := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk
-
-include $(LOCAL_PATH)/Ssl-config.mk
-
#######################################
# target static library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
# The static library should be used in only unbundled apps
# and we don't have clang in unbundled build yet.
-ifeq ($(TARGET_ARCH),arm64)
-$(info TODOArm64: $(LOCAL_PATH)/Android.mk : Use NDK once it is available for Arm64)
-else
LOCAL_SDK_VERSION := 9
-endif
LOCAL_SRC_FILES += $(target_src_files)
LOCAL_CFLAGS += $(target_c_flags)
LOCAL_C_INCLUDES += $(target_c_includes)
LOCAL_SHARED_LIBRARIES = $(log_shared_libraries)
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libssl_static
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libssl_static
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk
+include $(LOCAL_PATH)/Ssl-config-target.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_STATIC_LIBRARY)
#######################################
# target shared library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
# If we're building an unbundled build, don't try to use clang since it's not
# in the NDK yet. This can be removed when a clang version that is fast enough
@@ -42,36 +30,37 @@
LOCAL_SDK_VERSION := 9
endif
-LOCAL_SRC_FILES += $(target_src_files)
-LOCAL_CFLAGS += $(target_c_flags)
-LOCAL_C_INCLUDES += $(target_c_includes)
LOCAL_SHARED_LIBRARIES += libcrypto $(log_shared_libraries)
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libssl
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libssl
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk
+include $(LOCAL_PATH)/Ssl-config-target.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_SHARED_LIBRARY)
#######################################
# host shared library
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
-LOCAL_SRC_FILES += $(host_src_files)
-LOCAL_CFLAGS += $(host_c_flags)
-LOCAL_C_INCLUDES += $(host_c_includes)
LOCAL_SHARED_LIBRARIES += libcrypto-host $(log_shared_libraries)
LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE:= libssl-host
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_MODULE := libssl-host
+LOCAL_MULTILIB := both
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk
+include $(LOCAL_PATH)/Ssl-config-host.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_HOST_SHARED_LIBRARY)
#######################################
# ssltest
include $(CLEAR_VARS)
-include $(LOCAL_PATH)/android-config.mk
-LOCAL_SRC_FILES:= ssl/ssltest.c
-LOCAL_C_INCLUDES += $(host_c_includes)
+LOCAL_SRC_FILES := ssl/ssltest.c
LOCAL_SHARED_LIBRARIES := libssl libcrypto $(log_shared_libraries)
-LOCAL_MODULE:= ssltest
+LOCAL_MODULE := ssltest
+LOCAL_MULTILIB := both
+LOCAL_MODULE_STEM_32 := ssltest
+LOCAL_MODULE_STEM_64 := ssltest64
LOCAL_MODULE_TAGS := optional
-LOCAL_ADDITIONAL_DEPENDENCIES := $(local_additional_dependencies)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/android-config.mk $(LOCAL_PATH)/Ssl.mk
+include $(LOCAL_PATH)/Ssl-config-host.mk
+include $(LOCAL_PATH)/android-config.mk
include $(BUILD_EXECUTABLE)
diff --git a/android-config.mk b/android-config.mk
index d1bda99..675a65b 100644
--- a/android-config.mk
+++ b/android-config.mk
@@ -7,21 +7,41 @@
# This script performs minor but required patching for the Android build.
#
-LOCAL_CFLAGS += $(openssl_cflags)
-
-LOCAL_CFLAGS := $(filter-out -DTERMIO, $(LOCAL_CFLAGS))
-
-ifeq ($(HOST_OS),windows)
-LOCAL_CFLAGS := $(filter-out -DDSO_DLFCN -DHAVE_DLFCN_H,$(LOCAL_CFLAGS))
-endif
-
-# Directories
-LOCAL_CFLAGS += \
+# Directories for ENGINE shared libraries
+openssl_cflags_32 += \
-DOPENSSLDIR="\"/system/lib/ssl\"" \
-DENGINESDIR="\"/system/lib/ssl/engines\""
+openssl_cflags_static_32 += \
+ -DOPENSSLDIR="\"/system/lib/ssl\"" \
+ -DENGINESDIR="\"/system/lib/ssl/engines\""
+openssl_cflags_64 += \
+ -DOPENSSLDIR="\"/system/lib64/ssl\"" \
+ -DENGINESDIR="\"/system/lib64/ssl/engines\""
+openssl_cflags_static_64 += \
+ -DOPENSSLDIR="\"/system/lib64/ssl\"" \
+ -DENGINESDIR="\"/system/lib64/ssl/engines\""
# Intentionally excluded http://b/7079965
-LOCAL_CFLAGS := $(filter-out -DZLIB, $(LOCAL_CFLAGS))
+ifneq (,$(filter -DZLIB, $(openssl_cflags_32) $(openssl_cflags_64) \
+ $(openssl_cflags_static_32) $(openssl_cflags_static_64)))
+$(error ZLIB should not be enabled in openssl configuration)
+endif
+
+LOCAL_CFLAGS_32 += $(openssl_cflags_32)
+LOCAL_CFLAGS_64 += $(openssl_cflags_64)
+
+LOCAL_CFLAGS_32 := $(filter-out -DTERMIO, $(LOCAL_CFLAGS_32))
+LOCAL_CFLAGS_64 := $(filter-out -DTERMIO, $(LOCAL_CFLAGS_64))
+# filter out static flags too
+openssl_cflags_static_32 := $(filter-out -DTERMIO, $(openssl_cflags_static_32))
+openssl_cflags_static_64 := $(filter-out -DTERMIO, $(openssl_cflags_static_64))
+
+ifeq ($(HOST_OS),windows)
+LOCAL_CFLAGS_32 := $(filter-out -DDSO_DLFCN -DHAVE_DLFCN_H,$(LOCAL_CFLAGS_32))
+LOCAL_CFLAGS_64 := $(filter-out -DDSO_DLFCN -DHAVE_DLFCN_H,$(LOCAL_CFLAGS_64))
+endif
+
+LOCAL_CFLAGS += -Wno-missing-field-initializers -Wno-unused-parameter
# Debug
# LOCAL_CFLAGS += -DCIPHER_DEBUG
diff --git a/apps/apps.c b/apps/apps.c
index 1096eee..b76db10 100644
--- a/apps/apps.c
+++ b/apps/apps.c
@@ -586,12 +586,12 @@
if (ok >= 0)
ok = UI_add_input_string(ui,prompt,ui_flags,buf,
- PW_MIN_LENGTH,BUFSIZ-1);
+ PW_MIN_LENGTH,bufsiz-1);
if (ok >= 0 && verify)
{
buff = (char *)OPENSSL_malloc(bufsiz);
ok = UI_add_verify_string(ui,prompt,ui_flags,buff,
- PW_MIN_LENGTH,BUFSIZ-1, buf);
+ PW_MIN_LENGTH,bufsiz-1, buf);
}
if (ok >= 0)
do
@@ -2841,7 +2841,7 @@
if (proc==NULL)
{
- if (GetVersion() < 0x80000000)
+ if (check_winnt())
proc = OpenProcess(PROCESS_QUERY_INFORMATION,FALSE,
GetCurrentProcessId());
if (proc==NULL) proc = (HANDLE)-1;
diff --git a/apps/crl.c b/apps/crl.c
index c395b2a..8797d30 100644
--- a/apps/crl.c
+++ b/apps/crl.c
@@ -81,6 +81,9 @@
" -in arg - input file - default stdin\n",
" -out arg - output file - default stdout\n",
" -hash - print hash value\n",
+#ifndef OPENSSL_NO_MD5
+" -hash_old - print old-style (MD5) hash value\n",
+#endif
" -fingerprint - print the crl fingerprint\n",
" -issuer - print issuer DN\n",
" -lastupdate - lastUpdate field\n",
@@ -108,6 +111,9 @@
int informat,outformat;
char *infile=NULL,*outfile=NULL;
int hash=0,issuer=0,lastupdate=0,nextupdate=0,noout=0,text=0;
+#ifndef OPENSSL_NO_MD5
+ int hash_old=0;
+#endif
int fingerprint = 0, crlnumber = 0;
const char **pp;
X509_STORE *store = NULL;
@@ -192,6 +198,10 @@
text = 1;
else if (strcmp(*argv,"-hash") == 0)
hash= ++num;
+#ifndef OPENSSL_NO_MD5
+ else if (strcmp(*argv,"-hash_old") == 0)
+ hash_old= ++num;
+#endif
else if (strcmp(*argv,"-nameopt") == 0)
{
if (--argc < 1) goto bad;
@@ -304,6 +314,14 @@
BIO_printf(bio_out,"%08lx\n",
X509_NAME_hash(X509_CRL_get_issuer(x)));
}
+#ifndef OPENSSL_NO_MD5
+ if (hash_old == i)
+ {
+ BIO_printf(bio_out,"%08lx\n",
+ X509_NAME_hash_old(
+ X509_CRL_get_issuer(x)));
+ }
+#endif
if (lastupdate == i)
{
BIO_printf(bio_out,"lastUpdate=");
diff --git a/apps/dgst.c b/apps/dgst.c
index 81bd870..f4aec77 100644
--- a/apps/dgst.c
+++ b/apps/dgst.c
@@ -427,9 +427,9 @@
goto end;
}
if (do_verify)
- r = EVP_DigestVerifyInit(mctx, &pctx, md, e, sigkey);
+ r = EVP_DigestVerifyInit(mctx, &pctx, md, NULL, sigkey);
else
- r = EVP_DigestSignInit(mctx, &pctx, md, e, sigkey);
+ r = EVP_DigestSignInit(mctx, &pctx, md, NULL, sigkey);
if (!r)
{
BIO_printf(bio_err, "Error setting context\n");
diff --git a/apps/ecparam.c b/apps/ecparam.c
index 465480b..976ebef 100644
--- a/apps/ecparam.c
+++ b/apps/ecparam.c
@@ -105,7 +105,7 @@
* in the asn1 der encoding
* possible values: named_curve (default)
* explicit
- * -no_seed - if 'explicit' parameters are choosen do not use the seed
+ * -no_seed - if 'explicit' parameters are chosen do not use the seed
* -genkey - generate ec key
* -rand file - files to use for random number input
* -engine e - use engine e, possibly a hardware device
@@ -286,7 +286,7 @@
BIO_printf(bio_err, " "
" explicit\n");
BIO_printf(bio_err, " -no_seed if 'explicit'"
- " parameters are choosen do not"
+ " parameters are chosen do not"
" use the seed\n");
BIO_printf(bio_err, " -genkey generate ec"
" key\n");
diff --git a/apps/enc.c b/apps/enc.c
index 719acc3..19ea3df 100644
--- a/apps/enc.c
+++ b/apps/enc.c
@@ -331,6 +331,12 @@
setup_engine(bio_err, engine, 0);
#endif
+ if (cipher && EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER)
+ {
+ BIO_printf(bio_err, "AEAD ciphers not supported by the enc utility\n");
+ goto end;
+ }
+
if (md && (dgst=EVP_get_digestbyname(md)) == NULL)
{
BIO_printf(bio_err,"%s is an unsupported message digest type\n",md);
diff --git a/apps/ocsp.c b/apps/ocsp.c
index 83c5a76..767f12c 100644
--- a/apps/ocsp.c
+++ b/apps/ocsp.c
@@ -127,6 +127,7 @@
ENGINE *e = NULL;
char **args;
char *host = NULL, *port = NULL, *path = "/";
+ char *thost = NULL, *tport = NULL, *tpath = NULL;
char *reqin = NULL, *respin = NULL;
char *reqout = NULL, *respout = NULL;
char *signfile = NULL, *keyfile = NULL;
@@ -204,6 +205,12 @@
}
else if (!strcmp(*args, "-url"))
{
+ if (thost)
+ OPENSSL_free(thost);
+ if (tport)
+ OPENSSL_free(tport);
+ if (tpath)
+ OPENSSL_free(tpath);
if (args[1])
{
args++;
@@ -212,6 +219,9 @@
BIO_printf(bio_err, "Error parsing URL\n");
badarg = 1;
}
+ thost = host;
+ tport = port;
+ tpath = path;
}
else badarg = 1;
}
@@ -920,12 +930,12 @@
sk_X509_pop_free(verify_other, X509_free);
sk_CONF_VALUE_pop_free(headers, X509V3_conf_free);
- if (use_ssl != -1)
- {
- OPENSSL_free(host);
- OPENSSL_free(port);
- OPENSSL_free(path);
- }
+ if (thost)
+ OPENSSL_free(thost);
+ if (tport)
+ OPENSSL_free(tport);
+ if (tpath)
+ OPENSSL_free(tpath);
OPENSSL_EXIT(ret);
}
diff --git a/apps/req.c b/apps/req.c
index 8552658..d41385d 100644
--- a/apps/req.c
+++ b/apps/req.c
@@ -644,6 +644,11 @@
if (inrand)
app_RAND_load_files(inrand);
+ if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey))
+ {
+ newkey=DEFAULT_KEY_LENGTH;
+ }
+
if (keyalg)
{
genctx = set_keygen_ctx(bio_err, keyalg, &pkey_type, &newkey,
@@ -652,12 +657,6 @@
goto end;
}
- if (newkey <= 0)
- {
- if (!NCONF_get_number(req_conf,SECTION,BITS, &newkey))
- newkey=DEFAULT_KEY_LENGTH;
- }
-
if (newkey < MIN_KEY_LENGTH && (pkey_type == EVP_PKEY_RSA || pkey_type == EVP_PKEY_DSA))
{
BIO_printf(bio_err,"private key length is too short,\n");
@@ -1490,7 +1489,13 @@
#ifdef CHARSET_EBCDIC
ebcdic2ascii(buf, buf, i);
#endif
- if(!req_check_len(i, n_min, n_max)) goto start;
+ if(!req_check_len(i, n_min, n_max))
+ {
+ if (batch || value)
+ return 0;
+ goto start;
+ }
+
if (!X509_NAME_add_entry_by_NID(n,nid, chtype,
(unsigned char *) buf, -1,-1,mval)) goto err;
ret=1;
@@ -1549,7 +1554,12 @@
#ifdef CHARSET_EBCDIC
ebcdic2ascii(buf, buf, i);
#endif
- if(!req_check_len(i, n_min, n_max)) goto start;
+ if(!req_check_len(i, n_min, n_max))
+ {
+ if (batch || value)
+ return 0;
+ goto start;
+ }
if(!X509_REQ_add1_attr_by_NID(req, nid, chtype,
(unsigned char *)buf, -1)) {
@@ -1649,6 +1659,8 @@
keylen = atol(p + 1);
*pkeylen = keylen;
}
+ else
+ keylen = *pkeylen;
}
else if (p)
paramfile = p + 1;
diff --git a/apps/s_cb.c b/apps/s_cb.c
index 84c3b44..146a960 100644
--- a/apps/s_cb.c
+++ b/apps/s_cb.c
@@ -747,6 +747,10 @@
break;
#endif
+ case TLSEXT_TYPE_padding:
+ extname = "TLS padding";
+ break;
+
default:
extname = "unknown";
break;
diff --git a/apps/s_socket.c b/apps/s_socket.c
index 380efdb..94eb40f 100644
--- a/apps/s_socket.c
+++ b/apps/s_socket.c
@@ -274,7 +274,7 @@
{
i=0;
i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i));
- if (i < 0) { perror("keepalive"); return(0); }
+ if (i < 0) { closesocket(s); perror("keepalive"); return(0); }
}
#endif
@@ -450,6 +450,7 @@
if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL)
{
perror("OPENSSL_malloc");
+ closesocket(ret);
return(0);
}
BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1);
@@ -458,11 +459,13 @@
if (h2 == NULL)
{
BIO_printf(bio_err,"gethostbyname failure\n");
+ closesocket(ret);
return(0);
}
if (h2->h_addrtype != AF_INET)
{
BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
+ closesocket(ret);
return(0);
}
}
diff --git a/apps/smime.c b/apps/smime.c
index c583f8a0..d1fe32d 100644
--- a/apps/smime.c
+++ b/apps/smime.c
@@ -541,8 +541,8 @@
{
if (!cipher)
{
-#ifndef OPENSSL_NO_RC2
- cipher = EVP_rc2_40_cbc();
+#ifndef OPENSSL_NO_DES
+ cipher = EVP_des_ede3_cbc();
#else
BIO_printf(bio_err, "No cipher selected\n");
goto end;
diff --git a/build-config-32.mk b/build-config-32.mk
index b62c342..d035f1e 100644
--- a/build-config-32.mk
+++ b/build-config-32.mk
@@ -1,8 +1,8 @@
# Auto-generated - DO NOT EDIT!
# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
#
-openssl_cflags := \
+openssl_cflags_32 := \
-DOPENSSL_THREADS \
-D_REENTRANT \
-DDSO_DLFCN \
@@ -24,6 +24,7 @@
-DOPENSSL_NO_RC5 \
-DOPENSSL_NO_RDRAND \
-DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
-DOPENSSL_NO_RSAX \
-DOPENSSL_NO_SCTP \
-DOPENSSL_NO_SEED \
@@ -31,3 +32,33 @@
-DOPENSSL_NO_STATIC_ENGINE \
-DOPENSSL_NO_STORE \
-DOPENSSL_NO_WHIRLPOOL \
+
+openssl_cflags_static_32 := \
+ -DOPENSSL_THREADS \
+ -D_REENTRANT \
+ -DL_ENDIAN \
+ -DTERMIO \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_WHIRLPOOL \
+
diff --git a/build-config-64.mk b/build-config-64.mk
index b62c342..45a8141 100644
--- a/build-config-64.mk
+++ b/build-config-64.mk
@@ -1,8 +1,8 @@
# Auto-generated - DO NOT EDIT!
# To regenerate, edit openssl.config, then run:
-# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
#
-openssl_cflags := \
+openssl_cflags_64 := \
-DOPENSSL_THREADS \
-D_REENTRANT \
-DDSO_DLFCN \
@@ -24,6 +24,7 @@
-DOPENSSL_NO_RC5 \
-DOPENSSL_NO_RDRAND \
-DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
-DOPENSSL_NO_RSAX \
-DOPENSSL_NO_SCTP \
-DOPENSSL_NO_SEED \
@@ -31,3 +32,33 @@
-DOPENSSL_NO_STATIC_ENGINE \
-DOPENSSL_NO_STORE \
-DOPENSSL_NO_WHIRLPOOL \
+
+openssl_cflags_static_64 := \
+ -DOPENSSL_THREADS \
+ -D_REENTRANT \
+ -DL_ENDIAN \
+ -DTERMIO \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_WHIRLPOOL \
+
diff --git a/build-config-static-32.mk b/build-config-static-32.mk
new file mode 100644
index 0000000..57bb55a
--- /dev/null
+++ b/build-config-static-32.mk
@@ -0,0 +1,33 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+#
+openssl_cflags_32 := \
+ -DOPENSSL_THREADS \
+ -D_REENTRANT \
+ -DDSO_DLFCN \
+ -DHAVE_DLFCN_H \
+ -DL_ENDIAN \
+ -DTERMIO \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_WHIRLPOOL \
diff --git a/build-config-static-64.mk b/build-config-static-64.mk
new file mode 100644
index 0000000..66943ac
--- /dev/null
+++ b/build-config-static-64.mk
@@ -0,0 +1,33 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1f.tar.gz
+#
+openssl_cflags_64 := \
+ -DOPENSSL_THREADS \
+ -D_REENTRANT \
+ -DDSO_DLFCN \
+ -DHAVE_DLFCN_H \
+ -DL_ENDIAN \
+ -DTERMIO \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_WHIRLPOOL \
diff --git a/build-config-trusty.mk b/build-config-trusty.mk
new file mode 100644
index 0000000..4d6fb58
--- /dev/null
+++ b/build-config-trusty.mk
@@ -0,0 +1,100 @@
+# Auto-generated - DO NOT EDIT!
+# To regenerate, edit openssl.config, then run:
+# ./import_openssl.sh import /path/to/openssl-1.0.1h.tar.gz
+#
+openssl_cflags_trusty := \
+ -DL_ENDIAN \
+ -DOPENSSL_NO_ERR \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_CMS \
+ -DOPENSSL_NO_COMP \
+ -DOPENSSL_NO_CONF \
+ -DOPENSSL_NO_DES \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_LOCKING \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MD4 \
+ -DOPENSSL_NO_MD5 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_OCSP \
+ -DOPENSSL_NO_PEM \
+ -DOPENSSL_NO_PKCS12 \
+ -DOPENSSL_NO_PQUEUE \
+ -DOPENSSL_NO_RC2 \
+ -DOPENSSL_NO_RC4 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_SRP \
+ -DOPENSSL_NO_SSL2 \
+ -DOPENSSL_NO_SSL3 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_TLS1 \
+ -DOPENSSL_NO_TLSEXT \
+ -DOPENSSL_NO_TS \
+ -DOPENSSL_NO_TXT_DB \
+ -DOPENSSL_NO_UI \
+ -DOPENSSL_NO_WHIRLPOOL \
+
+openssl_cflags_static_trusty := \
+ -DL_ENDIAN \
+ -DOPENSSL_NO_ERR \
+ -DOPENSSL_NO_CAMELLIA \
+ -DOPENSSL_NO_CAPIENG \
+ -DOPENSSL_NO_CAST \
+ -DOPENSSL_NO_CMS \
+ -DOPENSSL_NO_COMP \
+ -DOPENSSL_NO_CONF \
+ -DOPENSSL_NO_DES \
+ -DOPENSSL_NO_DTLS1 \
+ -DOPENSSL_NO_EC_NISTP_64_GCC_128 \
+ -DOPENSSL_NO_GMP \
+ -DOPENSSL_NO_GOST \
+ -DOPENSSL_NO_HEARTBEATS \
+ -DOPENSSL_NO_IDEA \
+ -DOPENSSL_NO_JPAKE \
+ -DOPENSSL_NO_LOCKING \
+ -DOPENSSL_NO_MD2 \
+ -DOPENSSL_NO_MD4 \
+ -DOPENSSL_NO_MD5 \
+ -DOPENSSL_NO_MDC2 \
+ -DOPENSSL_NO_OCSP \
+ -DOPENSSL_NO_PEM \
+ -DOPENSSL_NO_PKCS12 \
+ -DOPENSSL_NO_PQUEUE \
+ -DOPENSSL_NO_RC2 \
+ -DOPENSSL_NO_RC4 \
+ -DOPENSSL_NO_RC5 \
+ -DOPENSSL_NO_RDRAND \
+ -DOPENSSL_NO_RFC3779 \
+ -DOPENSSL_NO_RIPEMD \
+ -DOPENSSL_NO_RSAX \
+ -DOPENSSL_NO_SCTP \
+ -DOPENSSL_NO_SEED \
+ -DOPENSSL_NO_SHA0 \
+ -DOPENSSL_NO_SRP \
+ -DOPENSSL_NO_SSL2 \
+ -DOPENSSL_NO_SSL3 \
+ -DOPENSSL_NO_STATIC_ENGINE \
+ -DOPENSSL_NO_STORE \
+ -DOPENSSL_NO_TLS1 \
+ -DOPENSSL_NO_TLSEXT \
+ -DOPENSSL_NO_TS \
+ -DOPENSSL_NO_TXT_DB \
+ -DOPENSSL_NO_UI \
+ -DOPENSSL_NO_WHIRLPOOL \
+
diff --git a/check-all-builds.sh b/check-all-builds.sh
index 98dc391..9743872 100755
--- a/check-all-builds.sh
+++ b/check-all-builds.sh
@@ -139,7 +139,11 @@
esac
# The list of supported Android target architectures.
-ANDROID_ARCHS="arm x86 mips"
+
+# NOTE: x86_64 is not ready yet, while the toolchain is in
+# prebuilts/ it doesn't have a sysroot which means it requires
+# a platform build to get Bionic and stuff.
+ANDROID_ARCHS="arm arm64 x86 x86_64 mips"
BUILD_TYPES=
for ARCH in $ANDROID_ARCHS; do
@@ -147,9 +151,8 @@
done
ANDROID_BUILD_TYPES=$BUILD_TYPES
-# NOTE: The $HOST_OS-x86_64 is currently broken because the single
-# <openssl/opensslconf.h> header is tailored for 32-bits.
HOST_BUILD_TYPES="$HOST_OS-x86 $HOST_OS-generic32 $HOST_OS-generic64"
+HOST_BUILD_TYPES="$HOST_BUILD_TYPES $HOST_OS-x86_64"
BUILD_TYPES="$ANDROID_BUILD_TYPES $HOST_BUILD_TYPES"
@@ -308,11 +311,17 @@
# Out: GNU configuration target (e.g. arm-linux-androideabi)
get_build_arch_target () {
case $1 in
+ arm64)
+ echo "aarch64-linux-android"
+ ;;
arm)
echo "arm-linux-androideabi"
;;
x86)
- echo "i686-linux-android"
+ echo "x86_64-linux-android"
+ ;;
+ x86_64)
+ echo "x86_64-linux-android"
;;
mips)
echo "mipsel-linux-android"
@@ -323,12 +332,22 @@
esac
}
-GCC_VERSION=4.7
-CLANG_VERSION=3.1
+GCC_VERSION=4.8
+CLANG_VERSION=3.2
get_prebuilt_gcc_dir_for_arch () {
local arch=$1
local target=$(get_build_arch_target $arch)
+ # Adjust $arch for x86_64 because the prebuilts are actually
+ # under prebuilts/gcc/<host>/x86/
+ case $arch in
+ x86_64)
+ arch=x86
+ ;;
+ arm64)
+ arch=aarch64
+ ;;
+ esac
echo "$ANDROID_BUILD_TOP/prebuilts/gcc/$ANDROID_HOST_TAG/$arch/$target-$GCC_VERSION"
}
@@ -384,7 +403,7 @@
# Force -m32 flag when needed for 32-bit builds.
case $1 in
- *-linux-x86|*-darwin-x86|*-generic32)
+ *-x86|*-generic32)
result="$result -m32"
;;
esac
@@ -623,6 +642,6 @@
dump "Error, try doing the following to inspect the issues:"
dump " $PROGNAME --build-dir=/tmp/mybuild"
dump " make -C /tmp/mybuild V=1"
- dump ""
+ dump " "
;;
esac
diff --git a/crypto/aes/asm/aes-armv4.S b/crypto/aes/asm/aes-armv4.S
index 2697d4c..333a522 100644
--- a/crypto/aes/asm/aes-armv4.S
+++ b/crypto/aes/asm/aes-armv4.S
@@ -1,6 +1,53 @@
-#include "arm_arch.h"
+
+@ ====================================================================
+@ Written by Andy Polyakov <[email protected]> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@ ====================================================================
+
+@ AES for ARMv4
+
+@ January 2007.
+@
+@ Code uses single 1K S-box and is >2 times faster than code generated
+@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
+@ allows to merge logical or arithmetic operation with shift or rotate
+@ in one instruction and emit combined result every cycle. The module
+@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
+@ key [on single-issue Xscale PXA250 core].
+
+@ May 2007.
+@
+@ AES_set_[en|de]crypt_key is added.
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
+@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~21.5 cycles per byte.
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
.text
+#if __ARM_ARCH__<7
.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+.thumb
+# else
+.code 32
+# endif
+#endif
.type AES_Te,%object
.align 5
@@ -114,7 +161,11 @@
.type AES_encrypt,%function
.align 5
AES_encrypt:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_encrypt
+#else
+ adr r3,AES_encrypt
+#endif
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
@@ -356,11 +407,21 @@
.align 5
private_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_set_encrypt_key
+#else
+ adr r3,private_AES_set_encrypt_key
+#endif
teq r0,#0
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
moveq r0,#-1
beq .Labrt
teq r2,#0
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
moveq r0,#-1
beq .Labrt
@@ -369,6 +430,9 @@
teq r1,#192
beq .Lok
teq r1,#256
+#if __ARM_ARCH__>=7
+ itt ne @ Thumb2 thing, sanity check in ARM
+#endif
movne r0,#-1
bne .Labrt
@@ -523,6 +587,9 @@
str r2,[r11,#-16]
subs r12,r12,#1
str r3,[r11,#-12]
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
subeq r2,r11,#216
beq .Ldone
@@ -592,6 +659,9 @@
str r2,[r11,#-24]
subs r12,r12,#1
str r3,[r11,#-20]
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
subeq r2,r11,#256
beq .Ldone
@@ -621,11 +691,17 @@
str r9,[r11,#-4]
b .L256_loop
+.align 2
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt: tst lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+ bx lr @ .word 0xe12fff1e
+#else
+ tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
.global private_AES_set_decrypt_key
@@ -635,34 +711,57 @@
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
- ldrne lr,[sp],#4 @ pop lr
+ ldr lr,[sp],#4 @ pop lr
bne .Labrt
- stmdb sp!,{r4-r12}
+ mov r0,r2 @ AES_set_encrypt_key preserves r2,
+ mov r1,r2 @ which is AES_KEY *key
+ b _armv4_AES_set_enc2dec_key
+.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
- ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
- mov r11,r2 @ which is AES_KEY *key
- mov r7,r2
- add r8,r2,r12,lsl#4
+@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
+.global AES_set_enc2dec_key
+.type AES_set_enc2dec_key,%function
+.align 5
+AES_set_enc2dec_key:
+_armv4_AES_set_enc2dec_key:
+ stmdb sp!,{r4-r12,lr}
-.Linv: ldr r0,[r7]
+ ldr r12,[r0,#240]
+ mov r7,r0 @ input
+ add r8,r0,r12,lsl#4
+ mov r11,r1 @ ouput
+ add r10,r1,r12,lsl#4
+ str r12,[r1,#240]
+
+.Linv: ldr r0,[r7],#16
+ ldr r1,[r7,#-12]
+ ldr r2,[r7,#-8]
+ ldr r3,[r7,#-4]
+ ldr r4,[r8],#-16
+ ldr r5,[r8,#16+4]
+ ldr r6,[r8,#16+8]
+ ldr r9,[r8,#16+12]
+ str r0,[r10],#-16
+ str r1,[r10,#16+4]
+ str r2,[r10,#16+8]
+ str r3,[r10,#16+12]
+ str r4,[r11],#16
+ str r5,[r11,#-12]
+ str r6,[r11,#-8]
+ str r9,[r11,#-4]
+ teq r7,r8
+ bne .Linv
+
+ ldr r0,[r7]
ldr r1,[r7,#4]
ldr r2,[r7,#8]
ldr r3,[r7,#12]
- ldr r4,[r8]
- ldr r5,[r8,#4]
- ldr r6,[r8,#8]
- ldr r9,[r8,#12]
- str r0,[r8],#-16
- str r1,[r8,#16+4]
- str r2,[r8,#16+8]
- str r3,[r8,#16+12]
- str r4,[r7],#16
- str r5,[r7,#-12]
- str r6,[r7,#-8]
- str r9,[r7,#-4]
- teq r7,r8
- bne .Linv
+ str r0,[r11]
+ str r1,[r11,#4]
+ str r2,[r11,#8]
+ str r3,[r11,#12]
+ sub r11,r11,r12,lsl#3
ldr r0,[r11,#16]! @ prefetch tp1
mov r7,#0x80
mov r8,#0x1b
@@ -715,7 +814,7 @@
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+.size AES_set_enc2dec_key,.-AES_set_enc2dec_key
.type AES_Td,%object
.align 5
@@ -825,7 +924,11 @@
.type AES_decrypt,%function
.align 5
AES_decrypt:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_decrypt
+#else
+ adr r3,AES_decrypt
+#endif
stmdb sp!,{r1,r4-r12,lr}
mov r12,r0 @ inp
mov r11,r2
@@ -1022,8 +1125,9 @@
ldrb r6,[r10,r9] @ Td4[s0>>0]
and r9,lr,r1,lsr#8
+ add r1,r10,r1,lsr#24
ldrb r7,[r10,r7] @ Td4[s1>>0]
- ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
+ ldrb r1,[r1] @ Td4[s1>>24]
ldrb r8,[r10,r8] @ Td4[s1>>16]
eor r0,r7,r0,lsl#24
ldrb r9,[r10,r9] @ Td4[s1>>8]
@@ -1036,7 +1140,8 @@
ldrb r8,[r10,r8] @ Td4[s2>>0]
and r9,lr,r2,lsr#16
- ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ add r2,r10,r2,lsr#24
+ ldrb r2,[r2] @ Td4[s2>>24]
eor r0,r0,r7,lsl#8
ldrb r9,[r10,r9] @ Td4[s2>>16]
eor r1,r8,r1,lsl#16
@@ -1048,8 +1153,9 @@
ldrb r8,[r10,r8] @ Td4[s3>>8]
and r9,lr,r3 @ i2
+ add r3,r10,r3,lsr#24
ldrb r9,[r10,r9] @ Td4[s3>>0]
- ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ ldrb r3,[r3] @ Td4[s3>>24]
eor r0,r0,r7,lsl#16
ldr r7,[r11,#0]
eor r1,r1,r8,lsl#8
diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
index 86b86c4..4f89170 100644
--- a/crypto/aes/asm/aes-armv4.pl
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
-# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -51,9 +51,23 @@
$rounds="r12";
$code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
.text
+#if __ARM_ARCH__<7
.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+.thumb
+# else
+.code 32
+# endif
+#endif
.type AES_Te,%object
.align 5
@@ -167,7 +181,11 @@
.type AES_encrypt,%function
.align 5
AES_encrypt:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_encrypt
+#else
+ adr r3,AES_encrypt
+#endif
stmdb sp!,{r1,r4-r12,lr}
mov $rounds,r0 @ inp
mov $key,r2
@@ -409,11 +427,21 @@
.align 5
private_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_set_encrypt_key
+#else
+ adr r3,private_AES_set_encrypt_key
+#endif
teq r0,#0
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
moveq r0,#-1
beq .Labrt
teq r2,#0
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
moveq r0,#-1
beq .Labrt
@@ -422,6 +450,9 @@
teq r1,#192
beq .Lok
teq r1,#256
+#if __ARM_ARCH__>=7
+ itt ne @ Thumb2 thing, sanity check in ARM
+#endif
movne r0,#-1
bne .Labrt
@@ -576,6 +607,9 @@
str $s2,[$key,#-16]
subs $rounds,$rounds,#1
str $s3,[$key,#-12]
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
subeq r2,$key,#216
beq .Ldone
@@ -645,6 +679,9 @@
str $s2,[$key,#-24]
subs $rounds,$rounds,#1
str $s3,[$key,#-20]
+#if __ARM_ARCH__>=7
+ itt eq @ Thumb2 thing, sanity check in ARM
+#endif
subeq r2,$key,#256
beq .Ldone
@@ -674,11 +711,17 @@
str $i3,[$key,#-4]
b .L256_loop
+.align 2
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt: tst lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
+#endif
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
.global private_AES_set_decrypt_key
@@ -688,34 +731,57 @@
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
- ldrne lr,[sp],#4 @ pop lr
+ ldr lr,[sp],#4 @ pop lr
bne .Labrt
- stmdb sp!,{r4-r12}
+ mov r0,r2 @ AES_set_encrypt_key preserves r2,
+ mov r1,r2 @ which is AES_KEY *key
+ b _armv4_AES_set_enc2dec_key
+.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
- ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
- mov $key,r2 @ which is AES_KEY *key
- mov $i1,r2
- add $i2,r2,$rounds,lsl#4
+@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
+.global AES_set_enc2dec_key
+.type AES_set_enc2dec_key,%function
+.align 5
+AES_set_enc2dec_key:
+_armv4_AES_set_enc2dec_key:
+ stmdb sp!,{r4-r12,lr}
-.Linv: ldr $s0,[$i1]
+ ldr $rounds,[r0,#240]
+ mov $i1,r0 @ input
+ add $i2,r0,$rounds,lsl#4
+ mov $key,r1 @ ouput
+ add $tbl,r1,$rounds,lsl#4
+ str $rounds,[r1,#240]
+
+.Linv: ldr $s0,[$i1],#16
+ ldr $s1,[$i1,#-12]
+ ldr $s2,[$i1,#-8]
+ ldr $s3,[$i1,#-4]
+ ldr $t1,[$i2],#-16
+ ldr $t2,[$i2,#16+4]
+ ldr $t3,[$i2,#16+8]
+ ldr $i3,[$i2,#16+12]
+ str $s0,[$tbl],#-16
+ str $s1,[$tbl,#16+4]
+ str $s2,[$tbl,#16+8]
+ str $s3,[$tbl,#16+12]
+ str $t1,[$key],#16
+ str $t2,[$key,#-12]
+ str $t3,[$key,#-8]
+ str $i3,[$key,#-4]
+ teq $i1,$i2
+ bne .Linv
+
+ ldr $s0,[$i1]
ldr $s1,[$i1,#4]
ldr $s2,[$i1,#8]
ldr $s3,[$i1,#12]
- ldr $t1,[$i2]
- ldr $t2,[$i2,#4]
- ldr $t3,[$i2,#8]
- ldr $i3,[$i2,#12]
- str $s0,[$i2],#-16
- str $s1,[$i2,#16+4]
- str $s2,[$i2,#16+8]
- str $s3,[$i2,#16+12]
- str $t1,[$i1],#16
- str $t2,[$i1,#-12]
- str $t3,[$i1,#-8]
- str $i3,[$i1,#-4]
- teq $i1,$i2
- bne .Linv
+ str $s0,[$key]
+ str $s1,[$key,#4]
+ str $s2,[$key,#8]
+ str $s3,[$key,#12]
+ sub $key,$key,$rounds,lsl#3
___
$mask80=$i1;
$mask1b=$i2;
@@ -773,7 +839,7 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+.size AES_set_enc2dec_key,.-AES_set_enc2dec_key
.type AES_Td,%object
.align 5
@@ -883,7 +949,11 @@
.type AES_decrypt,%function
.align 5
AES_decrypt:
+#if __ARM_ARCH__<7
sub r3,pc,#8 @ AES_decrypt
+#else
+ adr r3,AES_decrypt
+#endif
stmdb sp!,{r1,r4-r12,lr}
mov $rounds,r0 @ inp
mov $key,r2
@@ -1080,8 +1150,9 @@
ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
and $i3,lr,$s1,lsr#8
+ add $s1,$tbl,$s1,lsr#24
ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
- ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
+ ldrb $s1,[$s1] @ Td4[s1>>24]
ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
eor $s0,$i1,$s0,lsl#24
ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
@@ -1094,7 +1165,8 @@
ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
and $i3,lr,$s2,lsr#16
- ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
+ add $s2,$tbl,$s2,lsr#24
+ ldrb $s2,[$s2] @ Td4[s2>>24]
eor $s0,$s0,$i1,lsl#8
ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
eor $s1,$i2,$s1,lsl#16
@@ -1106,8 +1178,9 @@
ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
and $i3,lr,$s3 @ i2
+ add $s3,$tbl,$s3,lsr#24
ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
- ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
+ ldrb $s3,[$s3] @ Td4[s3>>24]
eor $s0,$s0,$i1,lsl#16
ldr $i1,[$key,#0]
eor $s1,$s1,$i2,lsl#8
@@ -1130,5 +1203,15 @@
___
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx\tlr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
print $code;
close STDOUT; # enforce flush
diff --git a/crypto/aes/asm/aesv8-armx-64.S b/crypto/aes/asm/aesv8-armx-64.S
new file mode 100644
index 0000000..be0a13d
--- /dev/null
+++ b/crypto/aes/asm/aesv8-armx-64.S
@@ -0,0 +1,761 @@
+#include "arm_arch.h"
+
+#if __ARM_ARCH__>=7
+.text
+.arch armv8-a+crypto
+.align 5
+rcon:
+.long 0x01,0x01,0x01,0x01
+.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
+.long 0x1b,0x1b,0x1b,0x1b
+
+.globl aes_v8_set_encrypt_key
+.type aes_v8_set_encrypt_key,%function
+.align 5
+aes_v8_set_encrypt_key:
+.Lenc_key:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ adr x3,rcon
+ cmp w1,#192
+
+ eor v0.16b,v0.16b,v0.16b
+ ld1 {v3.16b},[x0],#16
+ mov w1,#8 // reuse w1
+ ld1 {v1.4s,v2.4s},[x3],#32
+
+ b.lt .Loop128
+ b.eq .L192
+ b .L256
+
+.align 4
+.Loop128:
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+ b.ne .Loop128
+
+ ld1 {v1.4s},[x3]
+
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ eor v3.16b,v3.16b,v6.16b
+ st1 {v3.4s},[x2]
+ add x2,x2,#0x50
+
+ mov w12,#10
+ b .Ldone
+
+.align 4
+.L192:
+ ld1 {v4.8b},[x0],#8
+ movi v6.16b,#8 // borrow v6.16b
+ st1 {v3.4s},[x2],#16
+ sub v2.16b,v2.16b,v6.16b // adjust the mask
+
+.Loop192:
+ tbl v6.16b,{v4.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v4.8b},[x2],#8
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+
+ dup v5.4s,v3.s[3]
+ eor v5.16b,v5.16b,v4.16b
+ eor v6.16b,v6.16b,v1.16b
+ ext v4.16b,v0.16b,v4.16b,#12
+ shl v1.16b,v1.16b,#1
+ eor v4.16b,v4.16b,v5.16b
+ eor v3.16b,v3.16b,v6.16b
+ eor v4.16b,v4.16b,v6.16b
+ st1 {v3.4s},[x2],#16
+ b.ne .Loop192
+
+ mov w12,#12
+ add x2,x2,#0x20
+ b .Ldone
+
+.align 4
+.L256:
+ ld1 {v4.16b},[x0]
+ mov w1,#7
+ mov w12,#14
+ st1 {v3.4s},[x2],#16
+
+.Loop256:
+ tbl v6.16b,{v4.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v4.4s},[x2],#16
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+ st1 {v3.4s},[x2],#16
+ b.eq .Ldone
+
+ dup v6.4s,v3.s[3] // just splat
+ ext v5.16b,v0.16b,v4.16b,#12
+ aese v6.16b,v0.16b
+
+ eor v4.16b,v4.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v4.16b,v4.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v4.16b,v4.16b,v5.16b
+
+ eor v4.16b,v4.16b,v6.16b
+ b .Loop256
+
+.Ldone:
+ str w12,[x2]
+
+ eor x0,x0,x0 // return value
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+
+.globl aes_v8_set_decrypt_key
+.type aes_v8_set_decrypt_key,%function
+.align 5
+aes_v8_set_decrypt_key:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ bl .Lenc_key
+
+ sub x2,x2,#240 // restore original x2
+ mov x4,#-16
+ add x0,x2,x12,lsl#4 // end of key schedule
+
+ ld1 {v0.4s},[x2]
+ ld1 {v1.4s},[x0]
+ st1 {v0.4s},[x0],x4
+ st1 {v1.4s},[x2],#16
+
+.Loop_imc:
+ ld1 {v0.4s},[x2]
+ ld1 {v1.4s},[x0]
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ st1 {v0.4s},[x0],x4
+ st1 {v1.4s},[x2],#16
+ cmp x0,x2
+ b.hi .Loop_imc
+
+ ld1 {v0.4s},[x2]
+ aesimc v0.16b,v0.16b
+ st1 {v0.4s},[x0]
+
+ eor x0,x0,x0 // return value
+ ldp x29,x30,[sp],#16
+ ret
+.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
+.globl aes_v8_encrypt
+.type aes_v8_encrypt,%function
+.align 5
+aes_v8_encrypt:
+ ldr w3,[x2,#240]
+ ld1 {v0.4s},[x2],#16
+ ld1 {v2.16b},[x0]
+ sub w3,w3,#2
+ ld1 {v1.4s},[x2],#16
+
+.Loop_enc:
+ aese v2.16b,v0.16b
+ ld1 {v0.4s},[x2],#16
+ aesmc v2.16b,v2.16b
+ subs w3,w3,#2
+ aese v2.16b,v1.16b
+ ld1 {v1.4s},[x2],#16
+ aesmc v2.16b,v2.16b
+ b.gt .Loop_enc
+
+ aese v2.16b,v0.16b
+ ld1 {v0.4s},[x2]
+ aesmc v2.16b,v2.16b
+ aese v2.16b,v1.16b
+ eor v2.16b,v2.16b,v0.16b
+
+ st1 {v2.16b},[x1]
+ ret
+.size aes_v8_encrypt,.-aes_v8_encrypt
+.globl aes_v8_decrypt
+.type aes_v8_decrypt,%function
+.align 5
+aes_v8_decrypt:
+ ldr w3,[x2,#240]
+ ld1 {v0.4s},[x2],#16
+ ld1 {v2.16b},[x0]
+ sub w3,w3,#2
+ ld1 {v1.4s},[x2],#16
+
+.Loop_dec:
+ aesd v2.16b,v0.16b
+ ld1 {v0.4s},[x2],#16
+ aesimc v2.16b,v2.16b
+ subs w3,w3,#2
+ aesd v2.16b,v1.16b
+ ld1 {v1.4s},[x2],#16
+ aesimc v2.16b,v2.16b
+ b.gt .Loop_dec
+
+ aesd v2.16b,v0.16b
+ ld1 {v0.4s},[x2]
+ aesimc v2.16b,v2.16b
+ aesd v2.16b,v1.16b
+ eor v2.16b,v2.16b,v0.16b
+
+ st1 {v2.16b},[x1]
+ ret
+.size aes_v8_decrypt,.-aes_v8_decrypt
+.globl aes_v8_cbc_encrypt
+.type aes_v8_cbc_encrypt,%function
+.align 5
+aes_v8_cbc_encrypt:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ subs x2,x2,#16
+ mov x8,#16
+ b.lo .Lcbc_abort
+ csel x8,xzr,x8,eq
+
+ cmp w5,#0 // en- or decrypting?
+ ldr w5,[x3,#240]
+ and x2,x2,#-16
+ ld1 {v6.16b},[x4]
+ ld1 {v0.16b},[x0],x8
+
+ ld1 {v16.4s-v17.4s},[x3] // load key schedule...
+ sub w5,w5,#6
+ add x7,x3,x5,lsl#4 // pointer to last 7 round keys
+ sub w5,w5,#2
+ ld1 {v18.4s-v19.4s},[x7],#32
+ ld1 {v20.4s-v21.4s},[x7],#32
+ ld1 {v22.4s-v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+
+ add x7,x3,#32
+ mov w6,w5
+ b.eq .Lcbc_dec
+
+ cmp w5,#2
+ eor v0.16b,v0.16b,v6.16b
+ eor v5.16b,v16.16b,v7.16b
+ b.eq .Lcbc_enc128
+
+.Loop_cbc_enc:
+ aese v0.16b,v16.16b
+ ld1 {v16.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ ld1 {v17.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ b.gt .Loop_cbc_enc
+
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ subs x2,x2,#16
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ csel x8,xzr,x8,eq
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ add x7,x3,#16
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.16b},[x0],x8
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ eor v16.16b,v16.16b,v5.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v23.16b
+
+ mov w6,w5
+ eor v6.16b,v0.16b,v7.16b
+ st1 {v6.16b},[x1],#16
+ b.hs .Loop_cbc_enc
+
+ b .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+ ld1 {v2.4s-v3.4s},[x7]
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ b .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ st1 {v6.16b},[x1],#16
+.Lenter_cbc_enc128:
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ subs x2,x2,#16
+ aese v0.16b,v2.16b
+ aesmc v0.16b,v0.16b
+ csel x8,xzr,x8,eq
+ aese v0.16b,v3.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.16b},[x0],x8
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ eor v16.16b,v16.16b,v5.16b
+ aese v0.16b,v23.16b
+ eor v6.16b,v0.16b,v7.16b
+ b.hs .Loop_cbc_enc128
+
+ st1 {v6.16b},[x1],#16
+ b .Lcbc_done
+
+.align 5
+.Lcbc_dec128:
+ ld1 {v4.4s-v5.4s},[x7]
+ eor v6.16b,v6.16b,v7.16b
+ eor v2.16b,v0.16b,v7.16b
+ mov x12,x8
+
+.Loop2x_cbc_dec128:
+ aesd v0.16b,v16.16b
+ aesd v1.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ subs x2,x2,#32
+ aesd v0.16b,v17.16b
+ aesd v1.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ csel x8,xzr,x8,lo
+ aesd v0.16b,v4.16b
+ aesd v1.16b,v4.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ csel x12,xzr,x12,ls
+ aesd v0.16b,v5.16b
+ aesd v1.16b,v5.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v18.16b
+ aesd v1.16b,v18.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v19.16b
+ aesd v1.16b,v19.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v20.16b
+ aesd v1.16b,v20.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v21.16b
+ aesd v1.16b,v21.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v22.16b
+ aesd v1.16b,v22.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ aesd v0.16b,v23.16b
+ aesd v1.16b,v23.16b
+
+ eor v6.16b,v6.16b,v0.16b
+ ld1 {v0.16b},[x0],x8
+ eor v2.16b,v2.16b,v1.16b
+ ld1 {v1.16b},[x0],x12
+ st1 {v6.16b},[x1],#16
+ eor v6.16b,v3.16b,v7.16b
+ st1 {v2.16b},[x1],#16
+ eor v2.16b,v0.16b,v7.16b
+ orr v3.16b,v1.16b,v1.16b
+ b.hs .Loop2x_cbc_dec128
+
+ adds x2,x2,#32
+ eor v6.16b,v6.16b,v7.16b
+ b.eq .Lcbc_done
+ eor v2.16b,v2.16b,v7.16b
+ b .Lcbc_dec_tail
+
+.align 5
+.Lcbc_dec:
+ subs x2,x2,#16
+ orr v2.16b,v0.16b,v0.16b
+ b.lo .Lcbc_dec_tail
+
+ csel x8,xzr,x8,eq
+ cmp w5,#2
+ ld1 {v1.16b},[x0],x8
+ orr v3.16b,v1.16b,v1.16b
+ b.eq .Lcbc_dec128
+
+.Loop2x_cbc_dec:
+ aesd v0.16b,v16.16b
+ aesd v1.16b,v16.16b
+ ld1 {v16.4s},[x7],#16
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ subs w6,w6,#2
+ aesd v0.16b,v17.16b
+ aesd v1.16b,v17.16b
+ ld1 {v17.4s},[x7],#16
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ b.gt .Loop2x_cbc_dec
+
+ aesd v0.16b,v16.16b
+ aesd v1.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ eor v4.16b,v6.16b,v7.16b
+ eor v5.16b,v2.16b,v7.16b
+ aesd v0.16b,v17.16b
+ aesd v1.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ orr v6.16b,v3.16b,v3.16b
+ subs x2,x2,#32
+ aesd v0.16b,v18.16b
+ aesd v1.16b,v18.16b
+ aesimc v0.16b,v0.16b
+ csel x8,xzr,x8,lo
+ aesimc v1.16b,v1.16b
+ mov x7,x3
+ aesd v0.16b,v19.16b
+ aesd v1.16b,v19.16b
+ aesimc v0.16b,v0.16b
+ ld1 {v2.16b},[x0],x8
+ aesimc v1.16b,v1.16b
+ csel x8,xzr,x8,ls
+ aesd v0.16b,v20.16b
+ aesd v1.16b,v20.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ ld1 {v3.16b},[x0],x8
+ aesd v0.16b,v21.16b
+ aesd v1.16b,v21.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ aesd v0.16b,v22.16b
+ aesd v1.16b,v22.16b
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ aesd v0.16b,v23.16b
+ aesd v1.16b,v23.16b
+
+ mov w6,w5
+ eor v4.16b,v4.16b,v0.16b
+ eor v5.16b,v5.16b,v1.16b
+ orr v0.16b,v2.16b,v2.16b
+ st1 {v4.16b},[x1],#16
+ orr v1.16b,v3.16b,v3.16b
+ st1 {v5.16b},[x1],#16
+ b.hs .Loop2x_cbc_dec
+
+ adds x2,x2,#32
+ b.eq .Lcbc_done
+
+.Lcbc_dec_tail:
+ aesd v0.16b,v16.16b
+ ld1 {v16.4s},[x7],#16
+ aesimc v0.16b,v0.16b
+ subs w6,w6,#2
+ aesd v0.16b,v17.16b
+ ld1 {v17.4s},[x7],#16
+ aesimc v0.16b,v0.16b
+ b.gt .Lcbc_dec_tail
+
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ eor v4.16b,v6.16b,v7.16b
+ aesd v0.16b,v18.16b
+ aesimc v0.16b,v0.16b
+ orr v6.16b,v2.16b,v2.16b
+ aesd v0.16b,v19.16b
+ aesimc v0.16b,v0.16b
+ aesd v0.16b,v20.16b
+ aesimc v0.16b,v0.16b
+ aesd v0.16b,v21.16b
+ aesimc v0.16b,v0.16b
+ aesd v0.16b,v22.16b
+ aesimc v0.16b,v0.16b
+ aesd v0.16b,v23.16b
+
+ eor v4.16b,v4.16b,v0.16b
+ st1 {v4.16b},[x1],#16
+
+.Lcbc_done:
+ st1 {v6.16b},[x4]
+.Lcbc_abort:
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
+.globl aes_v8_ctr32_encrypt_blocks
+.type aes_v8_ctr32_encrypt_blocks,%function
+.align 5
+aes_v8_ctr32_encrypt_blocks:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ ldr w5,[x3,#240]
+
+ ldr w8, [x4, #12]
+ ld1 {v0.4s},[x4]
+
+ ld1 {v16.4s-v17.4s},[x3] // load key schedule...
+ sub w5,w5,#6
+ add x7,x3,x5,lsl#4 // pointer to last 7 round keys
+ sub w5,w5,#2
+ ld1 {v18.4s-v19.4s},[x7],#32
+ ld1 {v20.4s-v21.4s},[x7],#32
+ ld1 {v22.4s-v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+
+ add x7,x3,#32
+ mov w6,w5
+
+ subs x2,x2,#2
+ b.lo .Lctr32_tail
+
+#ifndef __ARMEB__
+ rev w8, w8
+#endif
+ orr v1.16b,v0.16b,v0.16b
+ add w8, w8, #1
+ orr v6.16b,v0.16b,v0.16b
+ rev w10, w8
+ cmp w5,#2
+ mov v1.s[3],w10
+ b.eq .Lctr32_128
+
+.Loop2x_ctr32:
+ aese v0.16b,v16.16b
+ aese v1.16b,v16.16b
+ ld1 {v16.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aese v1.16b,v17.16b
+ ld1 {v17.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ b.gt .Loop2x_ctr32
+
+ aese v0.16b,v16.16b
+ aese v1.16b,v16.16b
+ aesmc v4.16b,v0.16b
+ orr v0.16b,v6.16b,v6.16b
+ aesmc v5.16b,v1.16b
+ orr v1.16b,v6.16b,v6.16b
+ aese v4.16b,v17.16b
+ aese v5.16b,v17.16b
+ ld1 {v2.16b},[x0],#16
+ aesmc v4.16b,v4.16b
+ ld1 {v3.16b},[x0],#16
+ aesmc v5.16b,v5.16b
+ add w8,w8,#1
+ aese v4.16b,v18.16b
+ aese v5.16b,v18.16b
+ rev w9,w8
+ aesmc v4.16b,v4.16b
+ aesmc v5.16b,v5.16b
+ add w8,w8,#1
+ aese v4.16b,v19.16b
+ aese v5.16b,v19.16b
+ eor v2.16b,v2.16b,v7.16b
+ rev w10,w8
+ aesmc v4.16b,v4.16b
+ aesmc v5.16b,v5.16b
+ eor v3.16b,v3.16b,v7.16b
+ mov x7,x3
+ aese v4.16b,v20.16b
+ aese v5.16b,v20.16b
+ subs x2,x2,#2
+ aesmc v4.16b,v4.16b
+ aesmc v5.16b,v5.16b
+ ld1 {v16.4s-v17.4s},[x7],#32 // re-pre-load rndkey[0-1]
+ aese v4.16b,v21.16b
+ aese v5.16b,v21.16b
+ aesmc v4.16b,v4.16b
+ aesmc v5.16b,v5.16b
+ aese v4.16b,v22.16b
+ aese v5.16b,v22.16b
+ mov v0.s[3], w9
+ aesmc v4.16b,v4.16b
+ mov v1.s[3], w10
+ aesmc v5.16b,v5.16b
+ aese v4.16b,v23.16b
+ aese v5.16b,v23.16b
+
+ mov w6,w5
+ eor v2.16b,v2.16b,v4.16b
+ eor v3.16b,v3.16b,v5.16b
+ st1 {v2.16b},[x1],#16
+ st1 {v3.16b},[x1],#16
+ b.hs .Loop2x_ctr32
+
+ adds x2,x2,#2
+ b.eq .Lctr32_done
+ b .Lctr32_tail
+
+.Lctr32_128:
+ ld1 {v4.4s-v5.4s},[x7]
+
+.Loop2x_ctr32_128:
+ aese v0.16b,v16.16b
+ aese v1.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v2.16b},[x0],#16
+ aesmc v1.16b,v1.16b
+ ld1 {v3.16b},[x0],#16
+ aese v0.16b,v17.16b
+ aese v1.16b,v17.16b
+ add w8,w8,#1
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ rev w9,w8
+ aese v0.16b,v4.16b
+ aese v1.16b,v4.16b
+ add w8,w8,#1
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ rev w10,w8
+ aese v0.16b,v5.16b
+ aese v1.16b,v5.16b
+ subs x2,x2,#2
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v18.16b
+ aese v1.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v19.16b
+ aese v1.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v20.16b
+ aese v1.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v21.16b
+ aese v1.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v22.16b
+ aese v1.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aesmc v1.16b,v1.16b
+ eor v2.16b,v2.16b,v7.16b
+ aese v0.16b,v23.16b
+ eor v3.16b,v3.16b,v7.16b
+ aese v1.16b,v23.16b
+
+ eor v2.16b,v2.16b,v0.16b
+ orr v0.16b,v6.16b,v6.16b
+ eor v3.16b,v3.16b,v1.16b
+ orr v1.16b,v6.16b,v6.16b
+ st1 {v2.16b},[x1],#16
+ mov v0.s[3], w9
+ st1 {v3.16b},[x1],#16
+ mov v1.s[3], w10
+ b.hs .Loop2x_ctr32_128
+
+ adds x2,x2,#2
+ b.eq .Lctr32_done
+
+.Lctr32_tail:
+ aese v0.16b,v16.16b
+ ld1 {v16.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ ld1 {v17.4s},[x7],#16
+ aesmc v0.16b,v0.16b
+ b.gt .Lctr32_tail
+
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v2.16b},[x0]
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ eor v2.16b,v2.16b,v7.16b
+ aese v0.16b,v23.16b
+
+ eor v2.16b,v2.16b,v0.16b
+ st1 {v2.16b},[x1]
+
+.Lctr32_done:
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+#endif
diff --git a/crypto/aes/asm/aesv8-armx.S b/crypto/aes/asm/aesv8-armx.S
new file mode 100644
index 0000000..1637e4d
--- /dev/null
+++ b/crypto/aes/asm/aesv8-armx.S
@@ -0,0 +1,767 @@
+#include "arm_arch.h"
+
+#if __ARM_ARCH__>=7
+.text
+.fpu neon
+.code 32
+.align 5
+rcon:
+.long 0x01,0x01,0x01,0x01
+.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
+.long 0x1b,0x1b,0x1b,0x1b
+
+.globl aes_v8_set_encrypt_key
+.type aes_v8_set_encrypt_key,%function
+.align 5
+aes_v8_set_encrypt_key:
+.Lenc_key:
+ adr r3,rcon
+ cmp r1,#192
+
+ veor q0,q0,q0
+ vld1.8 {q3},[r0]!
+ mov r1,#8 @ reuse r1
+ vld1.32 {q1,q2},[r3]!
+
+ blt .Loop128
+ beq .L192
+ b .L256
+
+.align 4
+.Loop128:
+ vtbl.8 d20,{q3},d4
+ vtbl.8 d21,{q3},d5
+ vext.8 q9,q0,q3,#12
+ vst1.32 {q3},[r2]!
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+ subs r1,r1,#1
+
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q10,q10,q1
+ veor q3,q3,q9
+ vshl.u8 q1,q1,#1
+ veor q3,q3,q10
+ bne .Loop128
+
+ vld1.32 {q1},[r3]
+
+ vtbl.8 d20,{q3},d4
+ vtbl.8 d21,{q3},d5
+ vext.8 q9,q0,q3,#12
+ vst1.32 {q3},[r2]!
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q10,q10,q1
+ veor q3,q3,q9
+ vshl.u8 q1,q1,#1
+ veor q3,q3,q10
+
+ vtbl.8 d20,{q3},d4
+ vtbl.8 d21,{q3},d5
+ vext.8 q9,q0,q3,#12
+ vst1.32 {q3},[r2]!
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q10,q10,q1
+ veor q3,q3,q9
+ veor q3,q3,q10
+ vst1.32 {q3},[r2]
+ add r2,r2,#0x50
+
+ mov r12,#10
+ b .Ldone
+
+.align 4
+.L192:
+ vld1.8 {d16},[r0]!
+ vmov.i8 q10,#8 @ borrow q10
+ vst1.32 {q3},[r2]!
+ vsub.i8 q2,q2,q10 @ adjust the mask
+
+.Loop192:
+ vtbl.8 d20,{q8},d4
+ vtbl.8 d21,{q8},d5
+ vext.8 q9,q0,q3,#12
+ vst1.32 {d16},[r2]!
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+ subs r1,r1,#1
+
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+
+ vdup.32 q9,d7[1]
+ veor q9,q9,q8
+ veor q10,q10,q1
+ vext.8 q8,q0,q8,#12
+ vshl.u8 q1,q1,#1
+ veor q8,q8,q9
+ veor q3,q3,q10
+ veor q8,q8,q10
+ vst1.32 {q3},[r2]!
+ bne .Loop192
+
+ mov r12,#12
+ add r2,r2,#0x20
+ b .Ldone
+
+.align 4
+.L256:
+ vld1.8 {q8},[r0]
+ mov r1,#7
+ mov r12,#14
+ vst1.32 {q3},[r2]!
+
+.Loop256:
+ vtbl.8 d20,{q8},d4
+ vtbl.8 d21,{q8},d5
+ vext.8 q9,q0,q3,#12
+ vst1.32 {q8},[r2]!
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+ subs r1,r1,#1
+
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q3,q3,q9
+ vext.8 q9,q0,q9,#12
+ veor q10,q10,q1
+ veor q3,q3,q9
+ vshl.u8 q1,q1,#1
+ veor q3,q3,q10
+ vst1.32 {q3},[r2]!
+ beq .Ldone
+
+ vdup.32 q10,d7[1]
+ vext.8 q9,q0,q8,#12
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+
+ veor q8,q8,q9
+ vext.8 q9,q0,q9,#12
+ veor q8,q8,q9
+ vext.8 q9,q0,q9,#12
+ veor q8,q8,q9
+
+ veor q8,q8,q10
+ b .Loop256
+
+.Ldone:
+ str r12,[r2]
+
+ eor r0,r0,r0 @ return value
+
+ bx lr
+.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+
+.globl aes_v8_set_decrypt_key
+.type aes_v8_set_decrypt_key,%function
+.align 5
+aes_v8_set_decrypt_key:
+ stmdb sp!,{r4,lr}
+ bl .Lenc_key
+
+ sub r2,r2,#240 @ restore original r2
+ mov r4,#-16
+ add r0,r2,r12,lsl#4 @ end of key schedule
+
+ vld1.32 {q0},[r2]
+ vld1.32 {q1},[r0]
+ vst1.32 {q0},[r0],r4
+ vst1.32 {q1},[r2]!
+
+.Loop_imc:
+ vld1.32 {q0},[r2]
+ vld1.32 {q1},[r0]
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ vst1.32 {q0},[r0],r4
+ vst1.32 {q1},[r2]!
+ cmp r0,r2
+ bhi .Loop_imc
+
+ vld1.32 {q0},[r2]
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ vst1.32 {q0},[r0]
+
+ eor r0,r0,r0 @ return value
+ ldmia sp!,{r4,pc}
+.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
+.globl aes_v8_encrypt
+.type aes_v8_encrypt,%function
+.align 5
+aes_v8_encrypt:
+ ldr r3,[r2,#240]
+ vld1.32 {q0},[r2]!
+ vld1.8 {q2},[r0]
+ sub r3,r3,#2
+ vld1.32 {q1},[r2]!
+
+.Loop_enc:
+ .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
+ vld1.32 {q0},[r2]!
+ .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+ subs r3,r3,#2
+ .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
+ vld1.32 {q1},[r2]!
+ .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+ bgt .Loop_enc
+
+ .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
+ vld1.32 {q0},[r2]
+ .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+ .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
+ veor q2,q2,q0
+
+ vst1.8 {q2},[r1]
+ bx lr
+.size aes_v8_encrypt,.-aes_v8_encrypt
+.globl aes_v8_decrypt
+.type aes_v8_decrypt,%function
+.align 5
+aes_v8_decrypt:
+ ldr r3,[r2,#240]
+ vld1.32 {q0},[r2]!
+ vld1.8 {q2},[r0]
+ sub r3,r3,#2
+ vld1.32 {q1},[r2]!
+
+.Loop_dec:
+ .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
+ vld1.32 {q0},[r2]!
+ .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+ subs r3,r3,#2
+ .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
+ vld1.32 {q1},[r2]!
+ .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+ bgt .Loop_dec
+
+ .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
+ vld1.32 {q0},[r2]
+ .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+ .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
+ veor q2,q2,q0
+
+ vst1.8 {q2},[r1]
+ bx lr
+.size aes_v8_decrypt,.-aes_v8_decrypt
+.globl aes_v8_cbc_encrypt
+.type aes_v8_cbc_encrypt,%function
+.align 5
+aes_v8_cbc_encrypt:
+ mov ip,sp
+ stmdb sp!,{r4-r8,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldmia ip,{r4-r5} @ load remaining args
+ subs r2,r2,#16
+ mov r8,#16
+ blo .Lcbc_abort
+ moveq r8,#0
+
+ cmp r5,#0 @ en- or decrypting?
+ ldr r5,[r3,#240]
+ and r2,r2,#-16
+ vld1.8 {q6},[r4]
+ vld1.8 {q0},[r0],r8
+
+ vld1.32 {q8-q9},[r3] @ load key schedule...
+ sub r5,r5,#6
+ add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
+ sub r5,r5,#2
+ vld1.32 {q10-q11},[r7]!
+ vld1.32 {q12-q13},[r7]!
+ vld1.32 {q14-q15},[r7]!
+ vld1.32 {q7},[r7]
+
+ add r7,r3,#32
+ mov r6,r5
+ beq .Lcbc_dec
+
+ cmp r5,#2
+ veor q0,q0,q6
+ veor q5,q8,q7
+ beq .Lcbc_enc128
+
+.Loop_cbc_enc:
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ vld1.32 {q8},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r6,r6,#2
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ vld1.32 {q9},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ bgt .Loop_cbc_enc
+
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r2,r2,#16
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ moveq r8,#0
+ .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ add r7,r3,#16
+ .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q8},[r0],r8
+ .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ veor q8,q8,q5
+ .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
+ .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+
+ mov r6,r5
+ veor q6,q0,q7
+ vst1.8 {q6},[r1]!
+ bhs .Loop_cbc_enc
+
+ b .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+ vld1.32 {q2-q3},[r7]
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ b .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vst1.8 {q6},[r1]!
+.Lenter_cbc_enc128:
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r2,r2,#16
+ .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ moveq r8,#0
+ .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q8},[r0],r8
+ .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ veor q8,q8,q5
+ .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+ veor q6,q0,q7
+ bhs .Loop_cbc_enc128
+
+ vst1.8 {q6},[r1]!
+ b .Lcbc_done
+
+.align 5
+.Lcbc_dec128:
+ vld1.32 {q4-q5},[r7]
+ veor q6,q6,q7
+ veor q2,q0,q7
+ mov r12,r8
+
+.Loop2x_cbc_dec128:
+ .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+ .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ subs r2,r2,#32
+ .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+ .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ movlo r8,#0
+ .byte 0x48,0x03,0xb0,0xf3 @ aesd q0,q4
+ .byte 0x48,0x23,0xb0,0xf3 @ aesd q1,q4
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ movls r12,#0
+ .byte 0x4a,0x03,0xb0,0xf3 @ aesd q0,q5
+ .byte 0x4a,0x23,0xb0,0xf3 @ aesd q1,q5
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10
+ .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11
+ .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
+ .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
+ .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
+ .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
+ .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
+
+ veor q6,q6,q0
+ vld1.8 {q0},[r0],r8
+ veor q2,q2,q1
+ vld1.8 {q1},[r0],r12
+ vst1.8 {q6},[r1]!
+ veor q6,q3,q7
+ vst1.8 {q2},[r1]!
+ veor q2,q0,q7
+ vorr q3,q1,q1
+ bhs .Loop2x_cbc_dec128
+
+ adds r2,r2,#32
+ veor q6,q6,q7
+ beq .Lcbc_done
+ veor q2,q2,q7
+ b .Lcbc_dec_tail
+
+.align 5
+.Lcbc_dec:
+ subs r2,r2,#16
+ vorr q2,q0,q0
+ blo .Lcbc_dec_tail
+
+ moveq r8,#0
+ cmp r5,#2
+ vld1.8 {q1},[r0],r8
+ vorr q3,q1,q1
+ beq .Lcbc_dec128
+
+.Loop2x_cbc_dec:
+ .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+ .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+ vld1.32 {q8},[r7]!
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ subs r6,r6,#2
+ .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+ .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+ vld1.32 {q9},[r7]!
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ bgt .Loop2x_cbc_dec
+
+ .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+ .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ veor q4,q6,q7
+ veor q5,q2,q7
+ .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+ .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ vorr q6,q3,q3
+ subs r2,r2,#32
+ .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10
+ .byte 0x64,0x23,0xb0,0xf3 @ aesd q1,q10
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ movlo r8,#0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ mov r7,r3
+ .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11
+ .byte 0x66,0x23,0xb0,0xf3 @ aesd q1,q11
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ vld1.8 {q2},[r0],r8
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ movls r8,#0
+ .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
+ .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ vld1.8 {q3},[r0],r8
+ .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
+ .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
+ .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
+ .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+ vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
+ .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
+ .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
+
+ mov r6,r5
+ veor q4,q4,q0
+ veor q5,q5,q1
+ vorr q0,q2,q2
+ vst1.8 {q4},[r1]!
+ vorr q1,q3,q3
+ vst1.8 {q5},[r1]!
+ bhs .Loop2x_cbc_dec
+
+ adds r2,r2,#32
+ beq .Lcbc_done
+
+.Lcbc_dec_tail:
+ .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+ vld1.32 {q8},[r7]!
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ subs r6,r6,#2
+ .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+ vld1.32 {q9},[r7]!
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ bgt .Lcbc_dec_tail
+
+ .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ veor q4,q6,q7
+ .byte 0x64,0x03,0xb0,0xf3 @ aesd q0,q10
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ vorr q6,q2,q2
+ .byte 0x66,0x03,0xb0,0xf3 @ aesd q0,q11
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+ .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
+
+ veor q4,q4,q0
+ vst1.8 {q4},[r1]!
+
+.Lcbc_done:
+ vst1.8 {q6},[r4]
+.Lcbc_abort:
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r8,pc}
+.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
+.globl aes_v8_ctr32_encrypt_blocks
+.type aes_v8_ctr32_encrypt_blocks,%function
+.align 5
+aes_v8_ctr32_encrypt_blocks:
+ mov ip,sp
+ stmdb sp!,{r4-r10,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldr r4, [ip] @ load remaining arg
+ ldr r5,[r3,#240]
+
+ ldr r8, [r4, #12]
+ vld1.32 {q0},[r4]
+
+ vld1.32 {q8-q9},[r3] @ load key schedule...
+ sub r5,r5,#6
+ add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
+ sub r5,r5,#2
+ vld1.32 {q10-q11},[r7]!
+ vld1.32 {q12-q13},[r7]!
+ vld1.32 {q14-q15},[r7]!
+ vld1.32 {q7},[r7]
+
+ add r7,r3,#32
+ mov r6,r5
+
+ subs r2,r2,#2
+ blo .Lctr32_tail
+
+#ifndef __ARMEB__
+ rev r8, r8
+#endif
+ vorr q1,q0,q0
+ add r8, r8, #1
+ vorr q6,q0,q0
+ rev r10, r8
+ cmp r5,#2
+ vmov.32 d3[1],r10
+ beq .Lctr32_128
+
+.Loop2x_ctr32:
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
+ vld1.32 {q8},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ subs r6,r6,#2
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
+ vld1.32 {q9},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ bgt .Loop2x_ctr32
+
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
+ .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
+ vorr q0,q6,q6
+ .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
+ vorr q1,q6,q6
+ .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
+ .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
+ vld1.8 {q2},[r0]!
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ vld1.8 {q3},[r0]!
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ add r8,r8,#1
+ .byte 0x24,0x83,0xb0,0xf3 @ aese q4,q10
+ .byte 0x24,0xa3,0xb0,0xf3 @ aese q5,q10
+ rev r9,r8
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ add r8,r8,#1
+ .byte 0x26,0x83,0xb0,0xf3 @ aese q4,q11
+ .byte 0x26,0xa3,0xb0,0xf3 @ aese q5,q11
+ veor q2,q2,q7
+ rev r10,r8
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ veor q3,q3,q7
+ mov r7,r3
+ .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
+ .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
+ subs r2,r2,#2
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ vld1.32 {q8-q9},[r7]! @ re-pre-load rndkey[0-1]
+ .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
+ .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
+ .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
+ vmov.32 d1[1], r9
+ .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
+ vmov.32 d3[1], r10
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
+ .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
+ .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
+
+ mov r6,r5
+ veor q2,q2,q4
+ veor q3,q3,q5
+ vst1.8 {q2},[r1]!
+ vst1.8 {q3},[r1]!
+ bhs .Loop2x_ctr32
+
+ adds r2,r2,#2
+ beq .Lctr32_done
+ b .Lctr32_tail
+
+.Lctr32_128:
+ vld1.32 {q4-q5},[r7]
+
+.Loop2x_ctr32_128:
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q2},[r0]!
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ vld1.8 {q3},[r0]!
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
+ add r8,r8,#1
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ rev r9,r8
+ .byte 0x08,0x03,0xb0,0xf3 @ aese q0,q4
+ .byte 0x08,0x23,0xb0,0xf3 @ aese q1,q4
+ add r8,r8,#1
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ rev r10,r8
+ .byte 0x0a,0x03,0xb0,0xf3 @ aese q0,q5
+ .byte 0x0a,0x23,0xb0,0xf3 @ aese q1,q5
+ subs r2,r2,#2
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+ .byte 0x24,0x23,0xb0,0xf3 @ aese q1,q10
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+ .byte 0x26,0x23,0xb0,0xf3 @ aese q1,q11
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+ .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+ .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+ .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
+ veor q2,q2,q7
+ .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+ veor q3,q3,q7
+ .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
+
+ veor q2,q2,q0
+ vorr q0,q6,q6
+ veor q3,q3,q1
+ vorr q1,q6,q6
+ vst1.8 {q2},[r1]!
+ vmov.32 d1[1], r9
+ vst1.8 {q3},[r1]!
+ vmov.32 d3[1], r10
+ bhs .Loop2x_ctr32_128
+
+ adds r2,r2,#2
+ beq .Lctr32_done
+
+.Lctr32_tail:
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ vld1.32 {q8},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r6,r6,#2
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ vld1.32 {q9},[r7]!
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ bgt .Lctr32_tail
+
+ .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q2},[r0]
+ .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ veor q2,q2,q7
+ .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+
+ veor q2,q2,q0
+ vst1.8 {q2},[r1]
+
+.Lctr32_done:
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r10,pc}
+.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+#endif
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
new file mode 100644
index 0000000..415dc04
--- /dev/null
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -0,0 +1,980 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for ARMv8 AES instructions. The
+# module is endian-agnostic in sense that it supports both big- and
+# little-endian cases. As does it support both 32- and 64-bit modes
+# of operation. Latter is achieved by limiting amount of utilized
+# registers to 16, which implies additional instructions. This has
+# no effect on mighty Apple A7, as results are literally equal to
+# the theoretical estimates based on instruction latencies and issue
+# rate. It remains to be seen how does it affect other platforms...
+#
+# Performance in cycles per byte processed with 128-bit key:
+#
+# CBC enc CBC dec CTR
+# Apple A7 2.39 1.20 1.20
+# Cortex-A5x n/a n/a n/a
+
+$flavour = shift;
+open STDOUT,">".shift;
+
+$prefix="aes_v8";
+
+$code=<<___;
+#include "arm_arch.h"
+
+#if __ARM_ARCH__>=7
+.text
+___
+$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
+$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
+
+# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
+# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
+# maintain both 32- and 64-bit codes within single module and
+# transliterate common code to either flavour with regex vodoo.
+#
+{{{
+my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
+my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
+ $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
+
+
+$code.=<<___;
+.align 5
+rcon:
+.long 0x01,0x01,0x01,0x01
+.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
+.long 0x1b,0x1b,0x1b,0x1b
+
+.globl ${prefix}_set_encrypt_key
+.type ${prefix}_set_encrypt_key,%function
+.align 5
+${prefix}_set_encrypt_key:
+.Lenc_key:
+___
+$code.=<<___ if ($flavour =~ /64/);
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+___
+$code.=<<___;
+ adr $ptr,rcon
+ cmp $bits,#192
+
+ veor $zero,$zero,$zero
+ vld1.8 {$in0},[$inp],#16
+ mov $bits,#8 // reuse $bits
+ vld1.32 {$rcon,$mask},[$ptr],#32
+
+ b.lt .Loop128
+ b.eq .L192
+ b .L256
+
+.align 4
+.Loop128:
+ vtbl.8 $key,{$in0},$mask
+ vext.8 $tmp,$zero,$in0,#12
+ vst1.32 {$in0},[$out],#16
+ aese $key,$zero
+ subs $bits,$bits,#1
+
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $key,$key,$rcon
+ veor $in0,$in0,$tmp
+ vshl.u8 $rcon,$rcon,#1
+ veor $in0,$in0,$key
+ b.ne .Loop128
+
+ vld1.32 {$rcon},[$ptr]
+
+ vtbl.8 $key,{$in0},$mask
+ vext.8 $tmp,$zero,$in0,#12
+ vst1.32 {$in0},[$out],#16
+ aese $key,$zero
+
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $key,$key,$rcon
+ veor $in0,$in0,$tmp
+ vshl.u8 $rcon,$rcon,#1
+ veor $in0,$in0,$key
+
+ vtbl.8 $key,{$in0},$mask
+ vext.8 $tmp,$zero,$in0,#12
+ vst1.32 {$in0},[$out],#16
+ aese $key,$zero
+
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $key,$key,$rcon
+ veor $in0,$in0,$tmp
+ veor $in0,$in0,$key
+ vst1.32 {$in0},[$out]
+ add $out,$out,#0x50
+
+ mov $rounds,#10
+ b .Ldone
+
+.align 4
+.L192:
+ vld1.8 {$in1},[$inp],#8
+ vmov.i8 $key,#8 // borrow $key
+ vst1.32 {$in0},[$out],#16
+ vsub.i8 $mask,$mask,$key // adjust the mask
+
+.Loop192:
+ vtbl.8 $key,{$in1},$mask
+ vext.8 $tmp,$zero,$in0,#12
+ vst1.32 {$in1},[$out],#8
+ aese $key,$zero
+ subs $bits,$bits,#1
+
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+
+ vdup.32 $tmp,${in0}[3]
+ veor $tmp,$tmp,$in1
+ veor $key,$key,$rcon
+ vext.8 $in1,$zero,$in1,#12
+ vshl.u8 $rcon,$rcon,#1
+ veor $in1,$in1,$tmp
+ veor $in0,$in0,$key
+ veor $in1,$in1,$key
+ vst1.32 {$in0},[$out],#16
+ b.ne .Loop192
+
+ mov $rounds,#12
+ add $out,$out,#0x20
+ b .Ldone
+
+.align 4
+.L256:
+ vld1.8 {$in1},[$inp]
+ mov $bits,#7
+ mov $rounds,#14
+ vst1.32 {$in0},[$out],#16
+
+.Loop256:
+ vtbl.8 $key,{$in1},$mask
+ vext.8 $tmp,$zero,$in0,#12
+ vst1.32 {$in1},[$out],#16
+ aese $key,$zero
+ subs $bits,$bits,#1
+
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in0,$in0,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $key,$key,$rcon
+ veor $in0,$in0,$tmp
+ vshl.u8 $rcon,$rcon,#1
+ veor $in0,$in0,$key
+ vst1.32 {$in0},[$out],#16
+ b.eq .Ldone
+
+ vdup.32 $key,${in0}[3] // just splat
+ vext.8 $tmp,$zero,$in1,#12
+ aese $key,$zero
+
+ veor $in1,$in1,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in1,$in1,$tmp
+ vext.8 $tmp,$zero,$tmp,#12
+ veor $in1,$in1,$tmp
+
+ veor $in1,$in1,$key
+ b .Loop256
+
+.Ldone:
+ str $rounds,[$out]
+
+ eor x0,x0,x0 // return value
+ `"ldr x29,[sp],#16" if ($flavour =~ /64/)`
+ ret
+.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
+
+.globl ${prefix}_set_decrypt_key
+.type ${prefix}_set_decrypt_key,%function
+.align 5
+${prefix}_set_decrypt_key:
+___
+$code.=<<___ if ($flavour =~ /64/);
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+___
+$code.=<<___ if ($flavour !~ /64/);
+ stmdb sp!,{r4,lr}
+___
+$code.=<<___;
+ bl .Lenc_key
+
+ sub $out,$out,#240 // restore original $out
+ mov x4,#-16
+ add $inp,$out,x12,lsl#4 // end of key schedule
+
+ vld1.32 {v0.16b},[$out]
+ vld1.32 {v1.16b},[$inp]
+ vst1.32 {v0.16b},[$inp],x4
+ vst1.32 {v1.16b},[$out],#16
+
+.Loop_imc:
+ vld1.32 {v0.16b},[$out]
+ vld1.32 {v1.16b},[$inp]
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ vst1.32 {v0.16b},[$inp],x4
+ vst1.32 {v1.16b},[$out],#16
+ cmp $inp,$out
+ b.hi .Loop_imc
+
+ vld1.32 {v0.16b},[$out]
+ aesimc v0.16b,v0.16b
+ vst1.32 {v0.16b},[$inp]
+
+ eor x0,x0,x0 // return value
+___
+$code.=<<___ if ($flavour !~ /64/);
+ ldmia sp!,{r4,pc}
+___
+$code.=<<___ if ($flavour =~ /64/);
+ ldp x29,x30,[sp],#16
+ ret
+___
+$code.=<<___;
+.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
+___
+}}}
+{{{
+sub gen_block () {
+my $dir = shift;
+my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
+my ($inp,$out,$key)=map("x$_",(0..2));
+my $rounds="w3";
+my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));
+
+$code.=<<___;
+.globl ${prefix}_${dir}crypt
+.type ${prefix}_${dir}crypt,%function
+.align 5
+${prefix}_${dir}crypt:
+ ldr $rounds,[$key,#240]
+ vld1.32 {$rndkey0},[$key],#16
+ vld1.8 {$inout},[$inp]
+ sub $rounds,$rounds,#2
+ vld1.32 {$rndkey1},[$key],#16
+
+.Loop_${dir}c:
+ aes$e $inout,$rndkey0
+ vld1.32 {$rndkey0},[$key],#16
+ aes$mc $inout,$inout
+ subs $rounds,$rounds,#2
+ aes$e $inout,$rndkey1
+ vld1.32 {$rndkey1},[$key],#16
+ aes$mc $inout,$inout
+ b.gt .Loop_${dir}c
+
+ aes$e $inout,$rndkey0
+ vld1.32 {$rndkey0},[$key]
+ aes$mc $inout,$inout
+ aes$e $inout,$rndkey1
+ veor $inout,$inout,$rndkey0
+
+ vst1.8 {$inout},[$out]
+ ret
+.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
+my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+
+my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
+
+### q8-q15 preloaded key schedule
+
+$code.=<<___;
+.globl ${prefix}_cbc_encrypt
+.type ${prefix}_cbc_encrypt,%function
+.align 5
+${prefix}_cbc_encrypt:
+___
+$code.=<<___ if ($flavour =~ /64/);
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+___
+$code.=<<___ if ($flavour !~ /64/);
+ mov ip,sp
+ stmdb sp!,{r4-r8,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldmia ip,{r4-r5} @ load remaining args
+___
+$code.=<<___;
+ subs $len,$len,#16
+ mov $step,#16
+ b.lo .Lcbc_abort
+ cclr $step,eq
+
+ cmp $enc,#0 // en- or decrypting?
+ ldr $rounds,[$key,#240]
+ and $len,$len,#-16
+ vld1.8 {$ivec},[$ivp]
+ vld1.8 {$dat},[$inp],$step
+
+ vld1.32 {q8-q9},[$key] // load key schedule...
+ sub $rounds,$rounds,#6
+ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
+ sub $rounds,$rounds,#2
+ vld1.32 {q10-q11},[$key_],#32
+ vld1.32 {q12-q13},[$key_],#32
+ vld1.32 {q14-q15},[$key_],#32
+ vld1.32 {$rndlast},[$key_]
+
+ add $key_,$key,#32
+ mov $cnt,$rounds
+ b.eq .Lcbc_dec
+
+ cmp $rounds,#2
+ veor $dat,$dat,$ivec
+ veor $rndzero_n_last,q8,$rndlast
+ b.eq .Lcbc_enc128
+
+.Loop_cbc_enc:
+ aese $dat,q8
+ vld1.32 {q8},[$key_],#16
+ aesmc $dat,$dat
+ subs $cnt,$cnt,#2
+ aese $dat,q9
+ vld1.32 {q9},[$key_],#16
+ aesmc $dat,$dat
+ b.gt .Loop_cbc_enc
+
+ aese $dat,q8
+ aesmc $dat,$dat
+ subs $len,$len,#16
+ aese $dat,q9
+ aesmc $dat,$dat
+ cclr $step,eq
+ aese $dat,q10
+ aesmc $dat,$dat
+ add $key_,$key,#16
+ aese $dat,q11
+ aesmc $dat,$dat
+ vld1.8 {q8},[$inp],$step
+ aese $dat,q12
+ aesmc $dat,$dat
+ veor q8,q8,$rndzero_n_last
+ aese $dat,q13
+ aesmc $dat,$dat
+ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
+ aese $dat,q14
+ aesmc $dat,$dat
+ aese $dat,q15
+
+ mov $cnt,$rounds
+ veor $ivec,$dat,$rndlast
+ vst1.8 {$ivec},[$out],#16
+ b.hs .Loop_cbc_enc
+
+ b .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+ vld1.32 {$in0-$in1},[$key_]
+ aese $dat,q8
+ aesmc $dat,$dat
+ b .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+ aese $dat,q8
+ aesmc $dat,$dat
+ vst1.8 {$ivec},[$out],#16
+.Lenter_cbc_enc128:
+ aese $dat,q9
+ aesmc $dat,$dat
+ subs $len,$len,#16
+ aese $dat,$in0
+ aesmc $dat,$dat
+ cclr $step,eq
+ aese $dat,$in1
+ aesmc $dat,$dat
+ aese $dat,q10
+ aesmc $dat,$dat
+ aese $dat,q11
+ aesmc $dat,$dat
+ vld1.8 {q8},[$inp],$step
+ aese $dat,q12
+ aesmc $dat,$dat
+ aese $dat,q13
+ aesmc $dat,$dat
+ aese $dat,q14
+ aesmc $dat,$dat
+ veor q8,q8,$rndzero_n_last
+ aese $dat,q15
+ veor $ivec,$dat,$rndlast
+ b.hs .Loop_cbc_enc128
+
+ vst1.8 {$ivec},[$out],#16
+ b .Lcbc_done
+
+.align 5
+.Lcbc_dec128:
+ vld1.32 {$tmp0-$tmp1},[$key_]
+ veor $ivec,$ivec,$rndlast
+ veor $in0,$dat0,$rndlast
+ mov $step1,$step
+
+.Loop2x_cbc_dec128:
+ aesd $dat0,q8
+ aesd $dat1,q8
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ subs $len,$len,#32
+ aesd $dat0,q9
+ aesd $dat1,q9
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ cclr $step,lo
+ aesd $dat0,$tmp0
+ aesd $dat1,$tmp0
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ cclr $step1,ls
+ aesd $dat0,$tmp1
+ aesd $dat1,$tmp1
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q10
+ aesd $dat1,q10
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q11
+ aesd $dat1,q11
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q12
+ aesd $dat1,q12
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q13
+ aesd $dat1,q13
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q14
+ aesd $dat1,q14
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ aesd $dat0,q15
+ aesd $dat1,q15
+
+ veor $ivec,$ivec,$dat0
+ vld1.8 {$dat0},[$inp],$step
+ veor $in0,$in0,$dat1
+ vld1.8 {$dat1},[$inp],$step1
+ vst1.8 {$ivec},[$out],#16
+ veor $ivec,$in1,$rndlast
+ vst1.8 {$in0},[$out],#16
+ veor $in0,$dat0,$rndlast
+ vorr $in1,$dat1,$dat1
+ b.hs .Loop2x_cbc_dec128
+
+ adds $len,$len,#32
+ veor $ivec,$ivec,$rndlast
+ b.eq .Lcbc_done
+ veor $in0,$in0,$rndlast
+ b .Lcbc_dec_tail
+
+.align 5
+.Lcbc_dec:
+ subs $len,$len,#16
+ vorr $in0,$dat,$dat
+ b.lo .Lcbc_dec_tail
+
+ cclr $step,eq
+ cmp $rounds,#2
+ vld1.8 {$dat1},[$inp],$step
+ vorr $in1,$dat1,$dat1
+ b.eq .Lcbc_dec128
+
+.Loop2x_cbc_dec:
+ aesd $dat0,q8
+ aesd $dat1,q8
+ vld1.32 {q8},[$key_],#16
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ subs $cnt,$cnt,#2
+ aesd $dat0,q9
+ aesd $dat1,q9
+ vld1.32 {q9},[$key_],#16
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ b.gt .Loop2x_cbc_dec
+
+ aesd $dat0,q8
+ aesd $dat1,q8
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ veor $tmp0,$ivec,$rndlast
+ veor $tmp1,$in0,$rndlast
+ aesd $dat0,q9
+ aesd $dat1,q9
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ vorr $ivec,$in1,$in1
+ subs $len,$len,#32
+ aesd $dat0,q10
+ aesd $dat1,q10
+ aesimc $dat0,$dat0
+ cclr $step,lo
+ aesimc $dat1,$dat1
+ mov $key_,$key
+ aesd $dat0,q11
+ aesd $dat1,q11
+ aesimc $dat0,$dat0
+ vld1.8 {$in0},[$inp],$step
+ aesimc $dat1,$dat1
+ cclr $step,ls
+ aesd $dat0,q12
+ aesd $dat1,q12
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ vld1.8 {$in1},[$inp],$step
+ aesd $dat0,q13
+ aesd $dat1,q13
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
+ aesd $dat0,q14
+ aesd $dat1,q14
+ aesimc $dat0,$dat0
+ aesimc $dat1,$dat1
+ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
+ aesd $dat0,q15
+ aesd $dat1,q15
+
+ mov $cnt,$rounds
+ veor $tmp0,$tmp0,$dat0
+ veor $tmp1,$tmp1,$dat1
+ vorr $dat0,$in0,$in0
+ vst1.8 {$tmp0},[$out],#16
+ vorr $dat1,$in1,$in1
+ vst1.8 {$tmp1},[$out],#16
+ b.hs .Loop2x_cbc_dec
+
+ adds $len,$len,#32
+ b.eq .Lcbc_done
+
+.Lcbc_dec_tail:
+ aesd $dat,q8
+ vld1.32 {q8},[$key_],#16
+ aesimc $dat,$dat
+ subs $cnt,$cnt,#2
+ aesd $dat,q9
+ vld1.32 {q9},[$key_],#16
+ aesimc $dat,$dat
+ b.gt .Lcbc_dec_tail
+
+ aesd $dat,q8
+ aesimc $dat,$dat
+ aesd $dat,q9
+ aesimc $dat,$dat
+ veor $tmp,$ivec,$rndlast
+ aesd $dat,q10
+ aesimc $dat,$dat
+ vorr $ivec,$in0,$in0
+ aesd $dat,q11
+ aesimc $dat,$dat
+ aesd $dat,q12
+ aesimc $dat,$dat
+ aesd $dat,q13
+ aesimc $dat,$dat
+ aesd $dat,q14
+ aesimc $dat,$dat
+ aesd $dat,q15
+
+ veor $tmp,$tmp,$dat
+ vst1.8 {$tmp},[$out],#16
+
+.Lcbc_done:
+ vst1.8 {$ivec},[$ivp]
+.Lcbc_abort:
+___
+$code.=<<___ if ($flavour !~ /64/);
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r8,pc}
+___
+$code.=<<___ if ($flavour =~ /64/);
+ ldr x29,[sp],#16
+ ret
+___
+$code.=<<___;
+.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
+___
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
+my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10");
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+
+my ($dat,$tmp)=($dat0,$tmp0);
+
+### q8-q15 preloaded key schedule
+
+$code.=<<___;
+.globl ${prefix}_ctr32_encrypt_blocks
+.type ${prefix}_ctr32_encrypt_blocks,%function
+.align 5
+${prefix}_ctr32_encrypt_blocks:
+___
+$code.=<<___ if ($flavour =~ /64/);
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+___
+$code.=<<___ if ($flavour !~ /64/);
+ mov ip,sp
+ stmdb sp!,{r4-r10,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldr r4, [ip] @ load remaining arg
+___
+$code.=<<___;
+ ldr $rounds,[$key,#240]
+
+ ldr $ctr, [$ivp, #12]
+ vld1.32 {$dat0},[$ivp]
+
+ vld1.32 {q8-q9},[$key] // load key schedule...
+ sub $rounds,$rounds,#6
+ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
+ sub $rounds,$rounds,#2
+ vld1.32 {q10-q11},[$key_],#32
+ vld1.32 {q12-q13},[$key_],#32
+ vld1.32 {q14-q15},[$key_],#32
+ vld1.32 {$rndlast},[$key_]
+
+ add $key_,$key,#32
+ mov $cnt,$rounds
+
+ subs $len,$len,#2
+ b.lo .Lctr32_tail
+
+#ifndef __ARMEB__
+ rev $ctr, $ctr
+#endif
+ vorr $dat1,$dat0,$dat0
+ add $ctr, $ctr, #1
+ vorr $ivec,$dat0,$dat0
+ rev $tctr1, $ctr
+ cmp $rounds,#2
+ vmov.32 ${dat1}[3],$tctr1
+ b.eq .Lctr32_128
+
+.Loop2x_ctr32:
+ aese $dat0,q8
+ aese $dat1,q8
+ vld1.32 {q8},[$key_],#16
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ subs $cnt,$cnt,#2
+ aese $dat0,q9
+ aese $dat1,q9
+ vld1.32 {q9},[$key_],#16
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ b.gt .Loop2x_ctr32
+
+ aese $dat0,q8
+ aese $dat1,q8
+ aesmc $tmp0,$dat0
+ vorr $dat0,$ivec,$ivec
+ aesmc $tmp1,$dat1
+ vorr $dat1,$ivec,$ivec
+ aese $tmp0,q9
+ aese $tmp1,q9
+ vld1.8 {$in0},[$inp],#16
+ aesmc $tmp0,$tmp0
+ vld1.8 {$in1},[$inp],#16
+ aesmc $tmp1,$tmp1
+ add $ctr,$ctr,#1
+ aese $tmp0,q10
+ aese $tmp1,q10
+ rev $tctr,$ctr
+ aesmc $tmp0,$tmp0
+ aesmc $tmp1,$tmp1
+ add $ctr,$ctr,#1
+ aese $tmp0,q11
+ aese $tmp1,q11
+ veor $in0,$in0,$rndlast
+ rev $tctr1,$ctr
+ aesmc $tmp0,$tmp0
+ aesmc $tmp1,$tmp1
+ veor $in1,$in1,$rndlast
+ mov $key_,$key
+ aese $tmp0,q12
+ aese $tmp1,q12
+ subs $len,$len,#2
+ aesmc $tmp0,$tmp0
+ aesmc $tmp1,$tmp1
+ vld1.32 {q8-q9},[$key_],#32 // re-pre-load rndkey[0-1]
+ aese $tmp0,q13
+ aese $tmp1,q13
+ aesmc $tmp0,$tmp0
+ aesmc $tmp1,$tmp1
+ aese $tmp0,q14
+ aese $tmp1,q14
+ vmov.32 ${dat0}[3], $tctr
+ aesmc $tmp0,$tmp0
+ vmov.32 ${dat1}[3], $tctr1
+ aesmc $tmp1,$tmp1
+ aese $tmp0,q15
+ aese $tmp1,q15
+
+ mov $cnt,$rounds
+ veor $in0,$in0,$tmp0
+ veor $in1,$in1,$tmp1
+ vst1.8 {$in0},[$out],#16
+ vst1.8 {$in1},[$out],#16
+ b.hs .Loop2x_ctr32
+
+ adds $len,$len,#2
+ b.eq .Lctr32_done
+ b .Lctr32_tail
+
+.Lctr32_128:
+ vld1.32 {$tmp0-$tmp1},[$key_]
+
+.Loop2x_ctr32_128:
+ aese $dat0,q8
+ aese $dat1,q8
+ aesmc $dat0,$dat0
+ vld1.8 {$in0},[$inp],#16
+ aesmc $dat1,$dat1
+ vld1.8 {$in1},[$inp],#16
+ aese $dat0,q9
+ aese $dat1,q9
+ add $ctr,$ctr,#1
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ rev $tctr,$ctr
+ aese $dat0,$tmp0
+ aese $dat1,$tmp0
+ add $ctr,$ctr,#1
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ rev $tctr1,$ctr
+ aese $dat0,$tmp1
+ aese $dat1,$tmp1
+ subs $len,$len,#2
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ aese $dat0,q10
+ aese $dat1,q10
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ aese $dat0,q11
+ aese $dat1,q11
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ aese $dat0,q12
+ aese $dat1,q12
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ aese $dat0,q13
+ aese $dat1,q13
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ aese $dat0,q14
+ aese $dat1,q14
+ aesmc $dat0,$dat0
+ aesmc $dat1,$dat1
+ veor $in0,$in0,$rndlast
+ aese $dat0,q15
+ veor $in1,$in1,$rndlast
+ aese $dat1,q15
+
+ veor $in0,$in0,$dat0
+ vorr $dat0,$ivec,$ivec
+ veor $in1,$in1,$dat1
+ vorr $dat1,$ivec,$ivec
+ vst1.8 {$in0},[$out],#16
+ vmov.32 ${dat0}[3], $tctr
+ vst1.8 {$in1},[$out],#16
+ vmov.32 ${dat1}[3], $tctr1
+ b.hs .Loop2x_ctr32_128
+
+ adds $len,$len,#2
+ b.eq .Lctr32_done
+
+.Lctr32_tail:
+ aese $dat,q8
+ vld1.32 {q8},[$key_],#16
+ aesmc $dat,$dat
+ subs $cnt,$cnt,#2
+ aese $dat,q9
+ vld1.32 {q9},[$key_],#16
+ aesmc $dat,$dat
+ b.gt .Lctr32_tail
+
+ aese $dat,q8
+ aesmc $dat,$dat
+ aese $dat,q9
+ aesmc $dat,$dat
+ vld1.8 {$in0},[$inp]
+ aese $dat,q10
+ aesmc $dat,$dat
+ aese $dat,q11
+ aesmc $dat,$dat
+ aese $dat,q12
+ aesmc $dat,$dat
+ aese $dat,q13
+ aesmc $dat,$dat
+ aese $dat,q14
+ aesmc $dat,$dat
+ veor $in0,$in0,$rndlast
+ aese $dat,q15
+
+ veor $in0,$in0,$dat
+ vst1.8 {$in0},[$out]
+
+.Lctr32_done:
+___
+$code.=<<___ if ($flavour !~ /64/);
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r10,pc}
+___
+$code.=<<___ if ($flavour =~ /64/);
+ ldr x29,[sp],#16
+ ret
+___
+$code.=<<___;
+.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
+___
+}}}
+$code.=<<___;
+#endif
+___
+########################################
+if ($flavour =~ /64/) { ######## 64-bit code
+ my %opcode = (
+ "aesd" => 0x4e285800, "aese" => 0x4e284800,
+ "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 );
+
+ local *unaes = sub {
+ my ($mnemonic,$arg)=@_;
+
+ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o &&
+ sprintf ".inst\t0x%08x\t//%s %s",
+ $opcode{$mnemonic}|$1|($2<<5),
+ $mnemonic,$arg;
+ };
+
+ foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
+ s/@\s/\/\//o; # old->new style commentary
+
+ #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
+ s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or
+ s/vmov\.i8/movi/o or # fix up legacy mnemonics
+ s/vext\.8/ext/o or
+ s/vrev32\.8/rev32/o or
+ s/vtst\.8/cmtst/o or
+ s/vshr/ushr/o or
+ s/^(\s+)v/$1/o or # strip off v prefix
+ s/\bbx\s+lr\b/ret/o;
+
+ # fix up remainig legacy suffixes
+ s/\.[ui]?8//o;
+ m/\],#8/o and s/\.16b/\.8b/go;
+ s/\.[ui]?32//o and s/\.16b/\.4s/go;
+ s/\.[ui]?64//o and s/\.16b/\.2d/go;
+ s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
+
+ print $_,"\n";
+ }
+} else { ######## 32-bit code
+ my %opcode = (
+ "aesd" => 0xf3b00340, "aese" => 0xf3b00300,
+ "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 );
+
+ local *unaes = sub {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<1) |(($2&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ };
+
+ sub unvtbl {
+ my $arg=shift;
+
+ $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
+ sprintf "vtbl.8 d%d,{q%d},d%d\n\t".
+ "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
+ }
+
+ sub unvdup32 {
+ my $arg=shift;
+
+ $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
+ sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
+ }
+
+ sub unvmov32 {
+ my $arg=shift;
+
+ $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
+ sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
+ }
+
+ foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers
+ s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers
+ s/\/\/\s?/@ /o; # new->old style commentary
+
+ # fix up remainig new-style suffixes
+ s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or
+ s/\],#[0-9]+/]!/o;
+
+ s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
+ s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
+ s/vtbl\.8\s+(.*)/unvtbl($1)/geo or
+ s/vdup\.32\s+(.*)/unvdup32($1)/geo or
+ s/vmov\.32\s+(.*)/unvmov32($1)/geo or
+ s/^(\s+)b\./$1b/o or
+ s/^(\s+)ret/$1bx\tlr/o;
+
+ print $_,"\n";
+ }
+}
+
+close STDOUT;
diff --git a/crypto/aes/asm/bsaes-armv7.S b/crypto/aes/asm/bsaes-armv7.S
new file mode 100644
index 0000000..64205d4
--- /dev/null
+++ b/crypto/aes/asm/bsaes-armv7.S
@@ -0,0 +1,2544 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <[email protected]> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+@ <[email protected]>. Permission to use under GPL terms is
+@ granted.
+@ ====================================================================
+
+@ Bit-sliced AES for ARM NEON
+@
+@ February 2012.
+@
+@ This implementation is direct adaptation of bsaes-x86_64 module for
+@ ARM NEON. Except that this module is endian-neutral [in sense that
+@ it can be compiled for either endianness] by courtesy of vld1.8's
+@ neutrality. Initial version doesn't implement interface to OpenSSL,
+@ only low-level primitives and unsupported entry points, just enough
+@ to collect performance results, which for Cortex-A8 core are:
+@
+@ encrypt 19.5 cycles per byte processed with 128-bit key
+@ decrypt 22.1 cycles per byte processed with 128-bit key
+@ key conv. 440 cycles per 128-bit key/0.18 of 8x block
+@
+@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+@ which is [much] worse than anticipated (for further details see
+@ http://www.openssl.org/~appro/Snapdragon-S4.html).
+@
+@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+@ manages in 20.0 cycles].
+@
+@ When comparing to x86_64 results keep in mind that NEON unit is
+@ [mostly] single-issue and thus can't [fully] benefit from
+@ instruction-level parallelism. And when comparing to aes-armv4
+@ results keep in mind key schedule conversion overhead (see
+@ bsaes-x86_64.pl for further details)...
+@
+@ <[email protected]>
+
+@ April-August 2013
+@
+@ Add CBC, CTR and XTS subroutines, adapt for kernel use.
+@
+@ <[email protected]>
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr r6,_bsaes_decrypt8
+ vldmia r4!, {q9} @ round 0 key
+ add r6,r6,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia r6!, {q8} @ .LM0ISR
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Ldec_sbox:
+ veor q1, q1, q4
+ veor q3, q3, q4
+
+ veor q4, q4, q7
+ veor q1, q1, q6
+ veor q2, q2, q7
+ veor q6, q6, q4
+
+ veor q0, q0, q1
+ veor q2, q2, q5
+ veor q7, q7, q6
+ veor q3, q3, q0
+ veor q5, q5, q0
+ veor q1, q1, q3
+ veor q11, q3, q0
+ veor q10, q7, q4
+ veor q9, q1, q6
+ veor q13, q4, q0
+ vmov q8, q10
+ veor q12, q5, q2
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q6, q2
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q3, q7
+ veor q12, q1, q5
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q4, q6
+ veor q9, q9, q14
+ vand q13, q0, q2
+ vand q14, q7, q1
+ vorr q15, q3, q5
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q5, q2
+ veor q8, q1, q6
+ veor q10, q15, q14
+ vand q10, q10, q5
+ veor q5, q5, q1
+ vand q11, q1, q15
+ vand q5, q5, q14
+ veor q1, q11, q10
+ veor q5, q5, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q2
+ veor q12, q12, q8
+ veor q2, q2, q6
+ vand q8, q8, q15
+ vand q6, q6, q13
+ vand q12, q12, q14
+ vand q2, q2, q9
+ veor q8, q8, q12
+ veor q2, q2, q6
+ veor q12, q12, q11
+ veor q6, q6, q10
+ veor q5, q5, q12
+ veor q2, q2, q12
+ veor q1, q1, q8
+ veor q6, q6, q8
+
+ veor q12, q3, q0
+ veor q8, q7, q4
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q4
+ vand q8, q8, q15
+ vand q4, q4, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q4
+ veor q12, q12, q11
+ veor q4, q4, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q3
+ veor q3, q3, q7
+ vand q11, q7, q15
+ vand q3, q3, q14
+ veor q7, q11, q10
+ veor q3, q3, q11
+ veor q3, q3, q12
+ veor q0, q0, q12
+ veor q7, q7, q8
+ veor q4, q4, q8
+ veor q1, q1, q7
+ veor q6, q6, q5
+
+ veor q4, q4, q1
+ veor q2, q2, q7
+ veor q5, q5, q7
+ veor q4, q4, q2
+ veor q7, q7, q0
+ veor q4, q4, q5
+ veor q3, q3, q6
+ veor q6, q6, q1
+ veor q3, q3, q4
+
+ veor q4, q4, q0
+ veor q7, q7, q3
+ subs r5,r5,#1
+ bcc .Ldec_done
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 q8, q0, q0, #8
+ vext.8 q14, q3, q3, #8
+ vext.8 q15, q5, q5, #8
+ veor q8, q8, q0
+ vext.8 q9, q1, q1, #8
+ veor q14, q14, q3
+ vext.8 q10, q6, q6, #8
+ veor q15, q15, q5
+ vext.8 q11, q4, q4, #8
+ veor q9, q9, q1
+ vext.8 q12, q2, q2, #8
+ veor q10, q10, q6
+ vext.8 q13, q7, q7, #8
+ veor q11, q11, q4
+ veor q12, q12, q2
+ veor q13, q13, q7
+
+ veor q0, q0, q14
+ veor q1, q1, q14
+ veor q6, q6, q8
+ veor q2, q2, q10
+ veor q4, q4, q9
+ veor q1, q1, q15
+ veor q6, q6, q15
+ veor q2, q2, q14
+ veor q7, q7, q11
+ veor q4, q4, q14
+ veor q3, q3, q12
+ veor q2, q2, q15
+ veor q7, q7, q15
+ veor q5, q5, q13
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q6, q6, #12
+ veor q1, q1, q9
+ vext.8 q11, q4, q4, #12
+ veor q6, q6, q10
+ vext.8 q12, q2, q2, #12
+ veor q4, q4, q11
+ vext.8 q13, q7, q7, #12
+ veor q2, q2, q12
+ vext.8 q14, q3, q3, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q3, q3, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q2
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q2, q2, #8
+ veor q12, q12, q4
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q3
+ vext.8 q2, q4, q4, #8
+ veor q11, q11, q6
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q4, q3, q3, #8
+ veor q11, q11, q5
+ vext.8 q3, q6, q6, #8
+ veor q5, q9, q13
+ veor q11, q11, q2
+ veor q7, q7, q15
+ veor q6, q4, q14
+ veor q4, q8, q12
+ veor q2, q3, q10
+ vmov q3, q11
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq r6,r6,#0x10
+ bne .Ldec_loop
+ vldmia r6, {q12} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q3, #1
+ vshr.u64 q11, q2, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q4
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q2, #2
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q4
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q4, #4
+ vshr.u64 q11, q6, #4
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q4, q4, q10
+ veor q6, q6, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q6, q6, q8
+ veor q4, q4, q8
+ veor q2, q2, q8
+ veor q7, q7, q8
+ veor q3, q3, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <[email protected]>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr r6,_bsaes_encrypt8
+ vldmia r4!, {q9} @ round 0 key
+ sub r6,r6,#_bsaes_encrypt8-.LM0SR
+
+ vldmia r6!, {q8} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+_bsaes_encrypt8_bitslice:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Lenc_sbox:
+ veor q2, q2, q1
+ veor q5, q5, q6
+ veor q3, q3, q0
+ veor q6, q6, q2
+ veor q5, q5, q0
+
+ veor q6, q6, q3
+ veor q3, q3, q7
+ veor q7, q7, q5
+ veor q3, q3, q4
+ veor q4, q4, q5
+
+ veor q2, q2, q7
+ veor q3, q3, q1
+ veor q1, q1, q5
+ veor q11, q7, q4
+ veor q10, q1, q2
+ veor q9, q5, q3
+ veor q13, q2, q4
+ vmov q8, q10
+ veor q12, q6, q0
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q3, q0
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q7, q1
+ veor q12, q5, q6
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q2, q3
+ veor q9, q9, q14
+ vand q13, q4, q0
+ vand q14, q1, q5
+ vorr q15, q7, q6
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q6, q0
+ veor q8, q5, q3
+ veor q10, q15, q14
+ vand q10, q10, q6
+ veor q6, q6, q5
+ vand q11, q5, q15
+ vand q6, q6, q14
+ veor q5, q11, q10
+ veor q6, q6, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q3
+ vand q8, q8, q15
+ vand q3, q3, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q3
+ veor q12, q12, q11
+ veor q3, q3, q10
+ veor q6, q6, q12
+ veor q0, q0, q12
+ veor q5, q5, q8
+ veor q3, q3, q8
+
+ veor q12, q7, q4
+ veor q8, q1, q2
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q4
+ veor q12, q12, q8
+ veor q4, q4, q2
+ vand q8, q8, q15
+ vand q2, q2, q13
+ vand q12, q12, q14
+ vand q4, q4, q9
+ veor q8, q8, q12
+ veor q4, q4, q2
+ veor q12, q12, q11
+ veor q2, q2, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q7
+ veor q7, q7, q1
+ vand q11, q1, q15
+ vand q7, q7, q14
+ veor q1, q11, q10
+ veor q7, q7, q11
+ veor q7, q7, q12
+ veor q4, q4, q12
+ veor q1, q1, q8
+ veor q2, q2, q8
+ veor q7, q7, q0
+ veor q1, q1, q6
+ veor q6, q6, q0
+ veor q4, q4, q7
+ veor q0, q0, q1
+
+ veor q1, q1, q5
+ veor q5, q5, q2
+ veor q2, q2, q3
+ veor q3, q3, q5
+ veor q4, q4, q5
+
+ veor q6, q6, q3
+ subs r5,r5,#1
+ bcc .Lenc_done
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q4, q4, #12
+ veor q1, q1, q9
+ vext.8 q11, q6, q6, #12
+ veor q4, q4, q10
+ vext.8 q12, q3, q3, #12
+ veor q6, q6, q11
+ vext.8 q13, q7, q7, #12
+ veor q3, q3, q12
+ vext.8 q14, q2, q2, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q2, q2, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q3
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q3, q3, #8
+ veor q12, q12, q6
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q2
+ vext.8 q3, q6, q6, #8
+ veor q11, q11, q4
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q6, q2, q2, #8
+ veor q11, q11, q5
+ vext.8 q2, q4, q4, #8
+ veor q5, q9, q13
+ veor q4, q8, q12
+ veor q3, q3, q11
+ veor q7, q7, q15
+ veor q6, q6, q14
+ @ vmov q4, q8
+ veor q2, q2, q10
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LSR
+ ite eq @ Thumb2 thing, samity check in ARM
+ addeq r6,r6,#0x10
+ bne .Lenc_loop
+ vldmia r6, {q12} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q3, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q4, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q6
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q4, q4, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q3, #2
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q6
+ veor q11, q11, q4
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #2
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q6, #4
+ vshr.u64 q11, q4, #4
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q4, q4, q8
+ veor q6, q6, q8
+ veor q3, q3, q8
+ veor q7, q7, q8
+ veor q2, q2, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr r6,_bsaes_key_convert
+ vld1.8 {q7}, [r4]! @ load round 0 key
+ sub r6,r6,#_bsaes_key_convert-.LM0
+ vld1.8 {q15}, [r4]! @ load round 1 key
+
+ vmov.i8 q8, #0x01 @ bit masks
+ vmov.i8 q9, #0x02
+ vmov.i8 q10, #0x04
+ vmov.i8 q11, #0x08
+ vmov.i8 q12, #0x10
+ vmov.i8 q13, #0x20
+ vldmia r6, {q14} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 q7, q7
+ vrev32.8 q15, q15
+#endif
+ sub r5,r5,#1
+ vstmia r12!, {q7} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 d14,{q15},d28
+ vtbl.8 d15,{q15},d29
+ vmov.i8 q6, #0x40
+ vmov.i8 q15, #0x80
+
+ vtst.8 q0, q7, q8
+ vtst.8 q1, q7, q9
+ vtst.8 q2, q7, q10
+ vtst.8 q3, q7, q11
+ vtst.8 q4, q7, q12
+ vtst.8 q5, q7, q13
+ vtst.8 q6, q7, q6
+ vtst.8 q7, q7, q15
+ vld1.8 {q15}, [r4]! @ load next round key
+ vmvn q0, q0 @ "pnot"
+ vmvn q1, q1
+ vmvn q5, q5
+ vmvn q6, q6
+#ifdef __ARMEL__
+ vrev32.8 q15, q15
+#endif
+ subs r5,r5,#1
+ vstmia r12!,{q0-q7} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 q7,#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp r2, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ IV is 1st arg on the stack
+ mov r2, r2, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+ add r12, #96 @ sifze of bit-slices key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ vldmia sp, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia sp, {q7}
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r3, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {q15}, [r8] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs r2, r2, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {q0-q1}, [r0]! @ load input
+ vld1.8 {q2-q3}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ vld1.8 {q4-q5}, [r0]!
+ mov r5, r10
+ vld1.8 {q6-q7}, [r0]
+ sub r0, r0, #0x60
+ vstmia r9, {q15} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ veor q5, q5, q14
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ vst1.8 {q5}, [r1]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds r2, r2, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {q0}, [r0]! @ load input
+ cmp r2, #2
+ blo .Lcbc_dec_one
+ vld1.8 {q1}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ mov r5, r10
+ vstmia r9, {q15} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {q2}, [r0]!
+ cmp r2, #4
+ blo .Lcbc_dec_three
+ vld1.8 {q3}, [r0]!
+ beq .Lcbc_dec_four
+ vld1.8 {q4}, [r0]!
+ cmp r2, #6
+ blo .Lcbc_dec_five
+ vld1.8 {q5}, [r0]!
+ beq .Lcbc_dec_six
+ vld1.8 {q6}, [r0]!
+ sub r0, r0, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub r0, r0, #0x60
+ bl _bsaes_decrypt8
+ vldmia r9,{q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub r0, r0, #0x50
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q2, q2, q11
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub r0, r0, #0x40
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub r0, r0, #0x30
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub r0, r0, #0x20
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]! @ reload input
+ veor q1, q1, q8
+ vst1.8 {q0-q1}, [r1]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub r0, r0, #0x10
+ mov r10, r1 @ save original out pointer
+ mov r1, r9 @ use the iv scratch space as out buffer
+ mov r2, r3
+ vmov q4,q15 @ just in case ensure that IV
+ vmov q5,q0 @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {q0}, [r9,:64] @ load result
+ veor q0, q0, q4 @ ^= IV
+ vmov q15, q5 @ q5 holds input
+ vst1.8 {q0}, [r10] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ vst1.8 {q15}, [r8] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp r2, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+ add r12, #96 @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+ vld1.8 {q0}, [r8] @ load counter
+ add r8, r6, #.LREVM0SR-.LM0 @ borrow r8
+ vldmia sp, {q4} @ load round0 key
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+.align 2
+0: add r12, r3, #248
+ vld1.8 {q0}, [r8] @ load counter
+ adrl r8, .LREVM0SR @ borrow r8
+ vldmia r12, {q4} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 q8,#1 @ compose 1<<96
+ veor q9,q9,q9
+ vrev32.8 q0,q0
+ vext.8 q8,q9,q8,#4
+ vrev32.8 q4,q4
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vstmia sp, {q4} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 q10, q8, q9 @ compose 3<<96
+ vadd.u32 q1, q0, q8 @ +1
+ vadd.u32 q2, q0, q9 @ +2
+ vadd.u32 q3, q0, q10 @ +3
+ vadd.u32 q4, q1, q10
+ vadd.u32 q5, q2, q10
+ vadd.u32 q6, q3, q10
+ vadd.u32 q7, q4, q10
+ vadd.u32 q10, q5, q10 @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia sp, {q9} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x10 @ pass next round key
+#else
+ add r4, r3, #264
+#endif
+ vldmia r8, {q8} @ .LREVM0SR
+ mov r5, r10 @ pass rounds
+ vstmia r9, {q10} @ save next counter
+ sub r6, r8, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs r2, r2, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {q8-q9}, [r0]! @ load input
+ vld1.8 {q10-q11}, [r0]!
+ veor q0, q8
+ veor q1, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q10
+ veor q6, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q3, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q7, q13
+ veor q2, q14
+ vst1.8 {q4}, [r1]!
+ veor q5, q15
+ vst1.8 {q6}, [r1]!
+ vmov.i32 q8, #1 @ compose 1<<96
+ vst1.8 {q3}, [r1]!
+ veor q9, q9, q9
+ vst1.8 {q7}, [r1]!
+ vext.8 q8, q9, q8, #4
+ vst1.8 {q2}, [r1]!
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vst1.8 {q5}, [r1]!
+ vldmia r9, {q0} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add r2, r2, #8
+ vld1.8 {q8}, [r0]! @ load input
+ veor q0, q8
+ vst1.8 {q0}, [r1]! @ write output
+ cmp r2, #2
+ blo .Lctr_enc_done
+ vld1.8 {q9}, [r0]!
+ veor q1, q9
+ vst1.8 {q1}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q10}, [r0]!
+ veor q4, q10
+ vst1.8 {q4}, [r1]!
+ cmp r2, #4
+ blo .Lctr_enc_done
+ vld1.8 {q11}, [r0]!
+ veor q6, q11
+ vst1.8 {q6}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q12}, [r0]!
+ veor q3, q12
+ vst1.8 {q3}, [r1]!
+ cmp r2, #6
+ blo .Lctr_enc_done
+ vld1.8 {q13}, [r0]!
+ veor q7, q13
+ vst1.8 {q7}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q14}, [r0]
+ veor q2, q14
+ vst1.8 {q2}, [r1]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lctr_enc_bzero
+#else
+ vstmia sp, {q0-q1}
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, r0 @ copy arguments
+ mov r5, r1
+ mov r6, r2
+ mov r7, r3
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {q1}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {q1}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [r4]! @ load input
+ vld1.8 {q1}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor q0,q0,q1
+ vst1.8 {q0}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ subs r9, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds r9, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_enc_ret
+ sub r6, r8, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [r7], #1
+ ldrb r1, [r8, #-0x10]
+ strb r0, [r8, #-0x10]
+ strb r1, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r10, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ tst r9, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne r9, #0x10 @ subtract another 16 bytes
+ subs r9, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds r9, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+ mov r5, r2 @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+ mov r2, r5
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia r2, {q5}
+ vshr.s64 q6, q8, #63
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vswp d13,d12
+ veor q9, q9, q6
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {q0}, [r7]!
+ mov r0, sp
+ veor q0, q0, q9
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q9
+ vst1.8 {q0}, [r8]
+
+ mov r6, r8
+.Lxts_dec_steal:
+ ldrb r1, [r8]
+ ldrb r0, [r7], #1
+ strb r1, [r8, #0x10]
+ strb r0, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+#endif
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
new file mode 100644
index 0000000..f3d96d9
--- /dev/null
+++ b/crypto/aes/asm/bsaes-armv7.pl
@@ -0,0 +1,2467 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+# <[email protected]>. Permission to use under GPL terms is
+# granted.
+# ====================================================================
+
+# Bit-sliced AES for ARM NEON
+#
+# February 2012.
+#
+# This implementation is direct adaptation of bsaes-x86_64 module for
+# ARM NEON. Except that this module is endian-neutral [in sense that
+# it can be compiled for either endianness] by courtesy of vld1.8's
+# neutrality. Initial version doesn't implement interface to OpenSSL,
+# only low-level primitives and unsupported entry points, just enough
+# to collect performance results, which for Cortex-A8 core are:
+#
+# encrypt 19.5 cycles per byte processed with 128-bit key
+# decrypt 22.1 cycles per byte processed with 128-bit key
+# key conv. 440 cycles per 128-bit key/0.18 of 8x block
+#
+# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+# which is [much] worse than anticipated (for further details see
+# http://www.openssl.org/~appro/Snapdragon-S4.html).
+#
+# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+# manages in 20.0 cycles].
+#
+# When comparing to x86_64 results keep in mind that NEON unit is
+# [mostly] single-issue and thus can't [fully] benefit from
+# instruction-level parallelism. And when comparing to aes-armv4
+# results keep in mind key schedule conversion overhead (see
+# bsaes-x86_64.pl for further details)...
+#
+# <[email protected]>
+
+# April-August 2013
+#
+# Add CBC, CTR and XTS subroutines, adapt for kernel use.
+#
+# <[email protected]>
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
+my @XMM=map("q$_",(0..15));
+
+{
+my ($key,$rounds,$const)=("r4","r5","r6");
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub Sbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InBasisChange (@b);
+ &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
+ &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
+}
+
+sub InBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[2], @b[2], @b[1]
+ veor @b[5], @b[5], @b[6]
+ veor @b[3], @b[3], @b[0]
+ veor @b[6], @b[6], @b[2]
+ veor @b[5], @b[5], @b[0]
+
+ veor @b[6], @b[6], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[4], @b[4], @b[5]
+
+ veor @b[2], @b[2], @b[7]
+ veor @b[3], @b[3], @b[1]
+ veor @b[1], @b[1], @b[5]
+___
+}
+
+sub OutBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+ veor @b[4], @b[4], @b[6]
+ veor @b[2], @b[2], @b[0]
+ veor @b[6], @b[6], @b[1]
+
+ veor @b[1], @b[1], @b[5]
+ veor @b[5], @b[5], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[2], @b[2], @b[5]
+
+ veor @b[4], @b[4], @b[7]
+___
+}
+
+sub InvSbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InvInBasisChange (@b);
+ &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
+ &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
+}
+
+sub InvInBasisChange { # OutBasisChange in reverse (with twist)
+my @b=@_[5,1,2,6,3,7,0,4];
+$code.=<<___
+ veor @b[1], @b[1], @b[7]
+ veor @b[4], @b[4], @b[7]
+
+ veor @b[7], @b[7], @b[5]
+ veor @b[1], @b[1], @b[3]
+ veor @b[2], @b[2], @b[5]
+ veor @b[3], @b[3], @b[7]
+
+ veor @b[6], @b[6], @b[1]
+ veor @b[2], @b[2], @b[0]
+ veor @b[5], @b[5], @b[3]
+ veor @b[4], @b[4], @b[6]
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+___
+}
+
+sub InvOutBasisChange { # InBasisChange in reverse
+my @b=@_[2,5,7,3,6,1,0,4];
+$code.=<<___;
+ veor @b[1], @b[1], @b[5]
+ veor @b[2], @b[2], @b[7]
+
+ veor @b[3], @b[3], @b[1]
+ veor @b[4], @b[4], @b[5]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[5], @b[5], @b[0]
+ veor @b[3], @b[3], @b[7]
+ veor @b[6], @b[6], @b[2]
+ veor @b[2], @b[2], @b[1]
+ veor @b[6], @b[6], @b[3]
+
+ veor @b[3], @b[3], @b[0]
+ veor @b[5], @b[5], @b[6]
+___
+}
+
+sub Mul_GF4 {
+#;*************************************************************
+#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
+#;*************************************************************
+my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $t1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $t1, $t0
+ veor $x0, $x0, $t1
+___
+}
+
+sub Mul_GF4_N { # not used, see next subroutine
+# multiply and scale by N
+my ($x0,$x1,$y0,$y1,$t0)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $x1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $x1, $x0
+ veor $x0, $x0, $t0
+___
+}
+
+sub Mul_GF4_N_GF4 {
+# interleaved Mul_GF4_N and Mul_GF4
+my ($x0,$x1,$y0,$y1,$t0,
+ $x2,$x3,$y2,$y3,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ veor $t1, $y2, $y3
+ vand $t0, $t0, $x0
+ vand $t1, $t1, $x2
+ veor $x0, $x0, $x1
+ veor $x2, $x2, $x3
+ vand $x1, $x1, $y0
+ vand $x3, $x3, $y2
+ vand $x0, $x0, $y1
+ vand $x2, $x2, $y3
+ veor $x1, $x1, $x0
+ veor $x2, $x2, $x3
+ veor $x0, $x0, $t0
+ veor $x3, $x3, $t1
+___
+}
+sub Mul_GF16_2 {
+my @x=@_[0..7];
+my @y=@_[8..11];
+my @t=@_[12..15];
+$code.=<<___;
+ veor @t[0], @x[0], @x[2]
+ veor @t[1], @x[1], @x[3]
+___
+ &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[2], @x[3], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @x[0], @x[0], @t[0]
+ veor @x[2], @x[2], @t[0]
+ veor @x[1], @x[1], @t[1]
+ veor @x[3], @x[3], @t[1]
+
+ veor @t[0], @x[4], @x[6]
+ veor @t[1], @x[5], @x[7]
+___
+ &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[6], @x[7], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @x[4], @x[4], @t[0]
+ veor @x[6], @x[6], @t[0]
+ veor @x[5], @x[5], @t[1]
+ veor @x[7], @x[7], @t[1]
+___
+}
+sub Inv_GF256 {
+#;********************************************************************
+#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
+#;********************************************************************
+my @x=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+# direct optimizations from hardware
+$code.=<<___;
+ veor @t[3], @x[4], @x[6]
+ veor @t[2], @x[5], @x[7]
+ veor @t[1], @x[1], @x[3]
+ veor @s[1], @x[7], @x[6]
+ vmov @t[0], @t[2]
+ veor @s[0], @x[0], @x[2]
+
+ vorr @t[2], @t[2], @t[1]
+ veor @s[3], @t[3], @t[0]
+ vand @s[2], @t[3], @s[0]
+ vorr @t[3], @t[3], @s[0]
+ veor @s[0], @s[0], @t[1]
+ vand @t[0], @t[0], @t[1]
+ veor @t[1], @x[3], @x[2]
+ vand @s[3], @s[3], @s[0]
+ vand @s[1], @s[1], @t[1]
+ veor @t[1], @x[4], @x[5]
+ veor @s[0], @x[1], @x[0]
+ veor @t[3], @t[3], @s[1]
+ veor @t[2], @t[2], @s[1]
+ vand @s[1], @t[1], @s[0]
+ vorr @t[1], @t[1], @s[0]
+ veor @t[3], @t[3], @s[3]
+ veor @t[0], @t[0], @s[1]
+ veor @t[2], @t[2], @s[2]
+ veor @t[1], @t[1], @s[3]
+ veor @t[0], @t[0], @s[2]
+ vand @s[0], @x[7], @x[3]
+ veor @t[1], @t[1], @s[2]
+ vand @s[1], @x[6], @x[2]
+ vand @s[2], @x[5], @x[1]
+ vorr @s[3], @x[4], @x[0]
+ veor @t[3], @t[3], @s[0]
+ veor @t[1], @t[1], @s[2]
+ veor @t[0], @t[0], @s[3]
+ veor @t[2], @t[2], @s[1]
+
+ @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
+
+ @ new smaller inversion
+
+ vand @s[2], @t[3], @t[1]
+ vmov @s[0], @t[0]
+
+ veor @s[1], @t[2], @s[2]
+ veor @s[3], @t[0], @s[2]
+ veor @s[2], @t[0], @s[2] @ @s[2]=@s[3]
+
+ vbsl @s[1], @t[1], @t[0]
+ vbsl @s[3], @t[3], @t[2]
+ veor @t[3], @t[3], @t[2]
+
+ vbsl @s[0], @s[1], @s[2]
+ vbsl @t[0], @s[2], @s[1]
+
+ vand @s[2], @s[0], @s[3]
+ veor @t[1], @t[1], @t[0]
+
+ veor @s[2], @s[2], @t[3]
+___
+# output in s3, s2, s1, t1
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+ &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
+
+### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
+}
+
+# AES linear components
+
+sub ShiftRows {
+my @x=@_[0..7];
+my @t=@_[8..11];
+my $mask=pop;
+$code.=<<___;
+ vldmia $key!, {@t[0]-@t[3]}
+ veor @t[0], @t[0], @x[0]
+ veor @t[1], @t[1], @x[1]
+ vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
+ vldmia $key!, {@t[0]}
+ veor @t[2], @t[2], @x[2]
+ vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
+ vldmia $key!, {@t[1]}
+ veor @t[3], @t[3], @x[3]
+ vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
+ vldmia $key!, {@t[2]}
+ vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
+ vldmia $key!, {@t[3]}
+ veor @t[0], @t[0], @x[4]
+ veor @t[1], @t[1], @x[5]
+ vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
+ veor @t[2], @t[2], @x[6]
+ vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
+ veor @t[3], @t[3], @x[7]
+ vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
+ vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
+___
+}
+
+sub MixColumns {
+# modified to emit output in order suitable for feeding back to aesenc[last]
+my @x=@_[0..7];
+my @t=@_[8..15];
+my $inv=@_[16]; # optional
+$code.=<<___;
+ vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32)
+ vext.8 @t[2], @x[2], @x[2], #12
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[2], @x[2], @t[2]
+ vext.8 @t[4], @x[4], @x[4], #12
+ veor @x[3], @x[3], @t[3]
+ vext.8 @t[5], @x[5], @x[5], #12
+ veor @x[4], @x[4], @t[4]
+ vext.8 @t[6], @x[6], @x[6], #12
+ veor @x[5], @x[5], @t[5]
+ vext.8 @t[7], @x[7], @x[7], #12
+ veor @x[6], @x[6], @t[6]
+
+ veor @t[1], @t[1], @x[0]
+ veor @x[7], @x[7], @t[7]
+ vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor @t[2], @t[2], @x[1]
+ veor @t[0], @t[0], @x[7]
+ veor @t[1], @t[1], @x[7]
+ vext.8 @x[1], @x[1], @x[1], #8
+ veor @t[5], @t[5], @x[4]
+ veor @x[0], @x[0], @t[0]
+ veor @t[6], @t[6], @x[5]
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[0], @x[4], @x[4], #8
+ veor @t[4], @t[4], @x[3]
+ vext.8 @t[1], @x[5], @x[5], #8
+ veor @t[7], @t[7], @x[6]
+ vext.8 @x[4], @x[3], @x[3], #8
+ veor @t[3], @t[3], @x[2]
+ vext.8 @x[5], @x[7], @x[7], #8
+ veor @t[4], @t[4], @x[7]
+ vext.8 @x[3], @x[6], @x[6], #8
+ veor @t[3], @t[3], @x[7]
+ vext.8 @x[6], @x[2], @x[2], #8
+ veor @x[7], @t[1], @t[5]
+___
+$code.=<<___ if (!$inv);
+ veor @x[2], @t[0], @t[4]
+ veor @x[4], @x[4], @t[3]
+ veor @x[5], @x[5], @t[7]
+ veor @x[3], @x[3], @t[6]
+ @ vmov @x[2], @t[0]
+ veor @x[6], @x[6], @t[2]
+ @ vmov @x[7], @t[1]
+___
+$code.=<<___ if ($inv);
+ veor @t[3], @t[3], @x[4]
+ veor @x[5], @x[5], @t[7]
+ veor @x[2], @x[3], @t[6]
+ veor @x[3], @t[0], @t[4]
+ veor @x[4], @x[6], @t[2]
+ vmov @x[6], @t[3]
+ @ vmov @x[7], @t[1]
+___
+}
+
+sub InvMixColumns_orig {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+$code.=<<___;
+ @ multiplication by 0x0e
+ vext.8 @t[7], @x[7], @x[7], #12
+ vmov @t[2], @x[2]
+ veor @x[2], @x[2], @x[5] @ 2 5
+ veor @x[7], @x[7], @x[5] @ 7 5
+ vext.8 @t[0], @x[0], @x[0], #12
+ vmov @t[5], @x[5]
+ veor @x[5], @x[5], @x[0] @ 5 0 [1]
+ veor @x[0], @x[0], @x[1] @ 0 1
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[1], @x[1], @x[2] @ 1 25
+ veor @x[0], @x[0], @x[6] @ 01 6 [2]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[1], @x[1], @x[3] @ 125 3 [4]
+ veor @x[2], @x[2], @x[0] @ 25 016 [3]
+ veor @x[3], @x[3], @x[7] @ 3 75
+ veor @x[7], @x[7], @x[6] @ 75 6 [0]
+ vext.8 @t[6], @x[6], @x[6], #12
+ vmov @t[4], @x[4]
+ veor @x[6], @x[6], @x[4] @ 6 4
+ veor @x[4], @x[4], @x[3] @ 4 375 [6]
+ veor @x[3], @x[3], @x[7] @ 375 756=36
+ veor @x[6], @x[6], @t[5] @ 64 5 [7]
+ veor @x[3], @x[3], @t[2] @ 36 2
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @x[3], @x[3], @t[4] @ 362 4 [5]
+___
+ my @y = @x[7,5,0,2,1,3,4,6];
+$code.=<<___;
+ @ multiplication by 0x0b
+ veor @y[1], @y[1], @y[0]
+ veor @y[0], @y[0], @t[0]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[1], @y[1], @t[6]
+ veor @y[0], @y[0], @t[7]
+ veor @t[7], @t[7], @t[6] @ clobber t[7]
+
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @y[0]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[2], @y[2], @t[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+ veor @y[2], @y[2], @t[2]
+ veor @y[3], @y[3], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[7]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[3], @y[3], @t[3]
+ veor @y[6], @y[6], @t[3]
+ veor @y[4], @y[4], @t[3]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[3], @t[3], @t[3], #12
+ veor @y[5], @y[5], @t[4]
+ veor @y[7], @y[7], @t[7]
+ veor @t[7], @t[7], @t[5] @ clobber t[7] even more
+ veor @y[3], @y[3], @t[5]
+ veor @y[4], @y[4], @t[4]
+
+ veor @y[5], @y[5], @t[7]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[6], @y[6], @t[7]
+ veor @y[4], @y[4], @t[7]
+
+ veor @t[7], @t[7], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+
+ @ multiplication by 0x0d
+ veor @y[4], @y[4], @y[7]
+ veor @t[7], @t[7], @t[6] @ restore t[7]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @y[2], @y[2], @t[0]
+ veor @y[7], @y[7], @t[5]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @y[2], @y[2], @t[2]
+
+ veor @y[3], @y[3], @y[1]
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[0]
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @t[5]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[1], @y[1], @t[7]
+ veor @y[0], @y[0], @t[6]
+ veor @y[3], @y[3], @y[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+
+ veor @y[7], @y[7], @t[7]
+ veor @y[4], @y[4], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[6]
+ veor @t[6], @t[6], @t[3] @ clobber t[6]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[4], @y[4], @y[7]
+ veor @y[3], @y[3], @t[6]
+
+ veor @y[6], @y[6], @t[6]
+ veor @y[5], @y[5], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @y[6], @y[6], @t[4]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[5], @y[5], @t[6]
+ veor @y[6], @y[6], @t[7]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @t[6], @t[6], @t[3] @ restore t[6]
+ vext.8 @t[3], @t[3], @t[3], #12
+
+ @ multiplication by 0x09
+ veor @y[4], @y[4], @y[1]
+ veor @t[1], @t[1], @y[1] @ t[1]=y[1]
+ veor @t[0], @t[0], @t[5] @ clobber t[0]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @t[1], @t[1], @t[5]
+ veor @y[3], @y[3], @t[0]
+ veor @t[0], @t[0], @y[0] @ t[0]=y[0]
+ veor @t[1], @t[1], @t[6]
+ veor @t[6], @t[6], @t[7] @ clobber t[6]
+ veor @y[4], @y[4], @t[1]
+ veor @y[7], @y[7], @t[4]
+ veor @y[6], @y[6], @t[3]
+ veor @y[5], @y[5], @t[2]
+ veor @t[4], @t[4], @y[4] @ t[4]=y[4]
+ veor @t[3], @t[3], @y[3] @ t[3]=y[3]
+ veor @t[5], @t[5], @y[5] @ t[5]=y[5]
+ veor @t[2], @t[2], @y[2] @ t[2]=y[2]
+ veor @t[3], @t[3], @t[7]
+ veor @XMM[5], @t[5], @t[6]
+ veor @XMM[6], @t[6], @y[6] @ t[6]=y[6]
+ veor @XMM[2], @t[2], @t[6]
+ veor @XMM[7], @t[7], @y[7] @ t[7]=y[7]
+
+ vmov @XMM[0], @t[0]
+ vmov @XMM[1], @t[1]
+ @ vmov @XMM[2], @t[2]
+ vmov @XMM[3], @t[3]
+ vmov @XMM[4], @t[4]
+ @ vmov @XMM[5], @t[5]
+ @ vmov @XMM[6], @t[6]
+ @ vmov @XMM[7], @t[7]
+___
+}
+
+sub InvMixColumns {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+# Thanks to Jussi Kivilinna for providing pointer to
+#
+# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
+# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
+# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
+# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
+
+$code.=<<___;
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 @t[0], @x[0], @x[0], #8
+ vext.8 @t[6], @x[6], @x[6], #8
+ vext.8 @t[7], @x[7], @x[7], #8
+ veor @t[0], @t[0], @x[0]
+ vext.8 @t[1], @x[1], @x[1], #8
+ veor @t[6], @t[6], @x[6]
+ vext.8 @t[2], @x[2], @x[2], #8
+ veor @t[7], @t[7], @x[7]
+ vext.8 @t[3], @x[3], @x[3], #8
+ veor @t[1], @t[1], @x[1]
+ vext.8 @t[4], @x[4], @x[4], #8
+ veor @t[2], @t[2], @x[2]
+ vext.8 @t[5], @x[5], @x[5], #8
+ veor @t[3], @t[3], @x[3]
+ veor @t[4], @t[4], @x[4]
+ veor @t[5], @t[5], @x[5]
+
+ veor @x[0], @x[0], @t[6]
+ veor @x[1], @x[1], @t[6]
+ veor @x[2], @x[2], @t[0]
+ veor @x[4], @x[4], @t[2]
+ veor @x[3], @x[3], @t[1]
+ veor @x[1], @x[1], @t[7]
+ veor @x[2], @x[2], @t[7]
+ veor @x[4], @x[4], @t[6]
+ veor @x[5], @x[5], @t[3]
+ veor @x[3], @x[3], @t[6]
+ veor @x[6], @x[6], @t[4]
+ veor @x[4], @x[4], @t[7]
+ veor @x[5], @x[5], @t[7]
+ veor @x[7], @x[7], @t[5]
+___
+ &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
+}
+
+sub swapmove {
+my ($a,$b,$n,$mask,$t)=@_;
+$code.=<<___;
+ vshr.u64 $t, $b, #$n
+ veor $t, $t, $a
+ vand $t, $t, $mask
+ veor $a, $a, $t
+ vshl.u64 $t, $t, #$n
+ veor $b, $b, $t
+___
+}
+sub swapmove2x {
+my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
+$code.=<<___;
+ vshr.u64 $t0, $b0, #$n
+ vshr.u64 $t1, $b1, #$n
+ veor $t0, $t0, $a0
+ veor $t1, $t1, $a1
+ vand $t0, $t0, $mask
+ vand $t1, $t1, $mask
+ veor $a0, $a0, $t0
+ vshl.u64 $t0, $t0, #$n
+ veor $a1, $a1, $t1
+ vshl.u64 $t1, $t1, #$n
+ veor $b0, $b0, $t0
+ veor $b1, $b1, $t1
+___
+}
+
+sub bitslice {
+my @x=reverse(@_[0..7]);
+my ($t0,$t1,$t2,$t3)=@_[8..11];
+$code.=<<___;
+ vmov.i8 $t0,#0x55 @ compose .LBS0
+ vmov.i8 $t1,#0x33 @ compose .LBS1
+___
+ &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
+ &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+$code.=<<___;
+ vmov.i8 $t0,#0x0f @ compose .LBS2
+___
+ &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
+ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+
+ &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
+ &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
+}
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr $const,_bsaes_decrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ add $const,$const,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia $const!, {@XMM[8]} @ .LM0ISR
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Ldec_sbox:\n";
+ &InvSbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Ldec_done
+___
+ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq $const,$const,#0x10
+ bne .Ldec_loop
+ vldmia $const, {@XMM[12]} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+___
+ &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr $const,_bsaes_encrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ sub $const,$const,#_bsaes_encrypt8-.LM0SR
+
+ vldmia $const!, {@XMM[8]} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+_bsaes_encrypt8_bitslice:
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Lenc_sbox:\n";
+ &Sbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Lenc_done
+___
+ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LSR
+ ite eq @ Thumb2 thing, samity check in ARM
+ addeq $const,$const,#0x10
+ bne .Lenc_loop
+ vldmia $const, {@XMM[12]} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+___
+ # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
+ &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+___
+}
+{
+my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
+
+sub bitslice_key {
+my @x=reverse(@_[0..7]);
+my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
+
+ &swapmove (@x[0,1],1,$bs0,$t2,$t3);
+$code.=<<___;
+ @ &swapmove(@x[2,3],1,$t0,$t2,$t3);
+ vmov @x[2], @x[0]
+ vmov @x[3], @x[1]
+___
+ #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+
+ &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
+$code.=<<___;
+ @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+ vmov @x[4], @x[0]
+ vmov @x[6], @x[2]
+ vmov @x[5], @x[1]
+ vmov @x[7], @x[3]
+___
+ &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
+ &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
+}
+
+$code.=<<___;
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr $const,_bsaes_key_convert
+ vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key
+ sub $const,$const,#_bsaes_key_convert-.LM0
+ vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key
+
+ vmov.i8 @XMM[8], #0x01 @ bit masks
+ vmov.i8 @XMM[9], #0x02
+ vmov.i8 @XMM[10], #0x04
+ vmov.i8 @XMM[11], #0x08
+ vmov.i8 @XMM[12], #0x10
+ vmov.i8 @XMM[13], #0x20
+ vldmia $const, {@XMM[14]} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 @XMM[7], @XMM[7]
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ sub $rounds,$rounds,#1
+ vstmia $out!, {@XMM[7]} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
+ vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
+ vmov.i8 @XMM[6], #0x40
+ vmov.i8 @XMM[15], #0x80
+
+ vtst.8 @XMM[0], @XMM[7], @XMM[8]
+ vtst.8 @XMM[1], @XMM[7], @XMM[9]
+ vtst.8 @XMM[2], @XMM[7], @XMM[10]
+ vtst.8 @XMM[3], @XMM[7], @XMM[11]
+ vtst.8 @XMM[4], @XMM[7], @XMM[12]
+ vtst.8 @XMM[5], @XMM[7], @XMM[13]
+ vtst.8 @XMM[6], @XMM[7], @XMM[6]
+ vtst.8 @XMM[7], @XMM[7], @XMM[15]
+ vld1.8 {@XMM[15]}, [$inp]! @ load next round key
+ vmvn @XMM[0], @XMM[0] @ "pnot"
+ vmvn @XMM[1], @XMM[1]
+ vmvn @XMM[5], @XMM[5]
+ vmvn @XMM[6], @XMM[6]
+#ifdef __ARMEL__
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ subs $rounds,$rounds,#1
+ vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 @XMM[7],#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+___
+}
+
+if (0) { # following four functions are unsupported interface
+ # used for benchmarking...
+$code.=<<___;
+.globl bsaes_enc_key_convert
+.type bsaes_enc_key_convert,%function
+.align 4
+bsaes_enc_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
+
+.globl bsaes_encrypt_128
+.type bsaes_encrypt_128,%function
+.align 4
+bsaes_encrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Lenc128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_encrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Lenc128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_encrypt_128,.-bsaes_encrypt_128
+
+.globl bsaes_dec_key_convert
+.type bsaes_dec_key_convert,%function
+.align 4
+bsaes_dec_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ vldmia $out, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $out, {@XMM[7]}
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
+
+.globl bsaes_decrypt_128
+.type bsaes_decrypt_128,%function
+.align 4
+bsaes_decrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Ldec128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_decrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Ldec128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_decrypt_128,.-bsaes_decrypt_128
+___
+}
+{
+my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
+my ($keysched)=("sp");
+
+$code.=<<___;
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp $len, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ivp, [ip] @ IV is 1st arg on the stack
+ mov $len, $len, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ add r12, #`128-32` @ sifze of bit-slices key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ vldmia $keysched, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $keysched, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {@XMM[15]}, [$ivp] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs $len, $len, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5, $rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]
+ sub $inp, $inp, #0x60
+ vstmia $fp, {@XMM[15]} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ veor @XMM[5], @XMM[5], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[5]}, [$out]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds $len, $len, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {@XMM[0]}, [$inp]! @ load input
+ cmp $len, #2
+ blo .Lcbc_dec_one
+ vld1.8 {@XMM[1]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ mov r5, $rounds
+ vstmia $fp, {@XMM[15]} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {@XMM[2]}, [$inp]!
+ cmp $len, #4
+ blo .Lcbc_dec_three
+ vld1.8 {@XMM[3]}, [$inp]!
+ beq .Lcbc_dec_four
+ vld1.8 {@XMM[4]}, [$inp]!
+ cmp $len, #6
+ blo .Lcbc_dec_five
+ vld1.8 {@XMM[5]}, [$inp]!
+ beq .Lcbc_dec_six
+ vld1.8 {@XMM[6]}, [$inp]!
+ sub $inp, $inp, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub $inp, $inp, #0x60
+ bl _bsaes_decrypt8
+ vldmia $fp,{@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub $inp, $inp, #0x50
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub $inp, $inp, #0x40
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub $inp, $inp, #0x30
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub $inp, $inp, #0x20
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]! @ reload input
+ veor @XMM[1], @XMM[1], @XMM[8]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub $inp, $inp, #0x10
+ mov $rounds, $out @ save original out pointer
+ mov $out, $fp @ use the iv scratch space as out buffer
+ mov r2, $key
+ vmov @XMM[4],@XMM[15] @ just in case ensure that IV
+ vmov @XMM[5],@XMM[0] @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {@XMM[0]}, [$fp,:64] @ load result
+ veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
+ vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
+ vst1.8 {@XMM[0]}, [$rounds] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ vst1.8 {@XMM[15]}, [$ivp] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+___
+}
+{
+my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
+my $const = "r6"; # shared with _bsaes_encrypt8_alt
+my $keysched = "sp";
+
+$code.=<<___;
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp $len, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ctr, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ add r12, #`128-32` @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
+ vldmia $keysched, {@XMM[4]} @ load round0 key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+.align 2
+0: add r12, $key, #248
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ adrl $ctr, .LREVM0SR @ borrow $ctr
+ vldmia r12, {@XMM[4]} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 @XMM[8],#1 @ compose 1<<96
+ veor @XMM[9],@XMM[9],@XMM[9]
+ vrev32.8 @XMM[0],@XMM[0]
+ vext.8 @XMM[8],@XMM[9],@XMM[8],#4
+ vrev32.8 @XMM[4],@XMM[4]
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vstmia $keysched, {@XMM[4]} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96
+ vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1
+ vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2
+ vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3
+ vadd.u32 @XMM[4], @XMM[1], @XMM[10]
+ vadd.u32 @XMM[5], @XMM[2], @XMM[10]
+ vadd.u32 @XMM[6], @XMM[3], @XMM[10]
+ vadd.u32 @XMM[7], @XMM[4], @XMM[10]
+ vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia $keysched, {@XMM[9]} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, $keysched, #0x10 @ pass next round key
+#else
+ add r4, $key, #`248+16`
+#endif
+ vldmia $ctr, {@XMM[8]} @ .LREVM0SR
+ mov r5, $rounds @ pass rounds
+ vstmia $fp, {@XMM[10]} @ save next counter
+ sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs $len, $len, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ veor @XMM[6], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[7], @XMM[13]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ veor @XMM[5], @XMM[15]
+ vst1.8 {@XMM[6]}, [$out]!
+ vmov.i32 @XMM[8], #1 @ compose 1<<96
+ vst1.8 {@XMM[3]}, [$out]!
+ veor @XMM[9], @XMM[9], @XMM[9]
+ vst1.8 {@XMM[7]}, [$out]!
+ vext.8 @XMM[8], @XMM[9], @XMM[8], #4
+ vst1.8 {@XMM[2]}, [$out]!
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vst1.8 {@XMM[5]}, [$out]!
+ vldmia $fp, {@XMM[0]} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add $len, $len, #8
+ vld1.8 {@XMM[8]}, [$inp]! @ load input
+ veor @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]! @ write output
+ cmp $len, #2
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[9]}, [$inp]!
+ veor @XMM[1], @XMM[9]
+ vst1.8 {@XMM[1]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ vst1.8 {@XMM[4]}, [$out]!
+ cmp $len, #4
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[11]}, [$inp]!
+ veor @XMM[6], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[3]}, [$out]!
+ cmp $len, #6
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[13]}, [$inp]!
+ veor @XMM[7], @XMM[13]
+ vst1.8 {@XMM[7]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[14]}, [$inp]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[2]}, [$out]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lctr_enc_bzero
+#else
+ vstmia $keysched, {q0-q1}
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, $inp @ copy arguments
+ mov r5, $out
+ mov r6, $len
+ mov r7, $key
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {@XMM[1]}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [r4]! @ load input
+ vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor @XMM[0],@XMM[0],@XMM[1]
+ vst1.8 {@XMM[0]}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+___
+}
+{
+######################################################################
+# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
+# const AES_KEY *key1, const AES_KEY *key2,
+# const unsigned char iv[16]);
+#
+my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
+my $const="r6"; # returned by _bsaes_key_convert
+my $twmask=@XMM[5];
+my @T=@XMM[6..7];
+
+$code.=<<___;
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ subs $len, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds $len, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_enc_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_enc_ret
+ sub r6, $out, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [$inp], #1
+ ldrb r1, [$out, #-0x10]
+ strb r0, [$out, #-0x10]
+ strb r1, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ tst $len, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne $len, #0x10 @ subtract another 16 bytes
+ subs $len, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds $len, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_dec_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+ mov r5, $magic @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+ mov $magic, r5
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia $magic, {$twmask}
+ vshr.s64 @XMM[6], @XMM[8], #63
+ vand @XMM[6], @XMM[6], $twmask
+ vadd.u64 @XMM[9], @XMM[8], @XMM[8]
+ vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
+ veor @XMM[9], @XMM[9], @XMM[6]
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {@XMM[0]}, [$inp]!
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[9]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[9]
+ vst1.8 {@XMM[0]}, [$out]
+
+ mov r6, $out
+.Lxts_dec_steal:
+ ldrb r1, [$out]
+ ldrb r0, [$inp], #1
+ strb r1, [$out, #0x10]
+ strb r0, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+___
+}
+$code.=<<___;
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+print $code;
+
+close STDOUT;
diff --git a/crypto/aes/asm/vpaes-x86_64.S b/crypto/aes/asm/vpaes-x86_64.S
index 2b68e61..0162631 100644
--- a/crypto/aes/asm/vpaes-x86_64.S
+++ b/crypto/aes/asm/vpaes-x86_64.S
@@ -823,6 +823,6 @@
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
-.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
index 41f2e46..bd7f45b 100644
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -1060,7 +1060,7 @@
.Lk_dsbo: # decryption sbox final output
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
-.asciz "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
+.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
.align 64
.size _vpaes_consts,.-_vpaes_consts
___
diff --git a/crypto/arm64cpuid.S b/crypto/arm64cpuid.S
new file mode 100644
index 0000000..4778ac1
--- /dev/null
+++ b/crypto/arm64cpuid.S
@@ -0,0 +1,46 @@
+#include "arm_arch.h"
+
+.text
+.arch armv8-a+crypto
+
+.align 5
+.global _armv7_neon_probe
+.type _armv7_neon_probe,%function
+_armv7_neon_probe:
+ orr v15.16b, v15.16b, v15.16b
+ ret
+.size _armv7_neon_probe,.-_armv7_neon_probe
+
+.global _armv7_tick
+.type _armv7_tick,%function
+_armv7_tick:
+ mrs x0, CNTVCT_EL0
+ ret
+.size _armv7_tick,.-_armv7_tick
+
+.global _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+ aese v0.16b, v0.16b
+ ret
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.global _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+ sha1h s0, s0
+ ret
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+ sha256su0 v0.4s, v0.4s
+ ret
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+.global _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+ pmull v0.1q, v0.1d, v0.1d
+ ret
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 5a83107..6fa8724 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -10,13 +10,24 @@
# define __ARMEL__
# endif
# elif defined(__GNUC__)
+# if defined(__aarch64__)
+# define __ARM_ARCH__ 8
+# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+# define __ARMEB__
+# else
+# define __ARMEL__
+# endif
/*
* Why doesn't gcc define __ARM_ARCH__? Instead it defines
* bunch of below macros. See all_architectires[] table in
* gcc/config/arm/arm.c. On a side note it defines
* __ARMEL__/__ARMEB__ for little-/big-endian.
*/
-# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
+# elif defined(__ARM_ARCH)
+# define __ARM_ARCH__ __ARM_ARCH
+# elif defined(__ARM_ARCH_8A__)
+# define __ARM_ARCH__ 8
+# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
@@ -43,9 +54,13 @@
#if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
+#endif
#define ARMV7_NEON (1<<0)
#define ARMV7_TICK (1<<1)
-#endif
+#define ARMV8_AES (1<<2)
+#define ARMV8_SHA1 (1<<3)
+#define ARMV8_SHA256 (1<<4)
+#define ARMV8_PMULL (1<<5)
#endif
diff --git a/crypto/armcap.c b/crypto/armcap.c
new file mode 100644
index 0000000..7e46d07
--- /dev/null
+++ b/crypto/armcap.c
@@ -0,0 +1,157 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <crypto.h>
+
+#include "arm_arch.h"
+
+unsigned int OPENSSL_armcap_P;
+
+static sigset_t all_masked;
+
+static sigjmp_buf ill_jmp;
+static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
+
+/*
+ * Following subroutines could have been inlined, but it's not all
+ * ARM compilers support inline assembler...
+ */
+void _armv7_neon_probe(void);
+void _armv8_aes_probe(void);
+void _armv8_sha1_probe(void);
+void _armv8_sha256_probe(void);
+void _armv8_pmull_probe(void);
+unsigned long _armv7_tick(void);
+
+unsigned long OPENSSL_rdtsc(void)
+ {
+ if (OPENSSL_armcap_P & ARMV7_TICK)
+ return _armv7_tick();
+ else
+ return 0;
+ }
+
+/*
+ * Use a weak reference to getauxval() so we can use it if it is available but
+ * don't break the build if it is not.
+ */
+#if defined(__GNUC__) && __GNUC__>=2
+void OPENSSL_cpuid_setup(void) __attribute__((constructor));
+extern unsigned long getauxval(unsigned long type) __attribute__((weak));
+#else
+static unsigned long (*getauxval)(unsigned long) = NULL;
+#endif
+
+/*
+ * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas
+ * AArch64 used AT_HWCAP.
+ */
+#if defined(__arm__) || defined (__arm)
+# define HWCAP 16 /* AT_HWCAP */
+# define HWCAP_NEON (1 << 12)
+
+# define HWCAP_CE 26 /* AT_HWCAP2 */
+# define HWCAP_CE_AES (1 << 0)
+# define HWCAP_CE_PMULL (1 << 1)
+# define HWCAP_CE_SHA1 (1 << 2)
+# define HWCAP_CE_SHA256 (1 << 3)
+#elif defined(__aarch64__)
+# define HWCAP 16 /* AT_HWCAP */
+# define HWCAP_NEON (1 << 1)
+
+# define HWCAP_CE HWCAP
+# define HWCAP_CE_AES (1 << 3)
+# define HWCAP_CE_PMULL (1 << 4)
+# define HWCAP_CE_SHA1 (1 << 5)
+# define HWCAP_CE_SHA256 (1 << 6)
+#endif
+
+void OPENSSL_cpuid_setup(void)
+ {
+ char *e;
+ struct sigaction ill_oact,ill_act;
+ sigset_t oset;
+ static int trigger=0;
+
+ if (trigger) return;
+ trigger=1;
+
+ if ((e=getenv("OPENSSL_armcap")))
+ {
+ OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0);
+ return;
+ }
+
+ sigfillset(&all_masked);
+ sigdelset(&all_masked,SIGILL);
+ sigdelset(&all_masked,SIGTRAP);
+ sigdelset(&all_masked,SIGFPE);
+ sigdelset(&all_masked,SIGBUS);
+ sigdelset(&all_masked,SIGSEGV);
+
+ OPENSSL_armcap_P = 0;
+
+ memset(&ill_act,0,sizeof(ill_act));
+ ill_act.sa_handler = ill_handler;
+ ill_act.sa_mask = all_masked;
+
+ sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
+ sigaction(SIGILL,&ill_act,&ill_oact);
+
+ if (getauxval != NULL)
+ {
+ if (getauxval(HWCAP) & HWCAP_NEON)
+ {
+ unsigned long hwcap = getauxval(HWCAP_CE);
+
+ OPENSSL_armcap_P |= ARMV7_NEON;
+
+ if (hwcap & HWCAP_CE_AES)
+ OPENSSL_armcap_P |= ARMV8_AES;
+
+ if (hwcap & HWCAP_CE_PMULL)
+ OPENSSL_armcap_P |= ARMV8_PMULL;
+
+ if (hwcap & HWCAP_CE_SHA1)
+ OPENSSL_armcap_P |= ARMV8_SHA1;
+
+ if (hwcap & HWCAP_CE_SHA256)
+ OPENSSL_armcap_P |= ARMV8_SHA256;
+ }
+ }
+ else if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv7_neon_probe();
+ OPENSSL_armcap_P |= ARMV7_NEON;
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv8_pmull_probe();
+ OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES;
+ }
+ else if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv8_aes_probe();
+ OPENSSL_armcap_P |= ARMV8_AES;
+ }
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv8_sha1_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA1;
+ }
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv8_sha256_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA256;
+ }
+ }
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv7_tick();
+ OPENSSL_armcap_P |= ARMV7_TICK;
+ }
+
+ sigaction (SIGILL,&ill_oact,NULL);
+ sigprocmask(SIG_SETMASK,&oset,NULL);
+ }
diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S
index 2d618de..add11d4 100644
--- a/crypto/armv4cpuid.S
+++ b/crypto/armv4cpuid.S
@@ -7,17 +7,49 @@
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
- .word 0xf26ee1fe @ vorr q15,q15,q15
- .word 0xe12fff1e @ bx lr
+ .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
- mrc p15,0,r0,c9,c13,0
- .word 0xe12fff1e @ bx lr
+ mrrc p15,1,r0,r1,c14 @ CNTVCT
+#if __ARM_ARCH__>=5
+ bx lr
+#else
+ .word 0xe12fff1e @ bx lr
+#endif
.size _armv7_tick,.-_armv7_tick
+.global _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+ .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.global _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+ .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+ .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+.global _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+ .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
+
+.align 5
.global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
OPENSSL_atomic_add:
@@ -28,7 +60,7 @@
cmp r2,#0
bne .Ladd
mov r0,r3
- .word 0xe12fff1e @ bx lr
+ bx lr
#else
stmdb sp!,{r4-r6,lr}
ldr r2,.Lspinlock
@@ -81,9 +113,13 @@
adds r1,r1,#4
bne .Little
.Lcleanse_done:
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global OPENSSL_wipe_cpu
@@ -97,41 +133,53 @@
eor ip,ip,ip
tst r0,#1
beq .Lwipe_done
- .word 0xf3000150 @ veor q0, q0, q0
- .word 0xf3022152 @ veor q1, q1, q1
- .word 0xf3044154 @ veor q2, q2, q2
- .word 0xf3066156 @ veor q3, q3, q3
- .word 0xf34001f0 @ veor q8, q8, q8
- .word 0xf34221f2 @ veor q9, q9, q9
- .word 0xf34441f4 @ veor q10, q10, q10
- .word 0xf34661f6 @ veor q11, q11, q11
- .word 0xf34881f8 @ veor q12, q12, q12
- .word 0xf34aa1fa @ veor q13, q13, q13
- .word 0xf34cc1fc @ veor q14, q14, q14
- .word 0xf34ee1fe @ veor q15, q15, q15
+ .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
+ .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
+ .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
+ .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
+ .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
+ .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
+ .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
+ .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
+ .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
+ .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
+ .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
+ .byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14
.Lwipe_done:
mov r0,sp
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.global OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
eor r0,r0,r0
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.global OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
eor r0,r0,r0
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
diff --git a/crypto/asn1/a_strnid.c b/crypto/asn1/a_strnid.c
index 2fc48c1..2afd5a4 100644
--- a/crypto/asn1/a_strnid.c
+++ b/crypto/asn1/a_strnid.c
@@ -74,7 +74,7 @@
* certain software (e.g. Netscape) has problems with them.
*/
-static unsigned long global_mask = 0xFFFFFFFFL;
+static unsigned long global_mask = B_ASN1_UTF8STRING;
void ASN1_STRING_set_default_mask(unsigned long mask)
{
diff --git a/crypto/asn1/asn1_err.c b/crypto/asn1/asn1_err.c
index 1a30bf1..aa60203 100644
--- a/crypto/asn1/asn1_err.c
+++ b/crypto/asn1/asn1_err.c
@@ -305,7 +305,7 @@
{ERR_REASON(ASN1_R_UNKNOWN_PUBLIC_KEY_TYPE),"unknown public key type"},
{ERR_REASON(ASN1_R_UNKNOWN_SIGNATURE_ALGORITHM),"unknown signature algorithm"},
{ERR_REASON(ASN1_R_UNKNOWN_TAG) ,"unknown tag"},
-{ERR_REASON(ASN1_R_UNKOWN_FORMAT) ,"unkown format"},
+{ERR_REASON(ASN1_R_UNKOWN_FORMAT) ,"unknown format"},
{ERR_REASON(ASN1_R_UNSUPPORTED_ANY_DEFINED_BY_TYPE),"unsupported any defined by type"},
{ERR_REASON(ASN1_R_UNSUPPORTED_CIPHER) ,"unsupported cipher"},
{ERR_REASON(ASN1_R_UNSUPPORTED_ENCRYPTION_ALGORITHM),"unsupported encryption algorithm"},
diff --git a/crypto/bio/bio.h b/crypto/bio/bio.h
index 05699ab..d05fa22 100644
--- a/crypto/bio/bio.h
+++ b/crypto/bio/bio.h
@@ -266,6 +266,9 @@
#define BIO_RR_CONNECT 0x02
/* Returned from the accept BIO when an accept would have blocked */
#define BIO_RR_ACCEPT 0x03
+/* Returned from the SSL bio when the channel id retrieval code cannot find the
+ * private key. */
+#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04
/* These are passed by the BIO callback */
#define BIO_CB_FREE 0x01
diff --git a/crypto/bio/bss_dgram.c b/crypto/bio/bss_dgram.c
index 54c012c..d9967e7 100644
--- a/crypto/bio/bss_dgram.c
+++ b/crypto/bio/bss_dgram.c
@@ -1333,7 +1333,7 @@
bio_dgram_sctp_data *data = NULL;
socklen_t sockopt_len = 0;
struct sctp_authkeyid authkeyid;
- struct sctp_authkey *authkey;
+ struct sctp_authkey *authkey = NULL;
data = (bio_dgram_sctp_data *)b->ptr;
@@ -1388,6 +1388,11 @@
/* Add new key */
sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t);
authkey = OPENSSL_malloc(sockopt_len);
+ if (authkey == NULL)
+ {
+ ret = -1;
+ break;
+ }
memset(authkey, 0x00, sockopt_len);
authkey->sca_keynumber = authkeyid.scact_keynumber + 1;
#ifndef __FreeBSD__
@@ -1399,6 +1404,8 @@
memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t));
ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len);
+ OPENSSL_free(authkey);
+ authkey = NULL;
if (ret < 0) break;
/* Reset active key */
diff --git a/crypto/bio/bss_log.c b/crypto/bio/bss_log.c
index b7dce5c..2227b2b 100644
--- a/crypto/bio/bss_log.c
+++ b/crypto/bio/bss_log.c
@@ -245,7 +245,7 @@
static void xopenlog(BIO* bp, char* name, int level)
{
- if (GetVersion() < 0x80000000)
+ if (check_winnt())
bp->ptr = RegisterEventSourceA(NULL,name);
else
bp->ptr = NULL;
diff --git a/crypto/bn/asm/armv4-gf2m.S b/crypto/bn/asm/armv4-gf2m.S
index 038f086..0fa25b2 100644
--- a/crypto/bn/asm/armv4-gf2m.S
+++ b/crypto/bn/asm/armv4-gf2m.S
@@ -5,31 +5,6 @@
#if __ARM_ARCH__>=7
.fpu neon
-
-.type mul_1x1_neon,%function
-.align 5
-mul_1x1_neon:
- vshl.u64 d2,d16,#8 @ q1-q3 are slided
- vmull.p8 q0,d16,d17 @ a·bb
- vshl.u64 d4,d16,#16
- vmull.p8 q1,d2,d17 @ a<<8·bb
- vshl.u64 d6,d16,#24
- vmull.p8 q2,d4,d17 @ a<<16·bb
- vshr.u64 d2,#8
- vmull.p8 q3,d6,d17 @ a<<24·bb
- vshl.u64 d3,#24
- veor d0,d2
- vshr.u64 d4,#16
- veor d0,d3
- vshl.u64 d5,#16
- veor d0,d4
- vshr.u64 d6,#24
- veor d0,d5
- vshl.u64 d7,#8
- veor d0,d6
- veor d0,d7
- .word 0xe12fff1e
-.size mul_1x1_neon,.-mul_1x1_neon
#endif
.type mul_1x1_ialu,%function
.align 5
@@ -120,40 +95,53 @@
tst r12,#1
beq .Lialu
- veor d18,d18
- vmov.32 d19,r3,r3 @ two copies of b1
- vmov.32 d18[0],r1 @ a1
+ ldr r12, [sp] @ 5th argument
+ vmov.32 d26, r2, r1
+ vmov.32 d27, r12, r3
+ vmov.i64 d28, #0x0000ffffffffffff
+ vmov.i64 d29, #0x00000000ffffffff
+ vmov.i64 d30, #0x000000000000ffff
- veor d20,d20
- vld1.32 d21[],[sp,:32] @ two copies of b0
- vmov.32 d20[0],r2 @ a0
- mov r12,lr
+ vext.8 d2, d26, d26, #1 @ A1
+ vmull.p8 q1, d2, d27 @ F = A1*B
+ vext.8 d0, d27, d27, #1 @ B1
+ vmull.p8 q0, d26, d0 @ E = A*B1
+ vext.8 d4, d26, d26, #2 @ A2
+ vmull.p8 q2, d4, d27 @ H = A2*B
+ vext.8 d16, d27, d27, #2 @ B2
+ vmull.p8 q8, d26, d16 @ G = A*B2
+ vext.8 d6, d26, d26, #3 @ A3
+ veor q1, q1, q0 @ L = E + F
+ vmull.p8 q3, d6, d27 @ J = A3*B
+ vext.8 d0, d27, d27, #3 @ B3
+ veor q2, q2, q8 @ M = G + H
+ vmull.p8 q0, d26, d0 @ I = A*B3
+ veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8
+ vand d3, d3, d28
+ vext.8 d16, d27, d27, #4 @ B4
+ veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16
+ vand d5, d5, d29
+ vmull.p8 q8, d26, d16 @ K = A*B4
+ veor q3, q3, q0 @ N = I + J
+ veor d2, d2, d3
+ veor d4, d4, d5
+ veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24
+ vand d7, d7, d30
+ vext.8 q1, q1, q1, #15
+ veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d17, #0
+ vext.8 q2, q2, q2, #14
+ veor d6, d6, d7
+ vmull.p8 q0, d26, d27 @ D = A*B
+ vext.8 q8, q8, q8, #12
+ vext.8 q3, q3, q3, #13
+ veor q1, q1, q2
+ veor q3, q3, q8
+ veor q0, q0, q1
+ veor q0, q0, q3
- vmov d16,d18
- vmov d17,d19
- bl mul_1x1_neon @ a1·b1
- vmov d22,d0
-
- vmov d16,d20
- vmov d17,d21
- bl mul_1x1_neon @ a0·b0
- vmov d23,d0
-
- veor d16,d20,d18
- veor d17,d21,d19
- veor d20,d23,d22
- bl mul_1x1_neon @ (a0+a1)·(b0+b1)
-
- veor d0,d20 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1
- vshl.u64 d1,d0,#32
- vshr.u64 d0,d0,#32
- veor d23,d1
- veor d22,d0
- vst1.32 {d23[0]},[r0,:32]!
- vst1.32 {d23[1]},[r0,:32]!
- vst1.32 {d22[0]},[r0,:32]!
- vst1.32 {d22[1]},[r0,:32]
- bx r12
+ vst1.32 {q0}, [r0]
+ bx lr @ bx lr
.align 4
.Lialu:
#endif
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index 22ad1f8..3f1f4f6 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -20,14 +20,21 @@
# length, more for longer keys. Even though NEON 1x1 multiplication
# runs in even less cycles, ~30, improvement is measurable only on
# longer keys. One has to optimize code elsewhere to get NEON glow...
+#
+# April 2014
+#
+# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
+# referred below, which improves ECDH and ECDSA verify benchmarks
+# by 18-40%.
+#
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Polynomial Multiplication on ARM Processors using the NEON Engine.
+#
+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
-sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
-
$code=<<___;
#include "arm_arch.h"
@@ -36,31 +43,6 @@
#if __ARM_ARCH__>=7
.fpu neon
-
-.type mul_1x1_neon,%function
-.align 5
-mul_1x1_neon:
- vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a
- vmull.p8 `&Q("d0")`,d16,d17 @ a·bb
- vshl.u64 `&Dlo("q2")`,d16,#16
- vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb
- vshl.u64 `&Dlo("q3")`,d16,#24
- vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb
- vshr.u64 `&Dlo("q1")`,#8
- vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb
- vshl.u64 `&Dhi("q1")`,#24
- veor d0,`&Dlo("q1")`
- vshr.u64 `&Dlo("q2")`,#16
- veor d0,`&Dhi("q1")`
- vshl.u64 `&Dhi("q2")`,#16
- veor d0,`&Dlo("q2")`
- vshr.u64 `&Dlo("q3")`,#24
- veor d0,`&Dhi("q2")`
- vshl.u64 `&Dhi("q3")`,#8
- veor d0,`&Dlo("q3")`
- veor d0,`&Dhi("q3")`
- bx lr
-.size mul_1x1_neon,.-mul_1x1_neon
#endif
___
################
@@ -159,8 +141,9 @@
# void bn_GF2m_mul_2x2(BN_ULONG *r,
# BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
-
-($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
.global bn_GF2m_mul_2x2
@@ -173,44 +156,58 @@
tst r12,#1
beq .Lialu
- veor $A1,$A1
- vmov.32 $B1,r3,r3 @ two copies of b1
- vmov.32 ${A1}[0],r1 @ a1
+ ldr r12, [sp] @ 5th argument
+ vmov.32 $a, r2, r1
+ vmov.32 $b, r12, r3
+ vmov.i64 $k48, #0x0000ffffffffffff
+ vmov.i64 $k32, #0x00000000ffffffff
+ vmov.i64 $k16, #0x000000000000ffff
- veor $A0,$A0
- vld1.32 ${B0}[],[sp,:32] @ two copies of b0
- vmov.32 ${A0}[0],r2 @ a0
- mov r12,lr
+ vext.8 $t0#lo, $a, $a, #1 @ A1
+ vmull.p8 $t0, $t0#lo, $b @ F = A1*B
+ vext.8 $r#lo, $b, $b, #1 @ B1
+ vmull.p8 $r, $a, $r#lo @ E = A*B1
+ vext.8 $t1#lo, $a, $a, #2 @ A2
+ vmull.p8 $t1, $t1#lo, $b @ H = A2*B
+ vext.8 $t3#lo, $b, $b, #2 @ B2
+ vmull.p8 $t3, $a, $t3#lo @ G = A*B2
+ vext.8 $t2#lo, $a, $a, #3 @ A3
+ veor $t0, $t0, $r @ L = E + F
+ vmull.p8 $t2, $t2#lo, $b @ J = A3*B
+ vext.8 $r#lo, $b, $b, #3 @ B3
+ veor $t1, $t1, $t3 @ M = G + H
+ vmull.p8 $r, $a, $r#lo @ I = A*B3
+ veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
+ vand $t0#hi, $t0#hi, $k48
+ vext.8 $t3#lo, $b, $b, #4 @ B4
+ veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
+ vand $t1#hi, $t1#hi, $k32
+ vmull.p8 $t3, $a, $t3#lo @ K = A*B4
+ veor $t2, $t2, $r @ N = I + J
+ veor $t0#lo, $t0#lo, $t0#hi
+ veor $t1#lo, $t1#lo, $t1#hi
+ veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
+ vand $t2#hi, $t2#hi, $k16
+ vext.8 $t0, $t0, $t0, #15
+ veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 $t3#hi, #0
+ vext.8 $t1, $t1, $t1, #14
+ veor $t2#lo, $t2#lo, $t2#hi
+ vmull.p8 $r, $a, $b @ D = A*B
+ vext.8 $t3, $t3, $t3, #12
+ vext.8 $t2, $t2, $t2, #13
+ veor $t0, $t0, $t1
+ veor $t2, $t2, $t3
+ veor $r, $r, $t0
+ veor $r, $r, $t2
- vmov d16,$A1
- vmov d17,$B1
- bl mul_1x1_neon @ a1·b1
- vmov $A1B1,d0
-
- vmov d16,$A0
- vmov d17,$B0
- bl mul_1x1_neon @ a0·b0
- vmov $A0B0,d0
-
- veor d16,$A0,$A1
- veor d17,$B0,$B1
- veor $A0,$A0B0,$A1B1
- bl mul_1x1_neon @ (a0+a1)·(b0+b1)
-
- veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1
- vshl.u64 d1,d0,#32
- vshr.u64 d0,d0,#32
- veor $A0B0,d1
- veor $A1B1,d0
- vst1.32 {${A0B0}[0]},[r0,:32]!
- vst1.32 {${A0B0}[1]},[r0,:32]!
- vst1.32 {${A1B1}[0]},[r0,:32]!
- vst1.32 {${A1B1}[1]},[r0,:32]
- bx r12
+ vst1.32 {$r}, [r0]
+ ret @ bx lr
.align 4
.Lialu:
#endif
___
+}
$ret="r10"; # reassigned 1st argument
$code.=<<___;
stmdb sp!,{r4-r10,lr}
@@ -272,7 +269,13 @@
.comm OPENSSL_armcap_P,4,4
___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-print $code;
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
close STDOUT; # enforce flush
diff --git a/crypto/bn/asm/armv4-mont.S b/crypto/bn/asm/armv4-mont.S
index 64c220b..fecae15 100644
--- a/crypto/bn/asm/armv4-mont.S
+++ b/crypto/bn/asm/armv4-mont.S
@@ -1,13 +1,37 @@
+#include "arm_arch.h"
+
.text
+.code 32
+
+#if __ARM_ARCH__>=7
+.align 5
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-bn_mul_mont
+#endif
.global bn_mul_mont
.type bn_mul_mont,%function
-.align 2
+.align 5
bn_mul_mont:
+ ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
- ldr r0,[sp,#3*4] @ load num
- cmp r0,#2
+#if __ARM_ARCH__>=7
+ tst ip,#7
+ bne .Lialu
+ adr r0,bn_mul_mont
+ ldr r2,.LOPENSSL_armcap
+ ldr r0,[r0,r2]
+ tst r0,#1 @ NEON available?
+ ldmia sp, {r0,r2}
+ beq .Lialu
+ add sp,sp,#8
+ b bn_mul8x_mont_neon
+.align 4
+.Lialu:
+#endif
+ cmp ip,#2
+ mov r0,ip @ load num
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
@@ -137,9 +161,419 @@
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
-.Labrt: tst lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+ bx lr @ .word 0xe12fff1e
+#else
+ tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
.size bn_mul_mont,.-bn_mul_mont
-.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <[email protected]>"
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type bn_mul8x_mont_neon,%function
+.align 5
+bn_mul8x_mont_neon:
+ mov ip,sp
+ stmdb sp!,{r4-r11}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldmia ip,{r4-r5} @ load rest of parameter block
+
+ sub r7,sp,#16
+ vld1.32 {d28[0]}, [r2,:32]!
+ sub r7,r7,r5,lsl#4
+ vld1.32 {d0-d3}, [r1]! @ can't specify :32 :-(
+ and r7,r7,#-64
+ vld1.32 {d30[0]}, [r4,:32]
+ mov sp,r7 @ alloca
+ veor d8,d8,d8
+ subs r8,r5,#8
+ vzip.16 d28,d8
+
+ vmull.u32 q6,d28,d0[0]
+ vmull.u32 q7,d28,d0[1]
+ vmull.u32 q8,d28,d1[0]
+ vshl.i64 d10,d13,#16
+ vmull.u32 q9,d28,d1[1]
+
+ vadd.u64 d10,d10,d12
+ veor d8,d8,d8
+ vmul.u32 d29,d10,d30
+
+ vmull.u32 q10,d28,d2[0]
+ vld1.32 {d4-d7}, [r3]!
+ vmull.u32 q11,d28,d2[1]
+ vmull.u32 q12,d28,d3[0]
+ vzip.16 d29,d8
+ vmull.u32 q13,d28,d3[1]
+
+ bne .LNEON_1st
+
+ @ special case for num=8, everything is in register bank...
+
+ vmlal.u32 q6,d29,d4[0]
+ sub r9,r5,#1
+ vmlal.u32 q7,d29,d4[1]
+ vmlal.u32 q8,d29,d5[0]
+ vmlal.u32 q9,d29,d5[1]
+
+ vmlal.u32 q10,d29,d6[0]
+ vmov q5,q6
+ vmlal.u32 q11,d29,d6[1]
+ vmov q6,q7
+ vmlal.u32 q12,d29,d7[0]
+ vmov q7,q8
+ vmlal.u32 q13,d29,d7[1]
+ vmov q8,q9
+ vmov q9,q10
+ vshr.u64 d10,d10,#16
+ vmov q10,q11
+ vmov q11,q12
+ vadd.u64 d10,d10,d11
+ vmov q12,q13
+ veor q13,q13
+ vshr.u64 d10,d10,#16
+
+ b .LNEON_outer8
+
+.align 4
+.LNEON_outer8:
+ vld1.32 {d28[0]}, [r2,:32]!
+ veor d8,d8,d8
+ vzip.16 d28,d8
+ vadd.u64 d12,d12,d10
+
+ vmlal.u32 q6,d28,d0[0]
+ vmlal.u32 q7,d28,d0[1]
+ vmlal.u32 q8,d28,d1[0]
+ vshl.i64 d10,d13,#16
+ vmlal.u32 q9,d28,d1[1]
+
+ vadd.u64 d10,d10,d12
+ veor d8,d8,d8
+ subs r9,r9,#1
+ vmul.u32 d29,d10,d30
+
+ vmlal.u32 q10,d28,d2[0]
+ vmlal.u32 q11,d28,d2[1]
+ vmlal.u32 q12,d28,d3[0]
+ vzip.16 d29,d8
+ vmlal.u32 q13,d28,d3[1]
+
+ vmlal.u32 q6,d29,d4[0]
+ vmlal.u32 q7,d29,d4[1]
+ vmlal.u32 q8,d29,d5[0]
+ vmlal.u32 q9,d29,d5[1]
+
+ vmlal.u32 q10,d29,d6[0]
+ vmov q5,q6
+ vmlal.u32 q11,d29,d6[1]
+ vmov q6,q7
+ vmlal.u32 q12,d29,d7[0]
+ vmov q7,q8
+ vmlal.u32 q13,d29,d7[1]
+ vmov q8,q9
+ vmov q9,q10
+ vshr.u64 d10,d10,#16
+ vmov q10,q11
+ vmov q11,q12
+ vadd.u64 d10,d10,d11
+ vmov q12,q13
+ veor q13,q13
+ vshr.u64 d10,d10,#16
+
+ bne .LNEON_outer8
+
+ vadd.u64 d12,d12,d10
+ mov r7,sp
+ vshr.u64 d10,d12,#16
+ mov r8,r5
+ vadd.u64 d13,d13,d10
+ add r6,sp,#16
+ vshr.u64 d10,d13,#16
+ vzip.16 d12,d13
+
+ b .LNEON_tail2
+
+.align 4
+.LNEON_1st:
+ vmlal.u32 q6,d29,d4[0]
+ vld1.32 {d0-d3}, [r1]!
+ vmlal.u32 q7,d29,d4[1]
+ subs r8,r8,#8
+ vmlal.u32 q8,d29,d5[0]
+ vmlal.u32 q9,d29,d5[1]
+
+ vmlal.u32 q10,d29,d6[0]
+ vld1.32 {d4-d5}, [r3]!
+ vmlal.u32 q11,d29,d6[1]
+ vst1.64 {q6-q7}, [r7,:256]!
+ vmlal.u32 q12,d29,d7[0]
+ vmlal.u32 q13,d29,d7[1]
+ vst1.64 {q8-q9}, [r7,:256]!
+
+ vmull.u32 q6,d28,d0[0]
+ vld1.32 {d6-d7}, [r3]!
+ vmull.u32 q7,d28,d0[1]
+ vst1.64 {q10-q11}, [r7,:256]!
+ vmull.u32 q8,d28,d1[0]
+ vmull.u32 q9,d28,d1[1]
+ vst1.64 {q12-q13}, [r7,:256]!
+
+ vmull.u32 q10,d28,d2[0]
+ vmull.u32 q11,d28,d2[1]
+ vmull.u32 q12,d28,d3[0]
+ vmull.u32 q13,d28,d3[1]
+
+ bne .LNEON_1st
+
+ vmlal.u32 q6,d29,d4[0]
+ add r6,sp,#16
+ vmlal.u32 q7,d29,d4[1]
+ sub r1,r1,r5,lsl#2 @ rewind r1
+ vmlal.u32 q8,d29,d5[0]
+ vld1.64 {q5}, [sp,:128]
+ vmlal.u32 q9,d29,d5[1]
+ sub r9,r5,#1
+
+ vmlal.u32 q10,d29,d6[0]
+ vst1.64 {q6-q7}, [r7,:256]!
+ vmlal.u32 q11,d29,d6[1]
+ vshr.u64 d10,d10,#16
+ vld1.64 {q6}, [r6, :128]!
+ vmlal.u32 q12,d29,d7[0]
+ vst1.64 {q8-q9}, [r7,:256]!
+ vmlal.u32 q13,d29,d7[1]
+
+ vst1.64 {q10-q11}, [r7,:256]!
+ vadd.u64 d10,d10,d11
+ veor q4,q4,q4
+ vst1.64 {q12-q13}, [r7,:256]!
+ vld1.64 {q7-q8}, [r6, :256]!
+ vst1.64 {q4}, [r7,:128]
+ vshr.u64 d10,d10,#16
+
+ b .LNEON_outer
+
+.align 4
+.LNEON_outer:
+ vld1.32 {d28[0]}, [r2,:32]!
+ sub r3,r3,r5,lsl#2 @ rewind r3
+ vld1.32 {d0-d3}, [r1]!
+ veor d8,d8,d8
+ mov r7,sp
+ vzip.16 d28,d8
+ sub r8,r5,#8
+ vadd.u64 d12,d12,d10
+
+ vmlal.u32 q6,d28,d0[0]
+ vld1.64 {q9-q10},[r6,:256]!
+ vmlal.u32 q7,d28,d0[1]
+ vmlal.u32 q8,d28,d1[0]
+ vld1.64 {q11-q12},[r6,:256]!
+ vmlal.u32 q9,d28,d1[1]
+
+ vshl.i64 d10,d13,#16
+ veor d8,d8,d8
+ vadd.u64 d10,d10,d12
+ vld1.64 {q13},[r6,:128]!
+ vmul.u32 d29,d10,d30
+
+ vmlal.u32 q10,d28,d2[0]
+ vld1.32 {d4-d7}, [r3]!
+ vmlal.u32 q11,d28,d2[1]
+ vmlal.u32 q12,d28,d3[0]
+ vzip.16 d29,d8
+ vmlal.u32 q13,d28,d3[1]
+
+.LNEON_inner:
+ vmlal.u32 q6,d29,d4[0]
+ vld1.32 {d0-d3}, [r1]!
+ vmlal.u32 q7,d29,d4[1]
+ subs r8,r8,#8
+ vmlal.u32 q8,d29,d5[0]
+ vmlal.u32 q9,d29,d5[1]
+ vst1.64 {q6-q7}, [r7,:256]!
+
+ vmlal.u32 q10,d29,d6[0]
+ vld1.64 {q6}, [r6, :128]!
+ vmlal.u32 q11,d29,d6[1]
+ vst1.64 {q8-q9}, [r7,:256]!
+ vmlal.u32 q12,d29,d7[0]
+ vld1.64 {q7-q8}, [r6, :256]!
+ vmlal.u32 q13,d29,d7[1]
+ vst1.64 {q10-q11}, [r7,:256]!
+
+ vmlal.u32 q6,d28,d0[0]
+ vld1.64 {q9-q10}, [r6, :256]!
+ vmlal.u32 q7,d28,d0[1]
+ vst1.64 {q12-q13}, [r7,:256]!
+ vmlal.u32 q8,d28,d1[0]
+ vld1.64 {q11-q12}, [r6, :256]!
+ vmlal.u32 q9,d28,d1[1]
+ vld1.32 {d4-d7}, [r3]!
+
+ vmlal.u32 q10,d28,d2[0]
+ vld1.64 {q13}, [r6, :128]!
+ vmlal.u32 q11,d28,d2[1]
+ vmlal.u32 q12,d28,d3[0]
+ vmlal.u32 q13,d28,d3[1]
+
+ bne .LNEON_inner
+
+ vmlal.u32 q6,d29,d4[0]
+ add r6,sp,#16
+ vmlal.u32 q7,d29,d4[1]
+ sub r1,r1,r5,lsl#2 @ rewind r1
+ vmlal.u32 q8,d29,d5[0]
+ vld1.64 {q5}, [sp,:128]
+ vmlal.u32 q9,d29,d5[1]
+ subs r9,r9,#1
+
+ vmlal.u32 q10,d29,d6[0]
+ vst1.64 {q6-q7}, [r7,:256]!
+ vmlal.u32 q11,d29,d6[1]
+ vld1.64 {q6}, [r6, :128]!
+ vshr.u64 d10,d10,#16
+ vst1.64 {q8-q9}, [r7,:256]!
+ vmlal.u32 q12,d29,d7[0]
+ vld1.64 {q7-q8}, [r6, :256]!
+ vmlal.u32 q13,d29,d7[1]
+
+ vst1.64 {q10-q11}, [r7,:256]!
+ vadd.u64 d10,d10,d11
+ vst1.64 {q12-q13}, [r7,:256]!
+ vshr.u64 d10,d10,#16
+
+ bne .LNEON_outer
+
+ mov r7,sp
+ mov r8,r5
+
+.LNEON_tail:
+ vadd.u64 d12,d12,d10
+ vld1.64 {q9-q10}, [r6, :256]!
+ vshr.u64 d10,d12,#16
+ vadd.u64 d13,d13,d10
+ vld1.64 {q11-q12}, [r6, :256]!
+ vshr.u64 d10,d13,#16
+ vld1.64 {q13}, [r6, :128]!
+ vzip.16 d12,d13
+
+.LNEON_tail2:
+ vadd.u64 d14,d14,d10
+ vst1.32 {d12[0]}, [r7, :32]!
+ vshr.u64 d10,d14,#16
+ vadd.u64 d15,d15,d10
+ vshr.u64 d10,d15,#16
+ vzip.16 d14,d15
+
+ vadd.u64 d16,d16,d10
+ vst1.32 {d14[0]}, [r7, :32]!
+ vshr.u64 d10,d16,#16
+ vadd.u64 d17,d17,d10
+ vshr.u64 d10,d17,#16
+ vzip.16 d16,d17
+
+ vadd.u64 d18,d18,d10
+ vst1.32 {d16[0]}, [r7, :32]!
+ vshr.u64 d10,d18,#16
+ vadd.u64 d19,d19,d10
+ vshr.u64 d10,d19,#16
+ vzip.16 d18,d19
+
+ vadd.u64 d20,d20,d10
+ vst1.32 {d18[0]}, [r7, :32]!
+ vshr.u64 d10,d20,#16
+ vadd.u64 d21,d21,d10
+ vshr.u64 d10,d21,#16
+ vzip.16 d20,d21
+
+ vadd.u64 d22,d22,d10
+ vst1.32 {d20[0]}, [r7, :32]!
+ vshr.u64 d10,d22,#16
+ vadd.u64 d23,d23,d10
+ vshr.u64 d10,d23,#16
+ vzip.16 d22,d23
+
+ vadd.u64 d24,d24,d10
+ vst1.32 {d22[0]}, [r7, :32]!
+ vshr.u64 d10,d24,#16
+ vadd.u64 d25,d25,d10
+ vld1.64 {q6}, [r6, :128]!
+ vshr.u64 d10,d25,#16
+ vzip.16 d24,d25
+
+ vadd.u64 d26,d26,d10
+ vst1.32 {d24[0]}, [r7, :32]!
+ vshr.u64 d10,d26,#16
+ vadd.u64 d27,d27,d10
+ vld1.64 {q7-q8}, [r6, :256]!
+ vshr.u64 d10,d27,#16
+ vzip.16 d26,d27
+ subs r8,r8,#8
+ vst1.32 {d26[0]}, [r7, :32]!
+
+ bne .LNEON_tail
+
+ vst1.32 {d10[0]}, [r7, :32] @ top-most bit
+ sub r3,r3,r5,lsl#2 @ rewind r3
+ subs r1,sp,#0 @ clear carry flag
+ add r2,sp,r5,lsl#2
+
+.LNEON_sub:
+ ldmia r1!, {r4-r7}
+ ldmia r3!, {r8-r11}
+ sbcs r8, r4,r8
+ sbcs r9, r5,r9
+ sbcs r10,r6,r10
+ sbcs r11,r7,r11
+ teq r1,r2 @ preserves carry
+ stmia r0!, {r8-r11}
+ bne .LNEON_sub
+
+ ldr r10, [r1] @ load top-most bit
+ veor q0,q0,q0
+ sub r11,r2,sp @ this is num*4
+ veor q1,q1,q1
+ mov r1,sp
+ sub r0,r0,r11 @ rewind r0
+ mov r3,r2 @ second 3/4th of frame
+ sbcs r10,r10,#0 @ result is carry flag
+
+.LNEON_copy_n_zap:
+ ldmia r1!, {r4-r7}
+ ldmia r0, {r8-r11}
+ movcc r8, r4
+ vst1.64 {q0-q1}, [r3,:256]! @ wipe
+ movcc r9, r5
+ movcc r10,r6
+ vst1.64 {q0-q1}, [r3,:256]! @ wipe
+ movcc r11,r7
+ ldmia r1, {r4-r7}
+ stmia r0!, {r8-r11}
+ sub r1,r1,#16
+ ldmia r0, {r8-r11}
+ movcc r8, r4
+ vst1.64 {q0-q1}, [r1,:256]! @ wipe
+ movcc r9, r5
+ movcc r10,r6
+ vst1.64 {q0-q1}, [r3,:256]! @ wipe
+ movcc r11,r7
+ teq r1,r2 @ preserves carry
+ stmia r0!, {r8-r11}
+ bne .LNEON_copy_n_zap
+
+ sub sp,ip,#96
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r11}
+ bx lr @ .word 0xe12fff1e
+.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
+#endif
+.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <[email protected]>"
.align 2
+#if __ARM_ARCH__>=7
+.comm OPENSSL_armcap_P,4,4
+#endif
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
index f78a8b5..72bad8e 100644
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
-# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -23,6 +23,21 @@
# than 1/2KB. Windows CE port would be trivial, as it's exclusively
# about decorations, ABI and instruction syntax are identical.
+# November 2013
+#
+# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
+# performance improvement on Cortex-A8 is ~45-100% depending on key
+# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
+# On Snapdragon S4 improvement was measured to vary from ~70% to
+# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
+# rather because original integer-only code seems to perform
+# suboptimally on S4. Situation on Cortex-A9 is unfortunately
+# different. It's being looked into, but the trouble is that
+# performance for vectors longer than 256 bits is actually couple
+# of percent worse than for integer-only code. The code is chosen
+# for execution on all NEON-capable processors, because gain on
+# others outweighs the marginal loss on Cortex-A9.
+
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
@@ -52,16 +67,40 @@
$_num="$num,#15*4"; $_bpend=$_num;
$code=<<___;
+#include "arm_arch.h"
+
.text
+.code 32
+
+#if __ARM_ARCH__>=7
+.align 5
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-bn_mul_mont
+#endif
.global bn_mul_mont
.type bn_mul_mont,%function
-.align 2
+.align 5
bn_mul_mont:
+ ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
- ldr $num,[sp,#3*4] @ load num
- cmp $num,#2
+#if __ARM_ARCH__>=7
+ tst ip,#7
+ bne .Lialu
+ adr r0,bn_mul_mont
+ ldr r2,.LOPENSSL_armcap
+ ldr r0,[r0,r2]
+ tst r0,#1 @ NEON available?
+ ldmia sp, {r0,r2}
+ beq .Lialu
+ add sp,sp,#8
+ b bn_mul8x_mont_neon
+.align 4
+.Lialu:
+#endif
+ cmp ip,#2
+ mov $num,ip @ load num
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
@@ -191,14 +230,446 @@
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
-.Labrt: tst lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
+#endif
.size bn_mul_mont,.-bn_mul_mont
-.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+___
+{
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
+my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
+my ($Z,$Temp)=("q4","q5");
+my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
+my ($Bi,$Ni,$M0)=map("d$_",(28..31));
+my $zero=&Dlo($Z);
+my $temp=&Dlo($Temp);
+
+my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
+my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
+
+$code.=<<___;
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type bn_mul8x_mont_neon,%function
+.align 5
+bn_mul8x_mont_neon:
+ mov ip,sp
+ stmdb sp!,{r4-r11}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ ldmia ip,{r4-r5} @ load rest of parameter block
+
+ sub $toutptr,sp,#16
+ vld1.32 {${Bi}[0]}, [$bptr,:32]!
+ sub $toutptr,$toutptr,$num,lsl#4
+ vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-(
+ and $toutptr,$toutptr,#-64
+ vld1.32 {${M0}[0]}, [$n0,:32]
+ mov sp,$toutptr @ alloca
+ veor $zero,$zero,$zero
+ subs $inner,$num,#8
+ vzip.16 $Bi,$zero
+
+ vmull.u32 $A0xB,$Bi,${A0}[0]
+ vmull.u32 $A1xB,$Bi,${A0}[1]
+ vmull.u32 $A2xB,$Bi,${A1}[0]
+ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
+ vmull.u32 $A3xB,$Bi,${A1}[1]
+
+ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
+ veor $zero,$zero,$zero
+ vmul.u32 $Ni,$temp,$M0
+
+ vmull.u32 $A4xB,$Bi,${A2}[0]
+ vld1.32 {$N0-$N3}, [$nptr]!
+ vmull.u32 $A5xB,$Bi,${A2}[1]
+ vmull.u32 $A6xB,$Bi,${A3}[0]
+ vzip.16 $Ni,$zero
+ vmull.u32 $A7xB,$Bi,${A3}[1]
+
+ bne .LNEON_1st
+
+ @ special case for num=8, everything is in register bank...
+
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ sub $outer,$num,#1
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vmov $Temp,$A0xB
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vmov $A0xB,$A1xB
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vmov $A1xB,$A2xB
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+ vmov $A2xB,$A3xB
+ vmov $A3xB,$A4xB
+ vshr.u64 $temp,$temp,#16
+ vmov $A4xB,$A5xB
+ vmov $A5xB,$A6xB
+ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
+ vmov $A6xB,$A7xB
+ veor $A7xB,$A7xB
+ vshr.u64 $temp,$temp,#16
+
+ b .LNEON_outer8
+
+.align 4
+.LNEON_outer8:
+ vld1.32 {${Bi}[0]}, [$bptr,:32]!
+ veor $zero,$zero,$zero
+ vzip.16 $Bi,$zero
+ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+
+ vmlal.u32 $A0xB,$Bi,${A0}[0]
+ vmlal.u32 $A1xB,$Bi,${A0}[1]
+ vmlal.u32 $A2xB,$Bi,${A1}[0]
+ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
+ vmlal.u32 $A3xB,$Bi,${A1}[1]
+
+ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
+ veor $zero,$zero,$zero
+ subs $outer,$outer,#1
+ vmul.u32 $Ni,$temp,$M0
+
+ vmlal.u32 $A4xB,$Bi,${A2}[0]
+ vmlal.u32 $A5xB,$Bi,${A2}[1]
+ vmlal.u32 $A6xB,$Bi,${A3}[0]
+ vzip.16 $Ni,$zero
+ vmlal.u32 $A7xB,$Bi,${A3}[1]
+
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vmov $Temp,$A0xB
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vmov $A0xB,$A1xB
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vmov $A1xB,$A2xB
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+ vmov $A2xB,$A3xB
+ vmov $A3xB,$A4xB
+ vshr.u64 $temp,$temp,#16
+ vmov $A4xB,$A5xB
+ vmov $A5xB,$A6xB
+ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
+ vmov $A6xB,$A7xB
+ veor $A7xB,$A7xB
+ vshr.u64 $temp,$temp,#16
+
+ bne .LNEON_outer8
+
+ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+ mov $toutptr,sp
+ vshr.u64 $temp,`&Dlo("$A0xB")`,#16
+ mov $inner,$num
+ vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
+ add $tinptr,sp,#16
+ vshr.u64 $temp,`&Dhi("$A0xB")`,#16
+ vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
+
+ b .LNEON_tail2
+
+.align 4
+.LNEON_1st:
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ vld1.32 {$A0-$A3}, [$aptr]!
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ subs $inner,$inner,#8
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vld1.32 {$N0-$N1}, [$nptr]!
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
+
+ vmull.u32 $A0xB,$Bi,${A0}[0]
+ vld1.32 {$N2-$N3}, [$nptr]!
+ vmull.u32 $A1xB,$Bi,${A0}[1]
+ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
+ vmull.u32 $A2xB,$Bi,${A1}[0]
+ vmull.u32 $A3xB,$Bi,${A1}[1]
+ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
+
+ vmull.u32 $A4xB,$Bi,${A2}[0]
+ vmull.u32 $A5xB,$Bi,${A2}[1]
+ vmull.u32 $A6xB,$Bi,${A3}[0]
+ vmull.u32 $A7xB,$Bi,${A3}[1]
+
+ bne .LNEON_1st
+
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ add $tinptr,sp,#16
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vld1.64 {$Temp}, [sp,:128]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+ sub $outer,$num,#1
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vshr.u64 $temp,$temp,#16
+ vld1.64 {$A0xB}, [$tinptr, :128]!
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+
+ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
+ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
+ veor $Z,$Z,$Z
+ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
+ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
+ vst1.64 {$Z}, [$toutptr,:128]
+ vshr.u64 $temp,$temp,#16
+
+ b .LNEON_outer
+
+.align 4
+.LNEON_outer:
+ vld1.32 {${Bi}[0]}, [$bptr,:32]!
+ sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
+ vld1.32 {$A0-$A3}, [$aptr]!
+ veor $zero,$zero,$zero
+ mov $toutptr,sp
+ vzip.16 $Bi,$zero
+ sub $inner,$num,#8
+ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+
+ vmlal.u32 $A0xB,$Bi,${A0}[0]
+ vld1.64 {$A3xB-$A4xB},[$tinptr,:256]!
+ vmlal.u32 $A1xB,$Bi,${A0}[1]
+ vmlal.u32 $A2xB,$Bi,${A1}[0]
+ vld1.64 {$A5xB-$A6xB},[$tinptr,:256]!
+ vmlal.u32 $A3xB,$Bi,${A1}[1]
+
+ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
+ veor $zero,$zero,$zero
+ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
+ vld1.64 {$A7xB},[$tinptr,:128]!
+ vmul.u32 $Ni,$temp,$M0
+
+ vmlal.u32 $A4xB,$Bi,${A2}[0]
+ vld1.32 {$N0-$N3}, [$nptr]!
+ vmlal.u32 $A5xB,$Bi,${A2}[1]
+ vmlal.u32 $A6xB,$Bi,${A3}[0]
+ vzip.16 $Ni,$zero
+ vmlal.u32 $A7xB,$Bi,${A3}[1]
+
+.LNEON_inner:
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ vld1.32 {$A0-$A3}, [$aptr]!
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ subs $inner,$inner,#8
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vld1.64 {$A0xB}, [$tinptr, :128]!
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
+
+ vmlal.u32 $A0xB,$Bi,${A0}[0]
+ vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
+ vmlal.u32 $A1xB,$Bi,${A0}[1]
+ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
+ vmlal.u32 $A2xB,$Bi,${A1}[0]
+ vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
+ vmlal.u32 $A3xB,$Bi,${A1}[1]
+ vld1.32 {$N0-$N3}, [$nptr]!
+
+ vmlal.u32 $A4xB,$Bi,${A2}[0]
+ vld1.64 {$A7xB}, [$tinptr, :128]!
+ vmlal.u32 $A5xB,$Bi,${A2}[1]
+ vmlal.u32 $A6xB,$Bi,${A3}[0]
+ vmlal.u32 $A7xB,$Bi,${A3}[1]
+
+ bne .LNEON_inner
+
+ vmlal.u32 $A0xB,$Ni,${N0}[0]
+ add $tinptr,sp,#16
+ vmlal.u32 $A1xB,$Ni,${N0}[1]
+ sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
+ vmlal.u32 $A2xB,$Ni,${N1}[0]
+ vld1.64 {$Temp}, [sp,:128]
+ vmlal.u32 $A3xB,$Ni,${N1}[1]
+ subs $outer,$outer,#1
+
+ vmlal.u32 $A4xB,$Ni,${N2}[0]
+ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
+ vmlal.u32 $A5xB,$Ni,${N2}[1]
+ vld1.64 {$A0xB}, [$tinptr, :128]!
+ vshr.u64 $temp,$temp,#16
+ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
+ vmlal.u32 $A6xB,$Ni,${N3}[0]
+ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
+ vmlal.u32 $A7xB,$Ni,${N3}[1]
+
+ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
+ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
+ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
+ vshr.u64 $temp,$temp,#16
+
+ bne .LNEON_outer
+
+ mov $toutptr,sp
+ mov $inner,$num
+
+.LNEON_tail:
+ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+ vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
+ vshr.u64 $temp,`&Dlo("$A0xB")`,#16
+ vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
+ vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
+ vshr.u64 $temp,`&Dhi("$A0xB")`,#16
+ vld1.64 {$A7xB}, [$tinptr, :128]!
+ vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
+
+.LNEON_tail2:
+ vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp
+ vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A1xB")`,#16
+ vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp
+ vshr.u64 $temp,`&Dhi("$A1xB")`,#16
+ vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")`
+
+ vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp
+ vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A2xB")`,#16
+ vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp
+ vshr.u64 $temp,`&Dhi("$A2xB")`,#16
+ vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")`
+
+ vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp
+ vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A3xB")`,#16
+ vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp
+ vshr.u64 $temp,`&Dhi("$A3xB")`,#16
+ vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")`
+
+ vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp
+ vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A4xB")`,#16
+ vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp
+ vshr.u64 $temp,`&Dhi("$A4xB")`,#16
+ vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")`
+
+ vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp
+ vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A5xB")`,#16
+ vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp
+ vshr.u64 $temp,`&Dhi("$A5xB")`,#16
+ vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")`
+
+ vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp
+ vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A6xB")`,#16
+ vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp
+ vld1.64 {$A0xB}, [$tinptr, :128]!
+ vshr.u64 $temp,`&Dhi("$A6xB")`,#16
+ vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")`
+
+ vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp
+ vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,`&Dlo("$A7xB")`,#16
+ vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp
+ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
+ vshr.u64 $temp,`&Dhi("$A7xB")`,#16
+ vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")`
+ subs $inner,$inner,#8
+ vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]!
+
+ bne .LNEON_tail
+
+ vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit
+ sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
+ subs $aptr,sp,#0 @ clear carry flag
+ add $bptr,sp,$num,lsl#2
+
+.LNEON_sub:
+ ldmia $aptr!, {r4-r7}
+ ldmia $nptr!, {r8-r11}
+ sbcs r8, r4,r8
+ sbcs r9, r5,r9
+ sbcs r10,r6,r10
+ sbcs r11,r7,r11
+ teq $aptr,$bptr @ preserves carry
+ stmia $rptr!, {r8-r11}
+ bne .LNEON_sub
+
+ ldr r10, [$aptr] @ load top-most bit
+ veor q0,q0,q0
+ sub r11,$bptr,sp @ this is num*4
+ veor q1,q1,q1
+ mov $aptr,sp
+ sub $rptr,$rptr,r11 @ rewind $rptr
+ mov $nptr,$bptr @ second 3/4th of frame
+ sbcs r10,r10,#0 @ result is carry flag
+
+.LNEON_copy_n_zap:
+ ldmia $aptr!, {r4-r7}
+ ldmia $rptr, {r8-r11}
+ movcc r8, r4
+ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ movcc r9, r5
+ movcc r10,r6
+ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ movcc r11,r7
+ ldmia $aptr, {r4-r7}
+ stmia $rptr!, {r8-r11}
+ sub $aptr,$aptr,#16
+ ldmia $rptr, {r8-r11}
+ movcc r8, r4
+ vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
+ movcc r9, r5
+ movcc r10,r6
+ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ movcc r11,r7
+ teq $aptr,$bptr @ preserves carry
+ stmia $rptr!, {r8-r11}
+ bne .LNEON_copy_n_zap
+
+ sub sp,ip,#96
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r11}
+ ret @ bx lr
+.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_ARCH__>=7
+.comm OPENSSL_armcap_P,4,4
+#endif
___
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT;
diff --git a/crypto/bn/asm/bn-586.S b/crypto/bn/asm/bn-586.S
index fe873ce..66695e2 100644
--- a/crypto/bn/asm/bn-586.S
+++ b/crypto/bn/asm/bn-586.S
@@ -5,6 +5,103 @@
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
+ call .L000PIC_me_up
+.L000PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L001maw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+ jmp .L002maw_sse2_entry
+.align 16
+.L003maw_sse2_unrolled:
+ movd (%eax),%mm3
+ paddq %mm3,%mm1
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ movd 4(%edx),%mm4
+ pmuludq %mm0,%mm4
+ movd 8(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd 12(%edx),%mm7
+ pmuludq %mm0,%mm7
+ paddq %mm2,%mm1
+ movd 4(%eax),%mm3
+ paddq %mm4,%mm3
+ movd 8(%eax),%mm5
+ paddq %mm6,%mm5
+ movd 12(%eax),%mm4
+ paddq %mm4,%mm7
+ movd %mm1,(%eax)
+ movd 16(%edx),%mm2
+ pmuludq %mm0,%mm2
+ psrlq $32,%mm1
+ movd 20(%edx),%mm4
+ pmuludq %mm0,%mm4
+ paddq %mm3,%mm1
+ movd 24(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd %mm1,4(%eax)
+ psrlq $32,%mm1
+ movd 28(%edx),%mm3
+ addl $32,%edx
+ pmuludq %mm0,%mm3
+ paddq %mm5,%mm1
+ movd 16(%eax),%mm5
+ paddq %mm5,%mm2
+ movd %mm1,8(%eax)
+ psrlq $32,%mm1
+ paddq %mm7,%mm1
+ movd 20(%eax),%mm5
+ paddq %mm5,%mm4
+ movd %mm1,12(%eax)
+ psrlq $32,%mm1
+ paddq %mm2,%mm1
+ movd 24(%eax),%mm5
+ paddq %mm5,%mm6
+ movd %mm1,16(%eax)
+ psrlq $32,%mm1
+ paddq %mm4,%mm1
+ movd 28(%eax),%mm5
+ paddq %mm5,%mm3
+ movd %mm1,20(%eax)
+ psrlq $32,%mm1
+ paddq %mm6,%mm1
+ movd %mm1,24(%eax)
+ psrlq $32,%mm1
+ paddq %mm3,%mm1
+ movd %mm1,28(%eax)
+ leal 32(%eax),%eax
+ psrlq $32,%mm1
+ subl $8,%ecx
+ jz .L004maw_sse2_exit
+.L002maw_sse2_entry:
+ testl $4294967288,%ecx
+ jnz .L003maw_sse2_unrolled
+.align 4
+.L005maw_sse2_loop:
+ movd (%edx),%mm2
+ movd (%eax),%mm3
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm3,%mm1
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L005maw_sse2_loop
+.L004maw_sse2_exit:
+ movd %mm1,%eax
+ emms
+ ret
+.align 16
+.L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -17,9 +114,9 @@
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
- jz .L000maw_finish
+ jz .L006maw_finish
.align 16
-.L001maw_loop:
+.L007maw_loop:
movl (%ebx),%eax
mull %ebp
@@ -96,13 +193,13 @@
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
- jnz .L001maw_loop
-.L000maw_finish:
+ jnz .L007maw_loop
+.L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
- jnz .L002maw_finish2
- jmp .L003maw_end
-.L002maw_finish2:
+ jnz .L008maw_finish2
+ jmp .L009maw_end
+.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
@@ -113,7 +210,7 @@
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
@@ -124,7 +221,7 @@
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
@@ -135,7 +232,7 @@
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
@@ -146,7 +243,7 @@
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
@@ -157,7 +254,7 @@
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
@@ -168,7 +265,7 @@
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 24(%ebx),%eax
mull %ebp
@@ -178,7 +275,7 @@
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
-.L003maw_end:
+.L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
@@ -192,6 +289,34 @@
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
+ call .L010PIC_me_up
+.L010PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L011mw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+.align 16
+.L012mw_sse2_loop:
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L012mw_sse2_loop
+ movd %mm1,%eax
+ emms
+ ret
+.align 16
+.L011mw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -203,8 +328,8 @@
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
- jz .L004mw_finish
-.L005mw_loop:
+ jz .L013mw_finish
+.L014mw_loop:
movl (%ebx),%eax
mull %ecx
@@ -265,14 +390,14 @@
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
- jz .L004mw_finish
- jmp .L005mw_loop
-.L004mw_finish:
+ jz .L013mw_finish
+ jmp .L014mw_loop
+.L013mw_finish:
movl 28(%esp),%ebp
andl $7,%ebp
- jnz .L006mw_finish2
- jmp .L007mw_end
-.L006mw_finish2:
+ jnz .L015mw_finish2
+ jmp .L016mw_end
+.L015mw_finish2:
movl (%ebx),%eax
mull %ecx
@@ -281,7 +406,7 @@
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 4(%ebx),%eax
mull %ecx
@@ -290,7 +415,7 @@
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 8(%ebx),%eax
mull %ecx
@@ -299,7 +424,7 @@
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 12(%ebx),%eax
mull %ecx
@@ -308,7 +433,7 @@
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 16(%ebx),%eax
mull %ecx
@@ -317,7 +442,7 @@
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 20(%ebx),%eax
mull %ecx
@@ -326,7 +451,7 @@
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 24(%ebx),%eax
mull %ecx
@@ -334,7 +459,7 @@
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
-.L007mw_end:
+.L016mw_end:
movl %esi,%eax
popl %edi
popl %esi
@@ -347,6 +472,29 @@
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
+ call .L017PIC_me_up
+.L017PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L017PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L018sqr_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+.align 16
+.L019sqr_sse2_loop:
+ movd (%edx),%mm0
+ pmuludq %mm0,%mm0
+ leal 4(%edx),%edx
+ movq %mm0,(%eax)
+ subl $1,%ecx
+ leal 8(%eax),%eax
+ jnz .L019sqr_sse2_loop
+ emms
+ ret
+.align 16
+.L018sqr_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -356,8 +504,8 @@
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
- jz .L008sw_finish
-.L009sw_loop:
+ jz .L020sw_finish
+.L021sw_loop:
movl (%edi),%eax
mull %eax
@@ -402,59 +550,59 @@
addl $32,%edi
addl $64,%esi
subl $8,%ebx
- jnz .L009sw_loop
-.L008sw_finish:
+ jnz .L021sw_loop
+.L020sw_finish:
movl 28(%esp),%ebx
andl $7,%ebx
- jz .L010sw_end
+ jz .L022sw_end
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
movl %edx,4(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
-.L010sw_end:
+.L022sw_end:
popl %edi
popl %esi
popl %ebx
@@ -488,8 +636,8 @@
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L011aw_finish
-.L012aw_loop:
+ jz .L023aw_finish
+.L024aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -567,11 +715,11 @@
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L012aw_loop
-.L011aw_finish:
+ jnz .L024aw_loop
+.L023aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L013aw_end
+ jz .L025aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -582,7 +730,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
@@ -593,7 +741,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
@@ -604,7 +752,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
@@ -615,7 +763,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
@@ -626,7 +774,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
@@ -637,7 +785,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
@@ -647,7 +795,7 @@
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
-.L013aw_end:
+.L025aw_end:
popl %edi
popl %esi
popl %ebx
@@ -670,8 +818,8 @@
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L014aw_finish
-.L015aw_loop:
+ jz .L026aw_finish
+.L027aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -749,11 +897,11 @@
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L015aw_loop
-.L014aw_finish:
+ jnz .L027aw_loop
+.L026aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L016aw_end
+ jz .L028aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -764,7 +912,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
@@ -775,7 +923,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
@@ -786,7 +934,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
@@ -797,7 +945,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
@@ -808,7 +956,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
@@ -819,7 +967,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
@@ -829,7 +977,7 @@
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
-.L016aw_end:
+.L028aw_end:
popl %edi
popl %esi
popl %ebx
@@ -852,8 +1000,8 @@
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L017aw_finish
-.L018aw_loop:
+ jz .L029aw_finish
+.L030aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -931,11 +1079,11 @@
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L018aw_loop
-.L017aw_finish:
+ jnz .L030aw_loop
+.L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -949,7 +1097,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -963,7 +1111,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -977,7 +1125,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -991,7 +1139,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1005,7 +1153,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1019,7 +1167,7 @@
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1032,20 +1180,20 @@
addl $4,%esi
addl $4,%edi
addl $4,%ebx
-.L019aw_end:
+.L031aw_end:
cmpl $0,36(%esp)
- je .L020pw_end
+ je .L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
- je .L020pw_end
- jge .L021pw_pos
+ je .L032pw_end
+ jge .L033pw_pos
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
- jz .L022pw_neg_finish
-.L023pw_neg_loop:
+ jz .L034pw_neg_finish
+.L035pw_neg_loop:
movl $0,%ecx
movl (%edi),%edx
@@ -1122,13 +1270,13 @@
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L023pw_neg_loop
-.L022pw_neg_finish:
+ jnz .L035pw_neg_loop
+.L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl (%edi),%edx
@@ -1139,7 +1287,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 4(%edi),%edx
@@ -1150,7 +1298,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 8(%edi),%edx
@@ -1161,7 +1309,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 12(%edi),%edx
@@ -1172,7 +1320,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 16(%edi),%edx
@@ -1183,7 +1331,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 20(%edi),%edx
@@ -1194,7 +1342,7 @@
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 24(%edi),%edx
@@ -1204,181 +1352,182 @@
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
- jmp .L020pw_end
-.L021pw_pos:
+ jmp .L032pw_end
+.L033pw_pos:
andl $4294967288,%ebp
- jz .L024pw_pos_finish
-.L025pw_pos_loop:
+ jz .L036pw_pos_finish
+.L037pw_pos_loop:
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
- jnc .L026pw_nc0
+ jnc .L038pw_nc0
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
- jnc .L027pw_nc1
+ jnc .L039pw_nc1
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
- jnc .L028pw_nc2
+ jnc .L040pw_nc2
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
- jnc .L029pw_nc3
+ jnc .L041pw_nc3
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
- jnc .L030pw_nc4
+ jnc .L042pw_nc4
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
- jnc .L031pw_nc5
+ jnc .L043pw_nc5
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
- jnc .L032pw_nc6
+ jnc .L044pw_nc6
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
- jnc .L033pw_nc7
+ jnc .L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
- jnz .L025pw_pos_loop
-.L024pw_pos_finish:
+ jnz .L037pw_pos_loop
+.L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
- jz .L020pw_end
+ jz .L032pw_end
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
- jnc .L034pw_tail_nc0
+ jnc .L046pw_tail_nc0
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
- jnc .L035pw_tail_nc1
+ jnc .L047pw_tail_nc1
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
- jnc .L036pw_tail_nc2
+ jnc .L048pw_tail_nc2
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
- jnc .L037pw_tail_nc3
+ jnc .L049pw_tail_nc3
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
- jnc .L038pw_tail_nc4
+ jnc .L050pw_tail_nc4
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
- jnc .L039pw_tail_nc5
+ jnc .L051pw_tail_nc5
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
- jnc .L040pw_tail_nc6
+ jnc .L052pw_tail_nc6
movl $1,%eax
- jmp .L020pw_end
-.L041pw_nc_loop:
+ jmp .L032pw_end
+.L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
-.L026pw_nc0:
+.L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
-.L027pw_nc1:
+.L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
-.L028pw_nc2:
+.L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
-.L029pw_nc3:
+.L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
-.L030pw_nc4:
+.L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
-.L031pw_nc5:
+.L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
-.L032pw_nc6:
+.L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
-.L033pw_nc7:
+.L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
- jnz .L041pw_nc_loop
+ jnz .L053pw_nc_loop
movl 36(%esp),%ebp
andl $7,%ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
-.L034pw_tail_nc0:
+.L046pw_tail_nc0:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
-.L035pw_tail_nc1:
+.L047pw_tail_nc1:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
-.L036pw_tail_nc2:
+.L048pw_tail_nc2:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
-.L037pw_tail_nc3:
+.L049pw_tail_nc3:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
-.L038pw_tail_nc4:
+.L050pw_tail_nc4:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
-.L039pw_tail_nc5:
+.L051pw_tail_nc5:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
-.L040pw_tail_nc6:
-.L042pw_nc_end:
+.L052pw_tail_nc6:
+.L054pw_nc_end:
movl $0,%eax
-.L020pw_end:
+.L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
+.comm OPENSSL_ia32cap_P,8,4
diff --git a/crypto/bn/asm/x86-gf2m.S b/crypto/bn/asm/x86-gf2m.S
index 9403a5a..9ed29ae 100644
--- a/crypto/bn/asm/x86-gf2m.S
+++ b/crypto/bn/asm/x86-gf2m.S
@@ -249,6 +249,18 @@
movl 4(%edx),%edx
testl $8388608,%eax
jz .L001ialu
+ testl $16777216,%eax
+ jz .L002mmx
+ testl $2,%edx
+ jz .L002mmx
+ movups 8(%esp),%xmm0
+ shufps $177,%xmm0,%xmm0
+.byte 102,15,58,68,192,1
+ movl 4(%esp),%eax
+ movups %xmm0,(%eax)
+ ret
+.align 16
+.L002mmx:
pushl %ebp
pushl %ebx
pushl %esi
diff --git a/crypto/bn/asm/x86-mont.S b/crypto/bn/asm/x86-mont.S
index 2bbb0e3..c701e9e 100644
--- a/crypto/bn/asm/x86-mont.S
+++ b/crypto/bn/asm/x86-mont.S
@@ -42,6 +42,127 @@
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
+ call .L001PIC_me_up
+.L001PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L001PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L002non_sse2
+ movl $-1,%eax
+ movd %eax,%mm7
+ movl 8(%esp),%esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%ebp
+ xorl %edx,%edx
+ xorl %ecx,%ecx
+ movd (%edi),%mm4
+ movd (%esi),%mm5
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ movq %mm5,%mm2
+ movq %mm5,%mm0
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ incl %ecx
+.align 16
+.L0031st:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ leal 1(%ecx),%ecx
+ cmpl %ebx,%ecx
+ jl .L0031st
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm2,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ incl %edx
+.L004outer:
+ xorl %ecx,%ecx
+ movd (%edi,%edx,4),%mm4
+ movd (%esi),%mm5
+ movd 32(%esp),%mm6
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ paddq %mm6,%mm5
+ movq %mm5,%mm0
+ movq %mm5,%mm2
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 36(%esp),%mm6
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ incl %ecx
+ decl %ebx
+.L005inner:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ movd 36(%esp,%ecx,4),%mm6
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ decl %ebx
+ leal 1(%ecx),%ecx
+ jnz .L005inner
+ movl %ecx,%ebx
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ movd 36(%esp,%ebx,4),%mm6
+ paddq %mm2,%mm3
+ paddq %mm6,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ leal 1(%edx),%edx
+ cmpl %ebx,%edx
+ jle .L004outer
+ emms
+ jmp .L006common_tail
+.align 16
+.L002non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -52,12 +173,12 @@
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz .L001bn_sqr_mont
+ jz .L007bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
-.L002mull:
+.L008mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -66,7 +187,7 @@
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L002mull
+ jl .L008mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -84,9 +205,9 @@
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp .L0032ndmadd
+ jmp .L0092ndmadd
.align 16
-.L0041stmadd:
+.L0101stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -97,7 +218,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L0041stmadd
+ jl .L0101stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -120,7 +241,7 @@
adcl $0,%edx
movl $1,%ecx
.align 16
-.L0032ndmadd:
+.L0092ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -131,7 +252,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0032ndmadd
+ jl .L0092ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -147,16 +268,16 @@
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je .L005common_tail
+ je .L006common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp .L0041stmadd
+ jmp .L0101stmadd
.align 16
-.L001bn_sqr_mont:
+.L007bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -167,7 +288,7 @@
andl $1,%ebx
incl %ecx
.align 16
-.L006sqr:
+.L011sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -179,7 +300,7 @@
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl .L006sqr
+ jl .L011sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -203,7 +324,7 @@
movl 4(%esi),%eax
movl $1,%ecx
.align 16
-.L0073rdmadd:
+.L0123rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -222,7 +343,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0073rdmadd
+ jl .L0123rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -238,7 +359,7 @@
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je .L005common_tail
+ je .L006common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -250,12 +371,12 @@
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je .L008sqrlast
+ je .L013sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
-.L009sqradd:
+.L014sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -271,13 +392,13 @@
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle .L009sqradd
+ jle .L014sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-.L008sqrlast:
+.L013sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -292,9 +413,9 @@
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp .L0073rdmadd
+ jmp .L0123rdmadd
.align 16
-.L005common_tail:
+.L006common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -302,13 +423,13 @@
movl %ebx,%ecx
xorl %edx,%edx
.align 16
-.L010sub:
+.L015sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge .L010sub
+ jge .L015sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@@ -316,12 +437,12 @@
andl %eax,%ebp
orl %ebp,%esi
.align 16
-.L011copy:
+.L016copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge .L011copy
+ jge .L016copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
@@ -336,3 +457,4 @@
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
+.comm OPENSSL_ia32cap_P,8,4
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index f34248e..e776c07 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -538,6 +538,8 @@
BIGNUM *BN_mod_sqrt(BIGNUM *ret,
const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
+void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords);
+
/* Deprecated versions */
#ifndef OPENSSL_NO_DEPRECATED
BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
@@ -692,6 +694,10 @@
const BIGNUM *BN_get0_nist_prime_384(void);
const BIGNUM *BN_get0_nist_prime_521(void);
+int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv,
+ const unsigned char *message, size_t message_len,
+ BN_CTX *ctx);
+
/* library internal functions */
#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
@@ -774,11 +780,20 @@
#define bn_fix_top(a) bn_check_top(a)
+#define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2)
+#define bn_wcheck_size(bn, words) \
+ do { \
+ const BIGNUM *_bnum2 = (bn); \
+ assert(words <= (_bnum2)->dmax && words >= (_bnum2)->top); \
+ } while(0)
+
#else /* !BN_DEBUG */
#define bn_pollute(a)
#define bn_check_top(a)
#define bn_fix_top(a) bn_correct_top(a)
+#define bn_check_size(bn, bits)
+#define bn_wcheck_size(bn, words)
#endif
@@ -842,6 +857,7 @@
#define BN_F_BN_EXP 123
#define BN_F_BN_EXPAND2 108
#define BN_F_BN_EXPAND_INTERNAL 120
+#define BN_F_BN_GENERATE_DSA_NONCE 140
#define BN_F_BN_GF2M_MOD 131
#define BN_F_BN_GF2M_MOD_EXP 132
#define BN_F_BN_GF2M_MOD_MUL 133
@@ -881,6 +897,7 @@
#define BN_R_NOT_INITIALIZED 107
#define BN_R_NO_INVERSE 108
#define BN_R_NO_SOLUTION 116
+#define BN_R_PRIVATE_KEY_TOO_LARGE 117
#define BN_R_P_IS_NOT_PRIME 112
#define BN_R_TOO_MANY_ITERATIONS 113
#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index cfe2eb9..f722b52 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -87,6 +87,7 @@
{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
+{ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"},
{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
@@ -129,6 +130,7 @@
{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
{ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"},
+{ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE) ,"private key too large"},
{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 7a5676d..5461e6e 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -824,3 +824,55 @@
}
return bn_cmp_words(a,b,cl);
}
+
+/*
+ * Constant-time conditional swap of a and b.
+ * a and b are swapped if condition is not 0. The code assumes that at most one bit of condition is set.
+ * nwords is the number of words to swap. The code assumes that at least nwords are allocated in both a and b,
+ * and that no more than nwords are used by either a or b.
+ * a and b cannot be the same number
+ */
+void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
+ {
+ BN_ULONG t;
+ int i;
+
+ bn_wcheck_size(a, nwords);
+ bn_wcheck_size(b, nwords);
+
+ assert(a != b);
+ assert((condition & (condition - 1)) == 0);
+ assert(sizeof(BN_ULONG) >= sizeof(int));
+
+ condition = ((condition - 1) >> (BN_BITS2 - 1)) - 1;
+
+ t = (a->top^b->top) & condition;
+ a->top ^= t;
+ b->top ^= t;
+
+#define BN_CONSTTIME_SWAP(ind) \
+ do { \
+ t = (a->d[ind] ^ b->d[ind]) & condition; \
+ a->d[ind] ^= t; \
+ b->d[ind] ^= t; \
+ } while (0)
+
+
+ switch (nwords) {
+ default:
+ for (i = 10; i < nwords; i++)
+ BN_CONSTTIME_SWAP(i);
+ /* Fallthrough */
+ case 10: BN_CONSTTIME_SWAP(9); /* Fallthrough */
+ case 9: BN_CONSTTIME_SWAP(8); /* Fallthrough */
+ case 8: BN_CONSTTIME_SWAP(7); /* Fallthrough */
+ case 7: BN_CONSTTIME_SWAP(6); /* Fallthrough */
+ case 6: BN_CONSTTIME_SWAP(5); /* Fallthrough */
+ case 5: BN_CONSTTIME_SWAP(4); /* Fallthrough */
+ case 4: BN_CONSTTIME_SWAP(3); /* Fallthrough */
+ case 3: BN_CONSTTIME_SWAP(2); /* Fallthrough */
+ case 2: BN_CONSTTIME_SWAP(1); /* Fallthrough */
+ case 1: BN_CONSTTIME_SWAP(0);
+ }
+#undef BN_CONSTTIME_SWAP
+}
diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c
index 427b5cf..ee8532c 100644
--- a/crypto/bn/bn_mont.c
+++ b/crypto/bn/bn_mont.c
@@ -478,32 +478,38 @@
BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
const BIGNUM *mod, BN_CTX *ctx)
{
- int got_write_lock = 0;
BN_MONT_CTX *ret;
CRYPTO_r_lock(lock);
- if (!*pmont)
- {
- CRYPTO_r_unlock(lock);
- CRYPTO_w_lock(lock);
- got_write_lock = 1;
-
- if (!*pmont)
- {
- ret = BN_MONT_CTX_new();
- if (ret && !BN_MONT_CTX_set(ret, mod, ctx))
- BN_MONT_CTX_free(ret);
- else
- *pmont = ret;
- }
- }
-
ret = *pmont;
-
- if (got_write_lock)
- CRYPTO_w_unlock(lock);
+ CRYPTO_r_unlock(lock);
+ if (ret)
+ return ret;
+
+ /* We don't want to serialise globally while doing our lazy-init math in
+ * BN_MONT_CTX_set. That punishes threads that are doing independent
+ * things. Instead, punish the case where more than one thread tries to
+ * lazy-init the same 'pmont', by having each do the lazy-init math work
+ * independently and only use the one from the thread that wins the race
+ * (the losers throw away the work they've done). */
+ ret = BN_MONT_CTX_new();
+ if (!ret)
+ return NULL;
+ if (!BN_MONT_CTX_set(ret, mod, ctx))
+ {
+ BN_MONT_CTX_free(ret);
+ return NULL;
+ }
+
+ /* The locked compare-and-set, after the local work is done. */
+ CRYPTO_w_lock(lock);
+ if (*pmont)
+ {
+ BN_MONT_CTX_free(ret);
+ ret = *pmont;
+ }
else
- CRYPTO_r_unlock(lock);
-
+ *pmont = ret;
+ CRYPTO_w_unlock(lock);
return ret;
}
diff --git a/crypto/bn/bn_rand.c b/crypto/bn/bn_rand.c
index b376c28..55676f0 100644
--- a/crypto/bn/bn_rand.c
+++ b/crypto/bn/bn_rand.c
@@ -114,6 +114,7 @@
#include "cryptlib.h"
#include "bn_lcl.h"
#include <openssl/rand.h>
+#include <openssl/sha.h>
static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
{
@@ -303,3 +304,72 @@
{
return bn_rand_range(1, r, range);
}
+
+#ifndef OPENSSL_NO_SHA512
+/* BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike
+ * BN_rand_range, it also includes the contents of |priv| and |message| in the
+ * generation so that an RNG failure isn't fatal as long as |priv| remains
+ * secret. This is intended for use in DSA and ECDSA where an RNG weakness
+ * leads directly to private key exposure unless this function is used. */
+int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM* priv,
+ const unsigned char *message, size_t message_len,
+ BN_CTX *ctx)
+ {
+ SHA512_CTX sha;
+ /* We use 512 bits of random data per iteration to
+ * ensure that we have at least |range| bits of randomness. */
+ unsigned char random_bytes[64];
+ unsigned char digest[SHA512_DIGEST_LENGTH];
+ unsigned done, todo;
+ /* We generate |range|+8 bytes of random output. */
+ const unsigned num_k_bytes = BN_num_bytes(range) + 8;
+ unsigned char private_bytes[96];
+ unsigned char *k_bytes;
+ int ret = 0;
+
+ k_bytes = OPENSSL_malloc(num_k_bytes);
+ if (!k_bytes)
+ goto err;
+
+ /* We copy |priv| into a local buffer to avoid exposing its length. */
+ todo = sizeof(priv->d[0])*priv->top;
+ if (todo > sizeof(private_bytes))
+ {
+ /* No reasonable DSA or ECDSA key should have a private key
+ * this large and we don't handle this case in order to avoid
+ * leaking the length of the private key. */
+ BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE);
+ goto err;
+ }
+ memcpy(private_bytes, priv->d, todo);
+ memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
+
+ for (done = 0; done < num_k_bytes;) {
+ if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1)
+ goto err;
+ SHA512_Init(&sha);
+ SHA512_Update(&sha, &done, sizeof(done));
+ SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
+ SHA512_Update(&sha, message, message_len);
+ SHA512_Update(&sha, random_bytes, sizeof(random_bytes));
+ SHA512_Final(digest, &sha);
+
+ todo = num_k_bytes - done;
+ if (todo > SHA512_DIGEST_LENGTH)
+ todo = SHA512_DIGEST_LENGTH;
+ memcpy(k_bytes + done, digest, todo);
+ done += todo;
+ }
+
+ if (!BN_bin2bn(k_bytes, num_k_bytes, out))
+ goto err;
+ if (BN_mod(out, out, range, ctx) != 1)
+ goto err;
+ ret = 1;
+
+err:
+ if (k_bytes)
+ OPENSSL_free(k_bytes);
+ return ret;
+ }
+#endif /* OPENSSL_NO_SHA512 */
diff --git a/crypto/cms/cms_env.c b/crypto/cms/cms_env.c
index be20b1c..add00bf 100644
--- a/crypto/cms/cms_env.c
+++ b/crypto/cms/cms_env.c
@@ -185,6 +185,8 @@
if (flags & CMS_USE_KEYID)
{
ktri->version = 2;
+ if (env->version < 2)
+ env->version = 2;
type = CMS_RECIPINFO_KEYIDENTIFIER;
}
else
diff --git a/crypto/cms/cms_lib.c b/crypto/cms/cms_lib.c
index b62d1bf..ba08279 100644
--- a/crypto/cms/cms_lib.c
+++ b/crypto/cms/cms_lib.c
@@ -465,8 +465,6 @@
pcerts = cms_get0_certificate_choices(cms);
if (!pcerts)
return 0;
- if (!pcerts)
- return 0;
for (i = 0; i < sk_CMS_CertificateChoices_num(*pcerts); i++)
{
cch = sk_CMS_CertificateChoices_value(*pcerts, i);
diff --git a/crypto/cms/cms_sd.c b/crypto/cms/cms_sd.c
index 77fbd13..51dd33a 100644
--- a/crypto/cms/cms_sd.c
+++ b/crypto/cms/cms_sd.c
@@ -158,8 +158,8 @@
if (sd->version < 3)
sd->version = 3;
}
- else
- sd->version = 1;
+ else if (si->version < 1)
+ si->version = 1;
}
if (sd->version < 1)
diff --git a/crypto/cms/cms_smime.c b/crypto/cms/cms_smime.c
index 8c56e3a..1af9f3a 100644
--- a/crypto/cms/cms_smime.c
+++ b/crypto/cms/cms_smime.c
@@ -611,7 +611,7 @@
STACK_OF(CMS_RecipientInfo) *ris;
CMS_RecipientInfo *ri;
int i, r;
- int debug = 0;
+ int debug = 0, ri_match = 0;
ris = CMS_get0_RecipientInfos(cms);
if (ris)
debug = cms->d.envelopedData->encryptedContentInfo->debug;
@@ -620,6 +620,7 @@
ri = sk_CMS_RecipientInfo_value(ris, i);
if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS)
continue;
+ ri_match = 1;
/* If we have a cert try matching RecipientInfo
* otherwise try them all.
*/
@@ -655,7 +656,7 @@
}
}
/* If no cert and not debugging always return success */
- if (!cert && !debug)
+ if (ri_match && !cert && !debug)
{
ERR_clear_error();
return 1;
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index 304c6b7..0b77d8b 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -889,7 +889,7 @@
#if defined(_WIN32_WINNT) && _WIN32_WINNT>=0x0333
/* this -------------v--- guards NT-specific calls */
- if (GetVersion() < 0x80000000 && OPENSSL_isservice() > 0)
+ if (check_winnt() && OPENSSL_isservice() > 0)
{ HANDLE h = RegisterEventSource(0,_T("OPENSSL"));
const TCHAR *pmsg=buf;
ReportEvent(h,EVENTLOG_ERROR_TYPE,0,0,0,1,0,&pmsg,0);
diff --git a/crypto/dsa/dsa.h b/crypto/dsa/dsa.h
index a6f6d0b..7531c65 100644
--- a/crypto/dsa/dsa.h
+++ b/crypto/dsa/dsa.h
@@ -96,6 +96,10 @@
* faster variable sliding window method to
* be used for all exponents.
*/
+#define DSA_FLAG_NONCE_FROM_HASH 0x04 /* Causes the DSA nonce to be calculated
+ from SHA512(private_key + H(message) +
+ random). This strengthens DSA against a
+ weak PRNG. */
/* If this flag is set the DSA method is FIPS compliant and can be used
* in FIPS mode. This is set in the validated module method. If an
@@ -130,8 +134,9 @@
{
const char *name;
DSA_SIG * (*dsa_do_sign)(const unsigned char *dgst, int dlen, DSA *dsa);
- int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp,
- BIGNUM **rp);
+ int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen);
int (*dsa_do_verify)(const unsigned char *dgst, int dgst_len,
DSA_SIG *sig, DSA *dsa);
int (*dsa_mod_exp)(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1,
@@ -317,6 +322,7 @@
#define DSA_R_MISSING_PARAMETERS 101
#define DSA_R_MODULUS_TOO_LARGE 103
#define DSA_R_NEED_NEW_SETUP_VALUES 110
+#define DSA_R_NONCE_CANNOT_BE_PRECOMPUTED 112
#define DSA_R_NON_FIPS_DSA_METHOD 111
#define DSA_R_NO_PARAMETERS_SET 107
#define DSA_R_PARAMETER_ENCODING_ERROR 105
diff --git a/crypto/dsa/dsa_err.c b/crypto/dsa/dsa_err.c
index 00545b7..e6171cc 100644
--- a/crypto/dsa/dsa_err.c
+++ b/crypto/dsa/dsa_err.c
@@ -109,6 +109,7 @@
{ERR_REASON(DSA_R_MISSING_PARAMETERS) ,"missing parameters"},
{ERR_REASON(DSA_R_MODULUS_TOO_LARGE) ,"modulus too large"},
{ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES) ,"need new setup values"},
+{ERR_REASON(DSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"},
{ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD) ,"non fips dsa method"},
{ERR_REASON(DSA_R_NO_PARAMETERS_SET) ,"no parameters set"},
{ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR),"parameter encoding error"},
diff --git a/crypto/dsa/dsa_ossl.c b/crypto/dsa/dsa_ossl.c
index b3d78e5..177fc54 100644
--- a/crypto/dsa/dsa_ossl.c
+++ b/crypto/dsa/dsa_ossl.c
@@ -67,7 +67,9 @@
#include <openssl/asn1.h>
static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa);
-static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp);
+static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen);
static int dsa_do_verify(const unsigned char *dgst, int dgst_len, DSA_SIG *sig,
DSA *dsa);
static int dsa_init(DSA *dsa);
@@ -154,7 +156,8 @@
redo:
if ((dsa->kinv == NULL) || (dsa->r == NULL))
{
- if (!DSA_sign_setup(dsa,ctx,&kinv,&r)) goto err;
+ if (!dsa->meth->dsa_sign_setup(dsa,ctx,&kinv,&r,dgst,dlen))
+ goto err;
}
else
{
@@ -213,7 +216,9 @@
return(ret);
}
-static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp)
+static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen)
{
BN_CTX *ctx;
BIGNUM k,kq,*K,*kinv=NULL,*r=NULL;
@@ -239,8 +244,21 @@
/* Get random k */
do
- if (!BN_rand_range(&k, dsa->q)) goto err;
- while (BN_is_zero(&k));
+ {
+#ifndef OPENSSL_NO_SHA512
+ if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH)
+ {
+ /* If DSA_FLAG_NONCE_FROM_HASH is set then we calculate k from
+ * SHA512(private_key + H(message) + random). This protects the
+ * private key from a weak PRNG. */
+ if (!BN_generate_dsa_nonce(&k, dsa->q, dsa->priv_key, dgst,
+ dlen, ctx))
+ goto err;
+ }
+ else
+#endif
+ if (!BN_rand_range(&k, dsa->q)) goto err;
+ } while (BN_is_zero(&k));
if ((dsa->flags & DSA_FLAG_NO_EXP_CONSTTIME) == 0)
{
BN_set_flags(&k, BN_FLG_CONSTTIME);
diff --git a/crypto/dsa/dsa_sign.c b/crypto/dsa/dsa_sign.c
index c3cc364..8ace300 100644
--- a/crypto/dsa/dsa_sign.c
+++ b/crypto/dsa/dsa_sign.c
@@ -86,7 +86,14 @@
return 0;
}
#endif
- return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp);
+ if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH)
+ {
+ /* You cannot precompute the DSA nonce if it is required to
+ * depend on the message. */
+ DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_NONCE_CANNOT_BE_PRECOMPUTED);
+ return 0;
+ }
+ return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp, NULL, 0);
}
DSA_SIG *DSA_SIG_new(void)
diff --git a/crypto/dso/dso_dlfcn.c b/crypto/dso/dso_dlfcn.c
index 5f22548..4a56aac 100644
--- a/crypto/dso/dso_dlfcn.c
+++ b/crypto/dso/dso_dlfcn.c
@@ -464,7 +464,7 @@
return len;
}
- ERR_add_error_data(4, "dlfcn_pathbyaddr(): ", dlerror());
+ ERR_add_error_data(2, "dlfcn_pathbyaddr(): ", dlerror());
#endif
return -1;
}
diff --git a/crypto/ec/ec.h b/crypto/ec/ec.h
index dfe8710..d008a0d 100644
--- a/crypto/ec/ec.h
+++ b/crypto/ec/ec.h
@@ -819,6 +819,17 @@
/* wrapper functions for the underlying EC_GROUP object */
void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+/** Sets whether ECDSA operations with the given key will calculate their k
+ * value from SHA512(private_key + message + random) in order to protect
+ * against a weak PRNG.
+ * \param on Whether to calculate k from a hash or not
+ */
+void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on);
+
+/** Returns the value of nonce_from_hash
+ */
+int EC_KEY_get_nonce_from_hash(const EC_KEY *key);
+
/** Creates a table of pre-computed multiples of the generator to
* accelerate further EC_KEY operations.
* \param key EC_KEY object
diff --git a/crypto/ec/ec2_mult.c b/crypto/ec/ec2_mult.c
index 26f4a78..1c575dc 100644
--- a/crypto/ec/ec2_mult.c
+++ b/crypto/ec/ec2_mult.c
@@ -208,11 +208,15 @@
return ret;
}
+
/* Computes scalar*point and stores the result in r.
* point can not equal r.
- * Uses algorithm 2P of
+ * Uses a modified algorithm 2P of
* Lopez, J. and Dahab, R. "Fast multiplication on elliptic curves over
* GF(2^m) without precomputation" (CHES '99, LNCS 1717).
+ *
+ * To protect against side-channel attack the function uses constant time swap,
+ * avoiding conditional branches.
*/
static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
const EC_POINT *point, BN_CTX *ctx)
@@ -246,6 +250,11 @@
x2 = &r->X;
z2 = &r->Y;
+ bn_wexpand(x1, group->field.top);
+ bn_wexpand(z1, group->field.top);
+ bn_wexpand(x2, group->field.top);
+ bn_wexpand(z2, group->field.top);
+
if (!BN_GF2m_mod_arr(x1, &point->X, group->poly)) goto err; /* x1 = x */
if (!BN_one(z1)) goto err; /* z1 = 1 */
if (!group->meth->field_sqr(group, z2, x1, ctx)) goto err; /* z2 = x1^2 = x^2 */
@@ -270,16 +279,12 @@
word = scalar->d[i];
while (mask)
{
- if (word & mask)
- {
- if (!gf2m_Madd(group, &point->X, x1, z1, x2, z2, ctx)) goto err;
- if (!gf2m_Mdouble(group, x2, z2, ctx)) goto err;
- }
- else
- {
- if (!gf2m_Madd(group, &point->X, x2, z2, x1, z1, ctx)) goto err;
- if (!gf2m_Mdouble(group, x1, z1, ctx)) goto err;
- }
+ BN_consttime_swap(word & mask, x1, x2, group->field.top);
+ BN_consttime_swap(word & mask, z1, z2, group->field.top);
+ if (!gf2m_Madd(group, &point->X, x2, z2, x1, z1, ctx)) goto err;
+ if (!gf2m_Mdouble(group, x1, z1, ctx)) goto err;
+ BN_consttime_swap(word & mask, x1, x2, group->field.top);
+ BN_consttime_swap(word & mask, z1, z2, group->field.top);
mask >>= 1;
}
mask = BN_TBIT;
diff --git a/crypto/ec/ec_ameth.c b/crypto/ec/ec_ameth.c
index 0ce4524..f715a23 100644
--- a/crypto/ec/ec_ameth.c
+++ b/crypto/ec/ec_ameth.c
@@ -352,6 +352,7 @@
EC_KEY_set_enc_flags(ec_key, old_flags);
OPENSSL_free(ep);
ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB);
+ return 0;
}
/* restore old encoding flags */
EC_KEY_set_enc_flags(ec_key, old_flags);
diff --git a/crypto/ec/ec_asn1.c b/crypto/ec/ec_asn1.c
index 145807b..e94f34e 100644
--- a/crypto/ec/ec_asn1.c
+++ b/crypto/ec/ec_asn1.c
@@ -1435,8 +1435,11 @@
*out, buf_len, NULL))
{
ECerr(EC_F_I2O_ECPUBLICKEY, ERR_R_EC_LIB);
- OPENSSL_free(*out);
- *out = NULL;
+ if (new_buffer)
+ {
+ OPENSSL_free(*out);
+ *out = NULL;
+ }
return 0;
}
if (!new_buffer)
diff --git a/crypto/ec/ec_key.c b/crypto/ec/ec_key.c
index 7fa2475..73dd7b9 100644
--- a/crypto/ec/ec_key.c
+++ b/crypto/ec/ec_key.c
@@ -85,6 +85,7 @@
ret->pub_key = NULL;
ret->priv_key= NULL;
ret->enc_flag= 0;
+ ret->nonce_from_hash_flag = 0;
ret->conv_form = POINT_CONVERSION_UNCOMPRESSED;
ret->references= 1;
ret->method_data = NULL;
@@ -198,6 +199,7 @@
/* copy the rest */
dest->enc_flag = src->enc_flag;
+ dest->nonce_from_hash_flag = src->nonce_from_hash_flag;
dest->conv_form = src->conv_form;
dest->version = src->version;
dest->flags = src->flags;
@@ -505,6 +507,16 @@
key->enc_flag = flags;
}
+int EC_KEY_get_nonce_from_hash(const EC_KEY *key)
+ {
+ return key->nonce_from_hash_flag;
+ }
+
+void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on)
+ {
+ key->nonce_from_hash_flag = on != 0;
+ }
+
point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key)
{
return key->conv_form;
diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h
index da7967d..dae9148 100644
--- a/crypto/ec/ec_lcl.h
+++ b/crypto/ec/ec_lcl.h
@@ -246,6 +246,7 @@
BIGNUM *priv_key;
unsigned int enc_flag;
+ char nonce_from_hash_flag;
point_conversion_form_t conv_form;
int references;
@@ -404,7 +405,7 @@
int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx);
int ec_GF2m_have_precompute_mult(const EC_GROUP *group);
-#ifndef OPENSSL_EC_NISTP_64_GCC_128
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
/* method functions in ecp_nistp224.c */
int ec_GFp_nistp224_group_init(EC_GROUP *group);
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *n, BN_CTX *);
diff --git a/crypto/ecdsa/ecdsa.h b/crypto/ecdsa/ecdsa.h
index 7fb5254..dc6a36b 100644
--- a/crypto/ecdsa/ecdsa.h
+++ b/crypto/ecdsa/ecdsa.h
@@ -250,6 +250,7 @@
#define ECDSA_R_ERR_EC_LIB 102
#define ECDSA_R_MISSING_PARAMETERS 103
#define ECDSA_R_NEED_NEW_SETUP_VALUES 106
+#define ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED 108
#define ECDSA_R_NON_FIPS_METHOD 107
#define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104
#define ECDSA_R_SIGNATURE_MALLOC_FAILED 105
diff --git a/crypto/ecdsa/ecs_err.c b/crypto/ecdsa/ecs_err.c
index 81542e6..7406c6d 100644
--- a/crypto/ecdsa/ecs_err.c
+++ b/crypto/ecdsa/ecs_err.c
@@ -85,6 +85,7 @@
{ERR_REASON(ECDSA_R_ERR_EC_LIB) ,"err ec lib"},
{ERR_REASON(ECDSA_R_MISSING_PARAMETERS) ,"missing parameters"},
{ERR_REASON(ECDSA_R_NEED_NEW_SETUP_VALUES),"need new setup values"},
+{ERR_REASON(ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"},
{ERR_REASON(ECDSA_R_NON_FIPS_METHOD) ,"non fips method"},
{ERR_REASON(ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED),"random number generation failed"},
{ERR_REASON(ECDSA_R_SIGNATURE_MALLOC_FAILED),"signature malloc failed"},
diff --git a/crypto/ecdsa/ecs_locl.h b/crypto/ecdsa/ecs_locl.h
index cb3be13..46f7ad9 100644
--- a/crypto/ecdsa/ecs_locl.h
+++ b/crypto/ecdsa/ecs_locl.h
@@ -70,8 +70,9 @@
const char *name;
ECDSA_SIG *(*ecdsa_do_sign)(const unsigned char *dgst, int dgst_len,
const BIGNUM *inv, const BIGNUM *rp, EC_KEY *eckey);
- int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx, BIGNUM **kinv,
- BIGNUM **r);
+ int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx,
+ BIGNUM **kinv, BIGNUM **r,
+ const unsigned char *dgst, int dlen);
int (*ecdsa_do_verify)(const unsigned char *dgst, int dgst_len,
const ECDSA_SIG *sig, EC_KEY *eckey);
#if 0
diff --git a/crypto/ecdsa/ecs_ossl.c b/crypto/ecdsa/ecs_ossl.c
index 7725935..325aca8 100644
--- a/crypto/ecdsa/ecs_ossl.c
+++ b/crypto/ecdsa/ecs_ossl.c
@@ -60,11 +60,13 @@
#include <openssl/err.h>
#include <openssl/obj_mac.h>
#include <openssl/bn.h>
+#include <openssl/rand.h>
static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dlen,
const BIGNUM *, const BIGNUM *, EC_KEY *eckey);
-static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
- BIGNUM **rp);
+static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen);
static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len,
const ECDSA_SIG *sig, EC_KEY *eckey);
@@ -86,8 +88,9 @@
return &openssl_ecdsa_meth;
}
-static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
- BIGNUM **rp)
+static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen)
{
BN_CTX *ctx = NULL;
BIGNUM *k = NULL, *r = NULL, *order = NULL, *X = NULL;
@@ -136,11 +139,28 @@
{
/* get random k */
do
- if (!BN_rand_range(k, order))
+#ifndef OPENSSL_NO_SHA512
+ if (EC_KEY_get_nonce_from_hash(eckey))
{
- ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
- ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
- goto err;
+ if (!BN_generate_dsa_nonce(
+ k, order,
+ EC_KEY_get0_private_key(eckey),
+ dgst, dlen, ctx))
+ {
+ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
+ ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
+ goto err;
+ }
+ }
+ else
+#endif
+ {
+ if (!BN_rand_range(k, order))
+ {
+ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
+ ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
+ goto err;
+ }
}
while (BN_is_zero(k));
@@ -282,7 +302,7 @@
{
if (in_kinv == NULL || in_r == NULL)
{
- if (!ECDSA_sign_setup(eckey, ctx, &kinv, &ret->r))
+ if (!ecdsa->meth->ecdsa_sign_setup(eckey, ctx, &kinv, &ret->r, dgst, dgst_len))
{
ECDSAerr(ECDSA_F_ECDSA_DO_SIGN,ERR_R_ECDSA_LIB);
goto err;
diff --git a/crypto/ecdsa/ecs_sign.c b/crypto/ecdsa/ecs_sign.c
index 353d5af..ea79a24 100644
--- a/crypto/ecdsa/ecs_sign.c
+++ b/crypto/ecdsa/ecs_sign.c
@@ -58,6 +58,7 @@
#include <openssl/engine.h>
#endif
#include <openssl/rand.h>
+#include <openssl/err.h>
ECDSA_SIG *ECDSA_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey)
{
@@ -102,5 +103,12 @@
ECDSA_DATA *ecdsa = ecdsa_check(eckey);
if (ecdsa == NULL)
return 0;
- return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp);
+ if (EC_KEY_get_nonce_from_hash(eckey))
+ {
+ /* You cannot precompute the ECDSA nonce if it is required to
+ * depend on the message. */
+ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED);
+ return 0;
+ }
+ return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp, NULL, 0);
}
diff --git a/crypto/engine/eng_list.c b/crypto/engine/eng_list.c
index 27846ed..95c8589 100644
--- a/crypto/engine/eng_list.c
+++ b/crypto/engine/eng_list.c
@@ -408,6 +408,7 @@
!ENGINE_ctrl_cmd_string(iterator, "DIR_LOAD", "2", 0) ||
!ENGINE_ctrl_cmd_string(iterator, "DIR_ADD",
load_dir, 0) ||
+ !ENGINE_ctrl_cmd_string(iterator, "LIST_ADD", "1", 0) ||
!ENGINE_ctrl_cmd_string(iterator, "LOAD", NULL, 0))
goto notfound;
return iterator;
diff --git a/crypto/evp/bio_b64.c b/crypto/evp/bio_b64.c
index 72a2a67..16863fe 100644
--- a/crypto/evp/bio_b64.c
+++ b/crypto/evp/bio_b64.c
@@ -226,6 +226,7 @@
else if (ctx->start)
{
q=p=(unsigned char *)ctx->tmp;
+ num = 0;
for (j=0; j<i; j++)
{
if (*(q++) != '\n') continue;
@@ -264,7 +265,7 @@
}
/* we fell off the end without starting */
- if (j == i)
+ if ((j == i) && (num == 0))
{
/* Is this is one long chunk?, if so, keep on
* reading until a new line. */
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index c7869b6..ad0f7a4 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -62,7 +62,7 @@
typedef struct
{
- AES_KEY ks;
+ union { double align; AES_KEY ks; } ks;
block128_f block;
union {
cbc128_f cbc;
@@ -72,7 +72,7 @@
typedef struct
{
- AES_KEY ks; /* AES key schedule to use */
+ union { double align; AES_KEY ks; } ks; /* AES key schedule to use */
int key_set; /* Set if key initialised */
int iv_set; /* Set if an iv is set */
GCM128_CONTEXT gcm;
@@ -86,7 +86,7 @@
typedef struct
{
- AES_KEY ks1, ks2; /* AES key schedules to use */
+ union { double align; AES_KEY ks; } ks1, ks2; /* AES key schedules to use */
XTS128_CONTEXT xts;
void (*stream)(const unsigned char *in,
unsigned char *out, size_t length,
@@ -96,7 +96,7 @@
typedef struct
{
- AES_KEY ks; /* AES key schedule to use */
+ union { double align; AES_KEY ks; } ks; /* AES key schedule to use */
int key_set; /* Set if key initialised */
int iv_set; /* Set if an iv is set */
int tag_set; /* Set if tag is valid */
@@ -160,7 +160,7 @@
defined(_M_AMD64) || defined(_M_X64) || \
defined(__INTEL__) )
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[];
#ifdef VPAES_ASM
#define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
@@ -310,7 +310,7 @@
return 1;
if (key)
{
- aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+ aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
(block128_f)aesni_encrypt);
gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks;
@@ -355,19 +355,19 @@
/* key_len is two AES keys */
if (enc)
{
- aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)aesni_encrypt;
xctx->stream = aesni_xts_encrypt;
}
else
{
- aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)aesni_decrypt;
xctx->stream = aesni_xts_decrypt;
}
aesni_set_encrypt_key(key + ctx->key_len/2,
- ctx->key_len * 4, &xctx->ks2);
+ ctx->key_len * 4, &xctx->ks2.ks);
xctx->xts.block2 = (block128_f)aesni_encrypt;
xctx->xts.key1 = &xctx->ks1;
@@ -394,7 +394,7 @@
return 1;
if (key)
{
- aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
+ aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
&cctx->ks, (block128_f)aesni_encrypt);
cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks :
@@ -484,6 +484,38 @@
{ return &aes_##keylen##_##mode; }
#endif
+#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
+#include "arm_arch.h"
+#if __ARM_ARCH__>=7
+# if defined(BSAES_ASM)
+# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
+# endif
+# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
+# define HWAES_set_encrypt_key aes_v8_set_encrypt_key
+# define HWAES_set_decrypt_key aes_v8_set_decrypt_key
+# define HWAES_encrypt aes_v8_encrypt
+# define HWAES_decrypt aes_v8_decrypt
+# define HWAES_cbc_encrypt aes_v8_cbc_encrypt
+# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
+#endif
+#endif
+
+#if defined(HWAES_CAPABLE)
+int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+void HWAES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void HWAES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key,
+ unsigned char *ivec, const int enc);
+void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t len, const AES_KEY *key, const unsigned char ivec[16]);
+#endif
+
#define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \
BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
@@ -502,10 +534,23 @@
mode = ctx->cipher->flags & EVP_CIPH_MODE;
if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
&& !enc)
+#ifdef HWAES_CAPABLE
+ if (HWAES_CAPABLE)
+ {
+ ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)HWAES_decrypt;
+ dat->stream.cbc = NULL;
+#ifdef HWAES_cbc_encrypt
+ if (mode==EVP_CIPH_CBC_MODE)
+ dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
+#endif
+ }
+ else
+#endif
#ifdef BSAES_CAPABLE
if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)
{
- ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)AES_decrypt;
dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt;
}
@@ -514,7 +559,7 @@
#ifdef VPAES_CAPABLE
if (VPAES_CAPABLE)
{
- ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)vpaes_decrypt;
dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
(cbc128_f)vpaes_cbc_encrypt :
@@ -523,17 +568,37 @@
else
#endif
{
- ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)AES_decrypt;
dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
(cbc128_f)AES_cbc_encrypt :
NULL;
}
else
+#ifdef HWAES_CAPABLE
+ if (HWAES_CAPABLE)
+ {
+ ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)HWAES_encrypt;
+ dat->stream.cbc = NULL;
+#ifdef HWAES_cbc_encrypt
+ if (mode==EVP_CIPH_CBC_MODE)
+ dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
+ else
+#endif
+#ifdef HWAES_ctr32_encrypt_blocks
+ if (mode==EVP_CIPH_CTR_MODE)
+ dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
+ else
+#endif
+ (void)0; /* terminate potentially open 'else' */
+ }
+ else
+#endif
#ifdef BSAES_CAPABLE
if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)
{
- ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)AES_encrypt;
dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks;
}
@@ -542,7 +607,7 @@
#ifdef VPAES_CAPABLE
if (VPAES_CAPABLE)
{
- ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)vpaes_encrypt;
dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
(cbc128_f)vpaes_cbc_encrypt :
@@ -551,7 +616,7 @@
else
#endif
{
- ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
+ ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
dat->block = (block128_f)AES_encrypt;
dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
(cbc128_f)AES_cbc_encrypt :
@@ -822,10 +887,25 @@
return 1;
if (key)
{ do {
+#ifdef HWAES_CAPABLE
+ if (HWAES_CAPABLE)
+ {
+ HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
+ CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
+ (block128_f)HWAES_encrypt);
+#ifdef HWAES_ctr32_encrypt_blocks
+ gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
+#else
+ gctx->ctr = NULL;
+#endif
+ break;
+ }
+ else
+#endif
#ifdef BSAES_CAPABLE
if (BSAES_CAPABLE)
{
- AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
+ AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
(block128_f)AES_encrypt);
gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks;
@@ -836,7 +916,7 @@
#ifdef VPAES_CAPABLE
if (VPAES_CAPABLE)
{
- vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
+ vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
(block128_f)vpaes_encrypt);
gctx->ctr = NULL;
@@ -846,7 +926,7 @@
#endif
(void)0; /* terminate potentially open 'else' */
- AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+ AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);
#ifdef AES_CTR_ASM
gctx->ctr = (ctr128_f)AES_ctr32_encrypt;
@@ -1067,6 +1147,29 @@
xctx->stream = NULL;
#endif
/* key_len is two AES keys */
+#ifdef HWAES_CAPABLE
+ if (HWAES_CAPABLE)
+ {
+ if (enc)
+ {
+ HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)HWAES_encrypt;
+ }
+ else
+ {
+ HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)HWAES_decrypt;
+ }
+
+ HWAES_set_encrypt_key(key + ctx->key_len/2,
+ ctx->key_len * 4, &xctx->ks2.ks);
+ xctx->xts.block2 = (block128_f)HWAES_encrypt;
+
+ xctx->xts.key1 = &xctx->ks1;
+ break;
+ }
+ else
+#endif
#ifdef BSAES_CAPABLE
if (BSAES_CAPABLE)
xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt;
@@ -1077,17 +1180,17 @@
{
if (enc)
{
- vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)vpaes_encrypt;
}
else
{
- vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)vpaes_decrypt;
}
vpaes_set_encrypt_key(key + ctx->key_len/2,
- ctx->key_len * 4, &xctx->ks2);
+ ctx->key_len * 4, &xctx->ks2.ks);
xctx->xts.block2 = (block128_f)vpaes_encrypt;
xctx->xts.key1 = &xctx->ks1;
@@ -1099,17 +1202,17 @@
if (enc)
{
- AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)AES_encrypt;
}
else
{
- AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+ AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
xctx->xts.block1 = (block128_f)AES_decrypt;
}
AES_set_encrypt_key(key + ctx->key_len/2,
- ctx->key_len * 4, &xctx->ks2);
+ ctx->key_len * 4, &xctx->ks2.ks);
xctx->xts.block2 = (block128_f)AES_encrypt;
xctx->xts.key1 = &xctx->ks1;
@@ -1217,10 +1320,23 @@
return 1;
if (key) do
{
+#ifdef HWAES_CAPABLE
+ if (HWAES_CAPABLE)
+ {
+ HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks.ks);
+
+ CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+ &cctx->ks, (block128_f)HWAES_encrypt);
+ cctx->str = NULL;
+ cctx->key_set = 1;
+ break;
+ }
+ else
+#endif
#ifdef VPAES_CAPABLE
if (VPAES_CAPABLE)
{
- vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks);
+ vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks);
CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
&cctx->ks, (block128_f)vpaes_encrypt);
cctx->str = NULL;
@@ -1228,7 +1344,7 @@
break;
}
#endif
- AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
+ AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
&cctx->ks, (block128_f)AES_encrypt);
cctx->str = NULL;
diff --git a/crypto/evp/encode.c b/crypto/evp/encode.c
index 28546a8..4654bdc 100644
--- a/crypto/evp/encode.c
+++ b/crypto/evp/encode.c
@@ -324,6 +324,7 @@
v=EVP_DecodeBlock(out,d,n);
n=0;
if (v < 0) { rv=0; goto end; }
+ if (eof > v) { rv=-1; goto end; }
ret+=(v-eof);
}
else
diff --git a/crypto/evp/p_lib.c b/crypto/evp/p_lib.c
index bd1977d..8ee53c1 100644
--- a/crypto/evp/p_lib.c
+++ b/crypto/evp/p_lib.c
@@ -202,7 +202,7 @@
EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey)
{
- CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY);
+ CRYPTO_add(&pkey->references,1,CRYPTO_LOCK_EVP_PKEY);
return pkey;
}
diff --git a/crypto/modes/asm/ghash-armv4.S b/crypto/modes/asm/ghash-armv4.S
index d66c4cb..6c45377 100644
--- a/crypto/modes/asm/ghash-armv4.S
+++ b/crypto/modes/asm/ghash-armv4.S
@@ -309,99 +309,213 @@
#if __ARM_ARCH__>=7
.fpu neon
+.global gcm_init_neon
+.type gcm_init_neon,%function
+.align 4
+gcm_init_neon:
+ vld1.64 d7,[r1,:64]! @ load H
+ vmov.i8 q8,#0xe1
+ vld1.64 d6,[r1,:64]
+ vshl.i64 d17,#57
+ vshr.u64 d16,#63 @ t0=0xc2....01
+ vdup.8 q9,d7[7]
+ vshr.u64 d26,d6,#63
+ vshr.s8 q9,#7 @ broadcast carry bit
+ vshl.i64 q3,q3,#1
+ vand q8,q8,q9
+ vorr d7,d26 @ H<<<=1
+ veor q3,q3,q8 @ twisted H
+ vstmia r0,{q3}
+
+ bx lr @ bx lr
+.size gcm_init_neon,.-gcm_init_neon
+
.global gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
- sub r1,#16 @ point at H in GCM128_CTX
- vld1.64 d29,[r0,:64]!@ load Xi
- vmov.i32 d5,#0xe1 @ our irreducible polynomial
- vld1.64 d28,[r0,:64]!
- vshr.u64 d5,#32
- vldmia r1,{d0-d1} @ load H
- veor q12,q12
+ vld1.64 d7,[r0,:64]! @ load Xi
+ vld1.64 d6,[r0,:64]!
+ vmov.i64 d29,#0x0000ffffffffffff
+ vldmia r1,{d26-d27} @ load twisted H
+ vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
- vrev64.8 q14,q14
+ vrev64.8 q3,q3
#endif
- veor q13,q13
- veor q11,q11
- mov r1,#16
- veor q10,q10
+ vmov.i64 d31,#0x000000000000ffff
+ veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
- veor d2,d2
- vdup.8 d4,d28[0] @ broadcast lowest byte
- b .Linner_neon
+ b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.global gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
- vld1.64 d21,[r0,:64]! @ load Xi
- vmov.i32 d5,#0xe1 @ our irreducible polynomial
- vld1.64 d20,[r0,:64]!
- vshr.u64 d5,#32
- vldmia r0,{d0-d1} @ load H
- veor q12,q12
- nop
+ vld1.64 d1,[r0,:64]! @ load Xi
+ vld1.64 d0,[r0,:64]!
+ vmov.i64 d29,#0x0000ffffffffffff
+ vldmia r1,{d26-d27} @ load twisted H
+ vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
- vrev64.8 q10,q10
+ vrev64.8 q0,q0
#endif
-.Louter_neon:
- vld1.64 d29,[r2]! @ load inp
- veor q13,q13
- vld1.64 d28,[r2]!
- veor q11,q11
- mov r1,#16
+ vmov.i64 d31,#0x000000000000ffff
+ veor d28,d26,d27 @ Karatsuba pre-processing
+
+.Loop_neon:
+ vld1.64 d7,[r2]! @ load inp
+ vld1.64 d6,[r2]!
#ifdef __ARMEL__
- vrev64.8 q14,q14
+ vrev64.8 q3,q3
#endif
- veor d2,d2
- veor q14,q10 @ inp^=Xi
- veor q10,q10
- vdup.8 d4,d28[0] @ broadcast lowest byte
-.Linner_neon:
- subs r1,r1,#1
- vmull.p8 q9,d1,d4 @ H.lo·Xi[i]
- vmull.p8 q8,d0,d4 @ H.hi·Xi[i]
- vext.8 q14,q12,#1 @ IN>>=8
+ veor q3,q0 @ inp^=Xi
+.Lgmult_neon:
+ vext.8 d16, d26, d26, #1 @ A1
+ vmull.p8 q8, d16, d6 @ F = A1*B
+ vext.8 d0, d6, d6, #1 @ B1
+ vmull.p8 q0, d26, d0 @ E = A*B1
+ vext.8 d18, d26, d26, #2 @ A2
+ vmull.p8 q9, d18, d6 @ H = A2*B
+ vext.8 d22, d6, d6, #2 @ B2
+ vmull.p8 q11, d26, d22 @ G = A*B2
+ vext.8 d20, d26, d26, #3 @ A3
+ veor q8, q8, q0 @ L = E + F
+ vmull.p8 q10, d20, d6 @ J = A3*B
+ vext.8 d0, d6, d6, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q0, d26, d0 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d6, d6, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d26, d22 @ K = A*B4
+ veor q10, q10, q0 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q0, d26, d6 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q0, q0, q8
+ veor q0, q0, q10
+ veor d6,d6,d7 @ Karatsuba pre-processing
+ vext.8 d16, d28, d28, #1 @ A1
+ vmull.p8 q8, d16, d6 @ F = A1*B
+ vext.8 d2, d6, d6, #1 @ B1
+ vmull.p8 q1, d28, d2 @ E = A*B1
+ vext.8 d18, d28, d28, #2 @ A2
+ vmull.p8 q9, d18, d6 @ H = A2*B
+ vext.8 d22, d6, d6, #2 @ B2
+ vmull.p8 q11, d28, d22 @ G = A*B2
+ vext.8 d20, d28, d28, #3 @ A3
+ veor q8, q8, q1 @ L = E + F
+ vmull.p8 q10, d20, d6 @ J = A3*B
+ vext.8 d2, d6, d6, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q1, d28, d2 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d6, d6, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d28, d22 @ K = A*B4
+ veor q10, q10, q1 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q1, d28, d6 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q1, q1, q8
+ veor q1, q1, q10
+ vext.8 d16, d27, d27, #1 @ A1
+ vmull.p8 q8, d16, d7 @ F = A1*B
+ vext.8 d4, d7, d7, #1 @ B1
+ vmull.p8 q2, d27, d4 @ E = A*B1
+ vext.8 d18, d27, d27, #2 @ A2
+ vmull.p8 q9, d18, d7 @ H = A2*B
+ vext.8 d22, d7, d7, #2 @ B2
+ vmull.p8 q11, d27, d22 @ G = A*B2
+ vext.8 d20, d27, d27, #3 @ A3
+ veor q8, q8, q2 @ L = E + F
+ vmull.p8 q10, d20, d7 @ J = A3*B
+ vext.8 d4, d7, d7, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q2, d27, d4 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d7, d7, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d27, d22 @ K = A*B4
+ veor q10, q10, q2 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q2, d27, d7 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q2, q2, q8
+ veor q2, q2, q10
+ veor q1,q1,q0 @ Karatsuba post-processing
+ veor q1,q1,q2
+ veor d1,d1,d2
+ veor d4,d4,d3 @ Xh|Xl - 256-bit result
- veor q10,q13 @ modulo-scheduled part
- vshl.i64 d22,#48
- vdup.8 d4,d28[0] @ broadcast lowest byte
- veor d3,d18,d20
+ @ equivalent of reduction_avx from ghash-x86_64.pl
+ vshl.i64 q9,q0,#57 @ 1st phase
+ vshl.i64 q10,q0,#62
+ veor q10,q10,q9 @
+ vshl.i64 q9,q0,#63
+ veor q10, q10, q9 @
+ veor d1,d1,d20 @
+ veor d4,d4,d21
- veor d21,d22
- vuzp.8 q9,q8
- vsli.8 d2,d3,#1 @ compose the "carry" byte
- vext.8 q10,q12,#1 @ Z>>=8
+ vshr.u64 q10,q0,#1 @ 2nd phase
+ veor q2,q2,q0
+ veor q0,q0,q10 @
+ vshr.u64 q10,q10,#6
+ vshr.u64 q0,q0,#1 @
+ veor q0,q0,q2 @
+ veor q0,q0,q10 @
- vmull.p8 q11,d2,d5 @ "carry"·0xe1
- vshr.u8 d2,d3,#7 @ save Z's bottom bit
- vext.8 q13,q9,q12,#1 @ Qlo>>=8
- veor q10,q8
- bne .Linner_neon
-
- veor q10,q13 @ modulo-scheduled artefact
- vshl.i64 d22,#48
- veor d21,d22
-
- @ finalization, normalize Z:Zo
- vand d2,d5 @ suffices to mask the bit
- vshr.u64 d3,d20,#63
- vshl.i64 q10,#1
subs r3,#16
- vorr q10,q1 @ Z=Z:Zo<<1
- bne .Louter_neon
+ bne .Loop_neon
#ifdef __ARMEL__
- vrev64.8 q10,q10
+ vrev64.8 q0,q0
#endif
sub r0,#16
- vst1.64 d21,[r0,:64]! @ write out Xi
- vst1.64 d20,[r0,:64]
+ vst1.64 d1,[r0,:64]! @ write out Xi
+ vst1.64 d0,[r0,:64]
- .word 0xe12fff1e
+ bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <[email protected]>"
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index e46f8e3..b79ecbc 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -35,6 +35,20 @@
# Add NEON implementation featuring polynomial multiplication, i.e. no
# lookup tables involved. On Cortex A8 it was measured to process one
# byte in 15 cycles or 55% faster than integer-only code.
+#
+# April 2014
+#
+# Switch to multiplication algorithm suggested in paper referred
+# below and combine it with reduction algorithm from x86 module.
+# Performance improvement over previous version varies from 65% on
+# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
+# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
+# in 9.33.
+#
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Polynomial Multiplication on ARM Processors using the NEON Engine.
+#
+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
# ====================================================================
# Note about "528B" variant. In ARM case it makes lesser sense to
@@ -303,117 +317,160 @@
.size gcm_gmult_4bit,.-gcm_gmult_4bit
___
{
-my $cnt=$Htbl; # $Htbl is used once in the very beginning
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
+my ($t0,$t1,$t2,$t3)=map("q$_",(8..12));
+my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31));
-my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7));
-my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15));
-
-# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit
-# in Zo. Or should I say "top bit", because GHASH is specified in
-# reverse bit order? Otherwise straightforward 128-bt H by one input
-# byte multiplication and modulo-reduction, times 16.
-
-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
-sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
+sub clmul64x64 {
+my ($r,$a,$b)=@_;
+$code.=<<___;
+ vext.8 $t0#lo, $a, $a, #1 @ A1
+ vmull.p8 $t0, $t0#lo, $b @ F = A1*B
+ vext.8 $r#lo, $b, $b, #1 @ B1
+ vmull.p8 $r, $a, $r#lo @ E = A*B1
+ vext.8 $t1#lo, $a, $a, #2 @ A2
+ vmull.p8 $t1, $t1#lo, $b @ H = A2*B
+ vext.8 $t3#lo, $b, $b, #2 @ B2
+ vmull.p8 $t3, $a, $t3#lo @ G = A*B2
+ vext.8 $t2#lo, $a, $a, #3 @ A3
+ veor $t0, $t0, $r @ L = E + F
+ vmull.p8 $t2, $t2#lo, $b @ J = A3*B
+ vext.8 $r#lo, $b, $b, #3 @ B3
+ veor $t1, $t1, $t3 @ M = G + H
+ vmull.p8 $r, $a, $r#lo @ I = A*B3
+ veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
+ vand $t0#hi, $t0#hi, $k48
+ vext.8 $t3#lo, $b, $b, #4 @ B4
+ veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
+ vand $t1#hi, $t1#hi, $k32
+ vmull.p8 $t3, $a, $t3#lo @ K = A*B4
+ veor $t2, $t2, $r @ N = I + J
+ veor $t0#lo, $t0#lo, $t0#hi
+ veor $t1#lo, $t1#lo, $t1#hi
+ veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
+ vand $t2#hi, $t2#hi, $k16
+ vext.8 $t0, $t0, $t0, #15
+ veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 $t3#hi, #0
+ vext.8 $t1, $t1, $t1, #14
+ veor $t2#lo, $t2#lo, $t2#hi
+ vmull.p8 $r, $a, $b @ D = A*B
+ vext.8 $t3, $t3, $t3, #12
+ vext.8 $t2, $t2, $t2, #13
+ veor $t0, $t0, $t1
+ veor $t2, $t2, $t3
+ veor $r, $r, $t0
+ veor $r, $r, $t2
+___
+}
$code.=<<___;
#if __ARM_ARCH__>=7
.fpu neon
+.global gcm_init_neon
+.type gcm_init_neon,%function
+.align 4
+gcm_init_neon:
+ vld1.64 $IN#hi,[r1,:64]! @ load H
+ vmov.i8 $t0,#0xe1
+ vld1.64 $IN#lo,[r1,:64]
+ vshl.i64 $t0#hi,#57
+ vshr.u64 $t0#lo,#63 @ t0=0xc2....01
+ vdup.8 $t1,$IN#hi[7]
+ vshr.u64 $Hlo,$IN#lo,#63
+ vshr.s8 $t1,#7 @ broadcast carry bit
+ vshl.i64 $IN,$IN,#1
+ vand $t0,$t0,$t1
+ vorr $IN#hi,$Hlo @ H<<<=1
+ veor $IN,$IN,$t0 @ twisted H
+ vstmia r0,{$IN}
+
+ ret @ bx lr
+.size gcm_init_neon,.-gcm_init_neon
+
.global gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
- sub $Htbl,#16 @ point at H in GCM128_CTX
- vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi
- vmov.i32 $mod,#0xe1 @ our irreducible polynomial
- vld1.64 `&Dlo("$IN")`,[$Xi,:64]!
- vshr.u64 $mod,#32
- vldmia $Htbl,{$Hhi-$Hlo} @ load H
- veor $zero,$zero
+ vld1.64 $IN#hi,[$Xi,:64]! @ load Xi
+ vld1.64 $IN#lo,[$Xi,:64]!
+ vmov.i64 $k48,#0x0000ffffffffffff
+ vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
+ vmov.i64 $k32,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 $IN,$IN
#endif
- veor $Qpost,$Qpost
- veor $R,$R
- mov $cnt,#16
- veor $Z,$Z
+ vmov.i64 $k16,#0x000000000000ffff
+ veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
mov $len,#16
- veor $Zo,$Zo
- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
- b .Linner_neon
+ b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.global gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
- vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi
- vmov.i32 $mod,#0xe1 @ our irreducible polynomial
- vld1.64 `&Dlo("$Z")`,[$Xi,:64]!
- vshr.u64 $mod,#32
- vldmia $Xi,{$Hhi-$Hlo} @ load H
- veor $zero,$zero
- nop
+ vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi
+ vld1.64 $Xl#lo,[$Xi,:64]!
+ vmov.i64 $k48,#0x0000ffffffffffff
+ vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
+ vmov.i64 $k32,#0x00000000ffffffff
#ifdef __ARMEL__
- vrev64.8 $Z,$Z
+ vrev64.8 $Xl,$Xl
#endif
-.Louter_neon:
- vld1.64 `&Dhi($IN)`,[$inp]! @ load inp
- veor $Qpost,$Qpost
- vld1.64 `&Dlo($IN)`,[$inp]!
- veor $R,$R
- mov $cnt,#16
+ vmov.i64 $k16,#0x000000000000ffff
+ veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
+
+.Loop_neon:
+ vld1.64 $IN#hi,[$inp]! @ load inp
+ vld1.64 $IN#lo,[$inp]!
#ifdef __ARMEL__
vrev64.8 $IN,$IN
#endif
- veor $Zo,$Zo
- veor $IN,$Z @ inp^=Xi
- veor $Z,$Z
- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
-.Linner_neon:
- subs $cnt,$cnt,#1
- vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i]
- vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i]
- vext.8 $IN,$zero,#1 @ IN>>=8
+ veor $IN,$Xl @ inp^=Xi
+.Lgmult_neon:
+___
+ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
+$code.=<<___;
+ veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
+___
+ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
+$code.=<<___;
+ veor $Xm,$Xm,$Xl @ Karatsuba post-processing
+ veor $Xm,$Xm,$Xh
+ veor $Xl#hi,$Xl#hi,$Xm#lo
+ veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result
- veor $Z,$Qpost @ modulo-scheduled part
- vshl.i64 `&Dlo("$R")`,#48
- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
- veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")`
+ @ equivalent of reduction_avx from ghash-x86_64.pl
+ vshl.i64 $t1,$Xl,#57 @ 1st phase
+ vshl.i64 $t2,$Xl,#62
+ veor $t2,$t2,$t1 @
+ vshl.i64 $t1,$Xl,#63
+ veor $t2, $t2, $t1 @
+ veor $Xl#hi,$Xl#hi,$t2#lo @
+ veor $Xh#lo,$Xh#lo,$t2#hi
- veor `&Dhi("$Z")`,`&Dlo("$R")`
- vuzp.8 $Qlo,$Qhi
- vsli.8 $Zo,$T,#1 @ compose the "carry" byte
- vext.8 $Z,$zero,#1 @ Z>>=8
+ vshr.u64 $t2,$Xl,#1 @ 2nd phase
+ veor $Xh,$Xh,$Xl
+ veor $Xl,$Xl,$t2 @
+ vshr.u64 $t2,$t2,#6
+ vshr.u64 $Xl,$Xl,#1 @
+ veor $Xl,$Xl,$Xh @
+ veor $Xl,$Xl,$t2 @
- vmull.p8 $R,$Zo,$mod @ "carry"·0xe1
- vshr.u8 $Zo,$T,#7 @ save Z's bottom bit
- vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8
- veor $Z,$Qhi
- bne .Linner_neon
-
- veor $Z,$Qpost @ modulo-scheduled artefact
- vshl.i64 `&Dlo("$R")`,#48
- veor `&Dhi("$Z")`,`&Dlo("$R")`
-
- @ finalization, normalize Z:Zo
- vand $Zo,$mod @ suffices to mask the bit
- vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63
- vshl.i64 $Z,#1
subs $len,#16
- vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1
- bne .Louter_neon
+ bne .Loop_neon
#ifdef __ARMEL__
- vrev64.8 $Z,$Z
+ vrev64.8 $Xl,$Xl
#endif
sub $Xi,#16
- vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi
- vst1.64 `&Dlo("$Z")`,[$Xi,:64]
+ vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
+ vst1.64 $Xl#lo,[$Xi,:64]
- bx lr
+ ret @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
___
@@ -423,7 +480,13 @@
.align 2
___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-print $code;
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
close STDOUT; # enforce flush
diff --git a/crypto/modes/asm/ghash-x86.S b/crypto/modes/asm/ghash-x86.S
index cb9ae20..5047320 100644
--- a/crypto/modes/asm/ghash-x86.S
+++ b/crypto/modes/asm/ghash-x86.S
@@ -203,400 +203,6 @@
popl %ebp
ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
-.type _mmx_gmult_4bit_inner,@function
-.align 16
-_mmx_gmult_4bit_inner:
- xorl %ecx,%ecx
- movl %ebx,%edx
- movb %dl,%cl
- shlb $4,%cl
- andl $240,%edx
- movq 8(%esi,%ecx,1),%mm0
- movq (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 14(%edi),%cl
- psllq $60,%mm2
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 13(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 12(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 11(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 10(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 9(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 8(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 7(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 6(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 5(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 4(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 3(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 2(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 1(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb (%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movd %mm0,%ebx
- pxor %mm2,%mm0
- movl 4(%eax,%ebp,8),%edi
- psrlq $32,%mm0
- movd %mm1,%edx
- psrlq $32,%mm1
- movd %mm0,%ecx
- movd %mm1,%ebp
- shll $4,%edi
- bswap %ebx
- bswap %edx
- bswap %ecx
- xorl %edi,%ebp
- bswap %ebp
- ret
-.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
.globl gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
@@ -613,8 +219,78 @@
popl %eax
leal .Lrem_4bit-.L005pic_point(%eax),%eax
movzbl 15(%edi),%ebx
- call _mmx_gmult_4bit_inner
- movl 20(%esp),%edi
+ xorl %ecx,%ecx
+ movl %ebx,%edx
+ movb %dl,%cl
+ movl $14,%ebp
+ shlb $4,%cl
+ andl $240,%edx
+ movq 8(%esi,%ecx,1),%mm0
+ movq (%esi,%ecx,1),%mm1
+ movd %mm0,%ebx
+ jmp .L006mmx_loop
+.align 16
+.L006mmx_loop:
+ psrlq $4,%mm0
+ andl $15,%ebx
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ movb (%edi,%ebp,1),%cl
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ decl %ebp
+ movd %mm0,%ebx
+ pxor (%esi,%edx,1),%mm1
+ movl %ecx,%edx
+ pxor %mm2,%mm0
+ js .L007mmx_break
+ shlb $4,%cl
+ andl $15,%ebx
+ psrlq $4,%mm0
+ andl $240,%edx
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ movd %mm0,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ pxor %mm2,%mm0
+ jmp .L006mmx_loop
+.align 16
+.L007mmx_break:
+ shlb $4,%cl
+ andl $15,%ebx
+ psrlq $4,%mm0
+ andl $240,%edx
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%ecx,1),%mm0
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ movd %mm0,%ebx
+ pxor (%esi,%ecx,1),%mm1
+ pxor %mm2,%mm0
+ psrlq $4,%mm0
+ andl $15,%ebx
+ movq %mm1,%mm2
+ psrlq $4,%mm1
+ pxor 8(%esi,%edx,1),%mm0
+ psllq $60,%mm2
+ pxor (%eax,%ebx,8),%mm1
+ movd %mm0,%ebx
+ pxor (%esi,%edx,1),%mm1
+ pxor %mm2,%mm0
+ psrlq $32,%mm0
+ movd %mm1,%edx
+ psrlq $32,%mm1
+ movd %mm0,%ecx
+ movd %mm1,%ebp
+ bswap %ebx
+ bswap %edx
+ bswap %ecx
+ bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
@@ -635,61 +311,926 @@
pushl %ebx
pushl %esi
pushl %edi
- movl 20(%esp),%ebp
- movl 24(%esp),%esi
- movl 28(%esp),%edi
- movl 32(%esp),%ecx
- call .L006pic_point
-.L006pic_point:
- popl %eax
- leal .Lrem_4bit-.L006pic_point(%eax),%eax
- addl %edi,%ecx
- movl %ecx,32(%esp)
- subl $20,%esp
- movl 12(%ebp),%ebx
- movl 4(%ebp),%edx
- movl 8(%ebp),%ecx
- movl (%ebp),%ebp
- jmp .L007mmx_outer_loop
+ movl 20(%esp),%eax
+ movl 24(%esp),%ebx
+ movl 28(%esp),%ecx
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ call .L008pic_point
+.L008pic_point:
+ popl %esi
+ leal .Lrem_8bit-.L008pic_point(%esi),%esi
+ subl $544,%esp
+ andl $-64,%esp
+ subl $16,%esp
+ addl %ecx,%edx
+ movl %eax,544(%esp)
+ movl %edx,552(%esp)
+ movl %ebp,556(%esp)
+ addl $128,%ebx
+ leal 144(%esp),%edi
+ leal 400(%esp),%ebp
+ movl -120(%ebx),%edx
+ movq -120(%ebx),%mm0
+ movq -128(%ebx),%mm3
+ shll $4,%edx
+ movb %dl,(%esp)
+ movl -104(%ebx),%edx
+ movq -104(%ebx),%mm2
+ movq -112(%ebx),%mm5
+ movq %mm0,-128(%edi)
+ psrlq $4,%mm0
+ movq %mm3,(%edi)
+ movq %mm3,%mm7
+ psrlq $4,%mm3
+ shll $4,%edx
+ movb %dl,1(%esp)
+ movl -88(%ebx),%edx
+ movq -88(%ebx),%mm1
+ psllq $60,%mm7
+ movq -96(%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-120(%edi)
+ psrlq $4,%mm2
+ movq %mm5,8(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-128(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,(%ebp)
+ shll $4,%edx
+ movb %dl,2(%esp)
+ movl -72(%ebx),%edx
+ movq -72(%ebx),%mm0
+ psllq $60,%mm6
+ movq -80(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-112(%edi)
+ psrlq $4,%mm1
+ movq %mm4,16(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-120(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,8(%ebp)
+ shll $4,%edx
+ movb %dl,3(%esp)
+ movl -56(%ebx),%edx
+ movq -56(%ebx),%mm2
+ psllq $60,%mm7
+ movq -64(%ebx),%mm5
+ por %mm7,%mm1
+ movq %mm0,-104(%edi)
+ psrlq $4,%mm0
+ movq %mm3,24(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-112(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,16(%ebp)
+ shll $4,%edx
+ movb %dl,4(%esp)
+ movl -40(%ebx),%edx
+ movq -40(%ebx),%mm1
+ psllq $60,%mm6
+ movq -48(%ebx),%mm4
+ por %mm6,%mm0
+ movq %mm2,-96(%edi)
+ psrlq $4,%mm2
+ movq %mm5,32(%edi)
+ movq %mm5,%mm7
+ movq %mm0,-104(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,24(%ebp)
+ shll $4,%edx
+ movb %dl,5(%esp)
+ movl -24(%ebx),%edx
+ movq -24(%ebx),%mm0
+ psllq $60,%mm7
+ movq -32(%ebx),%mm3
+ por %mm7,%mm2
+ movq %mm1,-88(%edi)
+ psrlq $4,%mm1
+ movq %mm4,40(%edi)
+ movq %mm4,%mm6
+ movq %mm2,-96(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,32(%ebp)
+ shll $4,%edx
+ movb %dl,6(%esp)
+ movl -8(%ebx),%edx
+ movq -8(%ebx),%mm2
+ psllq $60,%mm6
+ movq -16(%ebx),%mm5
+ por %mm6,%mm1
+ movq %mm0,-80(%edi)
+ psrlq $4,%mm0
+ movq %mm3,48(%edi)
+ movq %mm3,%mm7
+ movq %mm1,-88(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,40(%ebp)
+ shll $4,%edx
+ movb %dl,7(%esp)
+ movl 8(%ebx),%edx
+ movq 8(%ebx),%mm1
+ psllq $60,%mm7
+ movq (%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-72(%edi)
+ psrlq $4,%mm2
+ movq %mm5,56(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-80(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,48(%ebp)
+ shll $4,%edx
+ movb %dl,8(%esp)
+ movl 24(%ebx),%edx
+ movq 24(%ebx),%mm0
+ psllq $60,%mm6
+ movq 16(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-64(%edi)
+ psrlq $4,%mm1
+ movq %mm4,64(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-72(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,56(%ebp)
+ shll $4,%edx
+ movb %dl,9(%esp)
+ movl 40(%ebx),%edx
+ movq 40(%ebx),%mm2
+ psllq $60,%mm7
+ movq 32(%ebx),%mm5
+ por %mm7,%mm1
+ movq %mm0,-56(%edi)
+ psrlq $4,%mm0
+ movq %mm3,72(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-64(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,64(%ebp)
+ shll $4,%edx
+ movb %dl,10(%esp)
+ movl 56(%ebx),%edx
+ movq 56(%ebx),%mm1
+ psllq $60,%mm6
+ movq 48(%ebx),%mm4
+ por %mm6,%mm0
+ movq %mm2,-48(%edi)
+ psrlq $4,%mm2
+ movq %mm5,80(%edi)
+ movq %mm5,%mm7
+ movq %mm0,-56(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,72(%ebp)
+ shll $4,%edx
+ movb %dl,11(%esp)
+ movl 72(%ebx),%edx
+ movq 72(%ebx),%mm0
+ psllq $60,%mm7
+ movq 64(%ebx),%mm3
+ por %mm7,%mm2
+ movq %mm1,-40(%edi)
+ psrlq $4,%mm1
+ movq %mm4,88(%edi)
+ movq %mm4,%mm6
+ movq %mm2,-48(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,80(%ebp)
+ shll $4,%edx
+ movb %dl,12(%esp)
+ movl 88(%ebx),%edx
+ movq 88(%ebx),%mm2
+ psllq $60,%mm6
+ movq 80(%ebx),%mm5
+ por %mm6,%mm1
+ movq %mm0,-32(%edi)
+ psrlq $4,%mm0
+ movq %mm3,96(%edi)
+ movq %mm3,%mm7
+ movq %mm1,-40(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,88(%ebp)
+ shll $4,%edx
+ movb %dl,13(%esp)
+ movl 104(%ebx),%edx
+ movq 104(%ebx),%mm1
+ psllq $60,%mm7
+ movq 96(%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-24(%edi)
+ psrlq $4,%mm2
+ movq %mm5,104(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-32(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,96(%ebp)
+ shll $4,%edx
+ movb %dl,14(%esp)
+ movl 120(%ebx),%edx
+ movq 120(%ebx),%mm0
+ psllq $60,%mm6
+ movq 112(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-16(%edi)
+ psrlq $4,%mm1
+ movq %mm4,112(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-24(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,104(%ebp)
+ shll $4,%edx
+ movb %dl,15(%esp)
+ psllq $60,%mm7
+ por %mm7,%mm1
+ movq %mm0,-8(%edi)
+ psrlq $4,%mm0
+ movq %mm3,120(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-16(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,112(%ebp)
+ psllq $60,%mm6
+ por %mm6,%mm0
+ movq %mm0,-8(%ebp)
+ movq %mm3,120(%ebp)
+ movq (%eax),%mm6
+ movl 8(%eax),%ebx
+ movl 12(%eax),%edx
.align 16
-.L007mmx_outer_loop:
- xorl 12(%edi),%ebx
- xorl 4(%edi),%edx
- xorl 8(%edi),%ecx
- xorl (%edi),%ebp
- movl %edi,48(%esp)
- movl %ebx,12(%esp)
- movl %edx,4(%esp)
- movl %ecx,8(%esp)
- movl %ebp,(%esp)
- movl %esp,%edi
- shrl $24,%ebx
- call _mmx_gmult_4bit_inner
- movl 48(%esp),%edi
- leal 16(%edi),%edi
- cmpl 52(%esp),%edi
- jb .L007mmx_outer_loop
- movl 40(%esp),%edi
+.L009outer:
+ xorl 12(%ecx),%edx
+ xorl 8(%ecx),%ebx
+ pxor (%ecx),%mm6
+ leal 16(%ecx),%ecx
+ movl %ebx,536(%esp)
+ movq %mm6,528(%esp)
+ movl %ecx,548(%esp)
+ xorl %eax,%eax
+ roll $8,%edx
+ movb %dl,%al
+ movl %eax,%ebp
+ andb $15,%al
+ shrl $4,%ebp
+ pxor %mm0,%mm0
+ roll $8,%edx
+ pxor %mm1,%mm1
+ pxor %mm2,%mm2
+ movq 16(%esp,%eax,8),%mm7
+ movq 144(%esp,%eax,8),%mm6
+ movb %dl,%al
+ movd %mm7,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ shrl $4,%edi
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 536(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 532(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 528(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 524(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ pxor 144(%esp,%eax,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ movzbl %bl,%ebx
+ pxor %mm2,%mm2
+ psllq $4,%mm1
+ movd %mm7,%ecx
+ psrlq $4,%mm7
+ movq %mm6,%mm3
+ psrlq $4,%mm6
+ shll $4,%ecx
+ pxor 16(%esp,%edi,8),%mm7
+ psllq $60,%mm3
+ movzbl %cl,%ecx
+ pxor %mm3,%mm7
+ pxor 144(%esp,%edi,8),%mm6
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor %mm1,%mm6
+ movd %mm7,%edx
+ pinsrw $3,(%esi,%ecx,2),%mm2
+ psllq $12,%mm0
+ pxor %mm0,%mm6
+ psrlq $32,%mm7
+ pxor %mm2,%mm6
+ movl 548(%esp),%ecx
+ movd %mm7,%ebx
+ movq %mm6,%mm3
+ psllw $8,%mm6
+ psrlw $8,%mm3
+ por %mm3,%mm6
+ bswap %edx
+ pshufw $27,%mm6,%mm6
+ bswap %ebx
+ cmpl 552(%esp),%ecx
+ jne .L009outer
+ movl 544(%esp),%eax
+ movl %edx,12(%eax)
+ movl %ebx,8(%eax)
+ movq %mm6,(%eax)
+ movl 556(%esp),%esp
emms
- movl %ebx,12(%edi)
- movl %edx,4(%edi)
- movl %ecx,8(%edi)
- movl %ebp,(%edi)
- addl $20,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.globl gcm_init_clmul
+.type gcm_init_clmul,@function
+.align 16
+gcm_init_clmul:
+.L_gcm_init_clmul_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%eax
+ call .L010pic
+.L010pic:
+ popl %ecx
+ leal .Lbswap-.L010pic(%ecx),%ecx
+ movdqu (%eax),%xmm2
+ pshufd $78,%xmm2,%xmm2
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+ pand 16(%ecx),%xmm5
+ pxor %xmm5,%xmm2
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%edx)
+ movdqu %xmm0,16(%edx)
+ ret
+.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
+.globl gcm_gmult_clmul
+.type gcm_gmult_clmul,@function
+.align 16
+gcm_gmult_clmul:
+.L_gcm_gmult_clmul_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ call .L011pic
+.L011pic:
+ popl %ecx
+ leal .Lbswap-.L011pic(%ecx),%ecx
+ movdqu (%eax),%xmm0
+ movdqa (%ecx),%xmm5
+ movups (%edx),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%eax)
+ ret
+.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
+.globl gcm_ghash_clmul
+.type gcm_ghash_clmul,@function
+.align 16
+gcm_ghash_clmul:
+.L_gcm_ghash_clmul_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%eax
+ movl 24(%esp),%edx
+ movl 28(%esp),%esi
+ movl 32(%esp),%ebx
+ call .L012pic
+.L012pic:
+ popl %ecx
+ leal .Lbswap-.L012pic(%ecx),%ecx
+ movdqu (%eax),%xmm0
+ movdqa (%ecx),%xmm5
+ movdqu (%edx),%xmm2
+.byte 102,15,56,0,197
+ subl $16,%ebx
+ jz .L013odd_tail
+ movdqu (%esi),%xmm3
+ movdqu 16(%esi),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movups 16(%edx),%xmm2
+ leal 32(%esi),%esi
+ subl $32,%ebx
+ jbe .L014even_tail
+.L015mod_loop:
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%esi),%xmm3
+ movups (%edx),%xmm2
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+ movdqu 16(%esi),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,%xmm7
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pshufd $78,%xmm5,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm5,%xmm3
+ pshufd $78,%xmm2,%xmm5
+ pxor %xmm2,%xmm5
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,58,68,221,0
+ movups 16(%edx),%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm3
+ movdqa %xmm3,%xmm5
+ psrldq $8,%xmm3
+ pslldq $8,%xmm5
+ pxor %xmm3,%xmm7
+ pxor %xmm5,%xmm6
+ movdqa (%ecx),%xmm5
+ leal 32(%esi),%esi
+ subl $32,%ebx
+ ja .L015mod_loop
+.L014even_tail:
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testl %ebx,%ebx
+ jnz .L016done
+ movups (%edx),%xmm2
+.L013odd_tail:
+ movdqu (%esi),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.L016done:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%eax)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
+.align 64
+.Lbswap:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
.Lrem_4bit:
-.long 0,0,0,29491200,0,58982400,0,38141952
-.long 0,117964800,0,113901568,0,76283904,0,88997888
-.long 0,235929600,0,265420800,0,227803136,0,206962688
-.long 0,152567808,0,148504576,0,177995776,0,190709760
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.align 64
-.L008rem_8bit:
+.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
diff --git a/crypto/modes/asm/ghashv8-armx-64.S b/crypto/modes/asm/ghashv8-armx-64.S
new file mode 100644
index 0000000..b77b6c4
--- /dev/null
+++ b/crypto/modes/asm/ghashv8-armx-64.S
@@ -0,0 +1,115 @@
+#include "arm_arch.h"
+
+.text
+.arch armv8-a+crypto
+.global gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+ ld1 {v17.2d},[x1] //load H
+ movi v16.16b,#0xe1
+ ext v3.16b,v17.16b,v17.16b,#8
+ shl v16.2d,v16.2d,#57
+ ushr v18.2d,v16.2d,#63
+ ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01
+ dup v17.4s,v17.s[1]
+ ushr v19.2d,v3.2d,#63
+ sshr v17.4s,v17.4s,#31 //broadcast carry bit
+ and v19.16b,v19.16b,v16.16b
+ shl v3.2d,v3.2d,#1
+ ext v19.16b,v19.16b,v19.16b,#8
+ and v16.16b,v16.16b,v17.16b
+ orr v3.16b,v3.16b,v19.16b //H<<<=1
+ eor v3.16b,v3.16b,v16.16b //twisted H
+ st1 {v3.2d},[x0]
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+
+.global gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+ ld1 {v17.2d},[x0] //load Xi
+ movi v19.16b,#0xe1
+ ld1 {v20.2d},[x1] //load twisted H
+ shl v19.2d,v19.2d,#57
+#ifndef __ARMEB__
+ rev64 v17.16b,v17.16b
+#endif
+ ext v21.16b,v20.16b,v20.16b,#8
+ mov x3,#0
+ ext v3.16b,v17.16b,v17.16b,#8
+ mov x12,#0
+ eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
+ mov x2,x0
+ b .Lgmult_v8
+.size gcm_gmult_v8,.-gcm_gmult_v8
+
+.global gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+ ld1 {v0.2d},[x0] //load [rotated] Xi
+ subs x3,x3,#16
+ movi v19.16b,#0xe1
+ mov x12,#16
+ ld1 {v20.2d},[x1] //load twisted H
+ csel x12,xzr,x12,eq
+ ext v0.16b,v0.16b,v0.16b,#8
+ shl v19.2d,v19.2d,#57
+ ld1 {v17.2d},[x2],x12 //load [rotated] inp
+ ext v21.16b,v20.16b,v20.16b,#8
+#ifndef __ARMEB__
+ rev64 v0.16b,v0.16b
+ rev64 v17.16b,v17.16b
+#endif
+ eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing
+ ext v3.16b,v17.16b,v17.16b,#8
+ b .Loop_v8
+
+.align 4
+.Loop_v8:
+ ext v18.16b,v0.16b,v0.16b,#8
+ eor v3.16b,v3.16b,v0.16b //inp^=Xi
+ eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi
+
+.Lgmult_v8:
+ pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
+ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
+ pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
+ subs x3,x3,#16
+ pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
+ csel x12,xzr,x12,eq
+
+ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
+ eor v18.16b,v0.16b,v2.16b
+ eor v1.16b,v1.16b,v17.16b
+ ld1 {v17.2d},[x2],x12 //load [rotated] inp
+ eor v1.16b,v1.16b,v18.16b
+ pmull v18.1q,v0.1d,v19.1d //1st phase
+
+ ins v2.d[0],v1.d[1]
+ ins v1.d[1],v0.d[0]
+#ifndef __ARMEB__
+ rev64 v17.16b,v17.16b
+#endif
+ eor v0.16b,v1.16b,v18.16b
+ ext v3.16b,v17.16b,v17.16b,#8
+
+ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
+ pmull v0.1q,v0.1d,v19.1d
+ eor v18.16b,v18.16b,v2.16b
+ eor v0.16b,v0.16b,v18.16b
+ b.hs .Loop_v8
+
+#ifndef __ARMEB__
+ rev64 v0.16b,v0.16b
+#endif
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.2d},[x0] //write out Xi
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.asciz "GHASH for ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 2
diff --git a/crypto/modes/asm/ghashv8-armx.S b/crypto/modes/asm/ghashv8-armx.S
new file mode 100644
index 0000000..f388c54
--- /dev/null
+++ b/crypto/modes/asm/ghashv8-armx.S
@@ -0,0 +1,116 @@
+#include "arm_arch.h"
+
+.text
+.fpu neon
+.code 32
+.global gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+ vld1.64 {q9},[r1] @ load H
+ vmov.i8 q8,#0xe1
+ vext.8 q3,q9,q9,#8
+ vshl.i64 q8,q8,#57
+ vshr.u64 q10,q8,#63
+ vext.8 q8,q10,q8,#8 @ t0=0xc2....01
+ vdup.32 q9,d18[1]
+ vshr.u64 q11,q3,#63
+ vshr.s32 q9,q9,#31 @ broadcast carry bit
+ vand q11,q11,q8
+ vshl.i64 q3,q3,#1
+ vext.8 q11,q11,q11,#8
+ vand q8,q8,q9
+ vorr q3,q3,q11 @ H<<<=1
+ veor q3,q3,q8 @ twisted H
+ vst1.64 {q3},[r0]
+
+ bx lr
+.size gcm_init_v8,.-gcm_init_v8
+
+.global gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+ vld1.64 {q9},[r0] @ load Xi
+ vmov.i8 q11,#0xe1
+ vld1.64 {q12},[r1] @ load twisted H
+ vshl.u64 q11,q11,#57
+#ifndef __ARMEB__
+ vrev64.8 q9,q9
+#endif
+ vext.8 q13,q12,q12,#8
+ mov r3,#0
+ vext.8 q3,q9,q9,#8
+ mov r12,#0
+ veor q13,q13,q12 @ Karatsuba pre-processing
+ mov r2,r0
+ b .Lgmult_v8
+.size gcm_gmult_v8,.-gcm_gmult_v8
+
+.global gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+ vld1.64 {q0},[r0] @ load [rotated] Xi
+ subs r3,r3,#16
+ vmov.i8 q11,#0xe1
+ mov r12,#16
+ vld1.64 {q12},[r1] @ load twisted H
+ moveq r12,#0
+ vext.8 q0,q0,q0,#8
+ vshl.u64 q11,q11,#57
+ vld1.64 {q9},[r2],r12 @ load [rotated] inp
+ vext.8 q13,q12,q12,#8
+#ifndef __ARMEB__
+ vrev64.8 q0,q0
+ vrev64.8 q9,q9
+#endif
+ veor q13,q13,q12 @ Karatsuba pre-processing
+ vext.8 q3,q9,q9,#8
+ b .Loop_v8
+
+.align 4
+.Loop_v8:
+ vext.8 q10,q0,q0,#8
+ veor q3,q3,q0 @ inp^=Xi
+ veor q9,q9,q10 @ q9 is rotated inp^Xi
+
+.Lgmult_v8:
+ .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
+ veor q9,q9,q3 @ Karatsuba pre-processing
+ .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
+ subs r3,r3,#16
+ .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ moveq r12,#0
+
+ vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
+ veor q10,q0,q2
+ veor q1,q1,q9
+ vld1.64 {q9},[r2],r12 @ load [rotated] inp
+ veor q1,q1,q10
+ .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
+
+ vmov d4,d3 @ Xh|Xm - 256-bit result
+ vmov d3,d0 @ Xm is rotated Xl
+#ifndef __ARMEB__
+ vrev64.8 q9,q9
+#endif
+ veor q0,q1,q10
+ vext.8 q3,q9,q9,#8
+
+ vext.8 q10,q0,q0,#8 @ 2nd phase
+ .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
+ veor q10,q10,q2
+ veor q0,q0,q10
+ bhs .Loop_v8
+
+#ifndef __ARMEB__
+ vrev64.8 q0,q0
+#endif
+ vext.8 q0,q0,q0,#8
+ vst1.64 {q0},[r0] @ write out Xi
+
+ bx lr
+.size gcm_ghash_v8,.-gcm_ghash_v8
+.asciz "GHASH for ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 2
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
new file mode 100644
index 0000000..69e863e
--- /dev/null
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -0,0 +1,240 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
+#
+# June 2014
+#
+# Initial version was developed in tight cooperation with Ard
+# Biesheuvel <[email protected]> from bits-n-pieces from
+# other assembly modules. Just like aesv8-armx.pl this module
+# supports both AArch32 and AArch64 execution modes.
+#
+# Current performance in cycles per processed byte:
+#
+# PMULL[2] 32-bit NEON(*)
+# Apple A7 1.76 5.62
+# Cortex-A5x n/a n/a
+#
+# (*) presented for reference/comparison purposes;
+
+$flavour = shift;
+open STDOUT,">".shift;
+
+$Xi="x0"; # argument block
+$Htbl="x1";
+$inp="x2";
+$len="x3";
+
+$inc="x12";
+
+{
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
+my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14));
+
+$code=<<___;
+#include "arm_arch.h"
+
+.text
+___
+$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
+$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
+
+$code.=<<___;
+.global gcm_init_v8
+.type gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+ vld1.64 {$t1},[x1] @ load H
+ vmov.i8 $t0,#0xe1
+ vext.8 $IN,$t1,$t1,#8
+ vshl.i64 $t0,$t0,#57
+ vshr.u64 $t2,$t0,#63
+ vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01
+ vdup.32 $t1,${t1}[1]
+ vshr.u64 $t3,$IN,#63
+ vshr.s32 $t1,$t1,#31 @ broadcast carry bit
+ vand $t3,$t3,$t0
+ vshl.i64 $IN,$IN,#1
+ vext.8 $t3,$t3,$t3,#8
+ vand $t0,$t0,$t1
+ vorr $IN,$IN,$t3 @ H<<<=1
+ veor $IN,$IN,$t0 @ twisted H
+ vst1.64 {$IN},[x0]
+
+ ret
+.size gcm_init_v8,.-gcm_init_v8
+
+.global gcm_gmult_v8
+.type gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+ vld1.64 {$t1},[$Xi] @ load Xi
+ vmov.i8 $t3,#0xe1
+ vld1.64 {$H},[$Htbl] @ load twisted H
+ vshl.u64 $t3,$t3,#57
+#ifndef __ARMEB__
+ vrev64.8 $t1,$t1
+#endif
+ vext.8 $Hhl,$H,$H,#8
+ mov $len,#0
+ vext.8 $IN,$t1,$t1,#8
+ mov $inc,#0
+ veor $Hhl,$Hhl,$H @ Karatsuba pre-processing
+ mov $inp,$Xi
+ b .Lgmult_v8
+.size gcm_gmult_v8,.-gcm_gmult_v8
+
+.global gcm_ghash_v8
+.type gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+ vld1.64 {$Xl},[$Xi] @ load [rotated] Xi
+ subs $len,$len,#16
+ vmov.i8 $t3,#0xe1
+ mov $inc,#16
+ vld1.64 {$H},[$Htbl] @ load twisted H
+ cclr $inc,eq
+ vext.8 $Xl,$Xl,$Xl,#8
+ vshl.u64 $t3,$t3,#57
+ vld1.64 {$t1},[$inp],$inc @ load [rotated] inp
+ vext.8 $Hhl,$H,$H,#8
+#ifndef __ARMEB__
+ vrev64.8 $Xl,$Xl
+ vrev64.8 $t1,$t1
+#endif
+ veor $Hhl,$Hhl,$H @ Karatsuba pre-processing
+ vext.8 $IN,$t1,$t1,#8
+ b .Loop_v8
+
+.align 4
+.Loop_v8:
+ vext.8 $t2,$Xl,$Xl,#8
+ veor $IN,$IN,$Xl @ inp^=Xi
+ veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi
+
+.Lgmult_v8:
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ veor $t1,$t1,$IN @ Karatsuba pre-processing
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ subs $len,$len,#16
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ cclr $inc,eq
+
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t1
+ vld1.64 {$t1},[$inp],$inc @ load [rotated] inp
+ veor $Xm,$Xm,$t2
+ vpmull.p64 $t2,$Xl,$t3 @ 1st phase
+
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+#ifndef __ARMEB__
+ vrev64.8 $t1,$t1
+#endif
+ veor $Xl,$Xm,$t2
+ vext.8 $IN,$t1,$t1,#8
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase
+ vpmull.p64 $Xl,$Xl,$t3
+ veor $t2,$t2,$Xh
+ veor $Xl,$Xl,$t2
+ b.hs .Loop_v8
+
+#ifndef __ARMEB__
+ vrev64.8 $Xl,$Xl
+#endif
+ vext.8 $Xl,$Xl,$Xl,#8
+ vst1.64 {$Xl},[$Xi] @ write out Xi
+
+ ret
+.size gcm_ghash_v8,.-gcm_ghash_v8
+___
+}
+$code.=<<___;
+.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+if ($flavour =~ /64/) { ######## 64-bit code
+ sub unvmov {
+ my $arg=shift;
+
+ $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
+ sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
+ }
+ foreach(split("\n",$code)) {
+ s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or
+ s/vmov\.i8/movi/o or # fix up legacy mnemonics
+ s/vmov\s+(.*)/unvmov($1)/geo or
+ s/vext\.8/ext/o or
+ s/vshr\.s/sshr\.s/o or
+ s/vshr/ushr/o or
+ s/^(\s+)v/$1/o or # strip off v prefix
+ s/\bbx\s+lr\b/ret/o;
+
+ s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
+ s/@\s/\/\//o; # old->new style commentary
+
+ # fix up remainig legacy suffixes
+ s/\.[ui]?8(\s)/$1/o;
+ s/\.[uis]?32//o and s/\.16b/\.4s/go;
+ m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument
+ m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments
+ s/\.[uisp]?64//o and s/\.16b/\.2d/go;
+ s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
+
+ print $_,"\n";
+ }
+} else { ######## 32-bit code
+ sub unvdup32 {
+ my $arg=shift;
+
+ $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
+ sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
+ }
+ sub unvpmullp64 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {
+ my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ $word |= 0x00010001 if ($mnemonic =~ "2");
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+
+ foreach(split("\n",$code)) {
+ s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers
+ s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers
+ s/\/\/\s?/@ /o; # new->old style commentary
+
+ # fix up remainig new-style suffixes
+ s/\],#[0-9]+/]!/o;
+
+ s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
+ s/vdup\.32\s+(.*)/unvdup32($1)/geo or
+ s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/^(\s+)b\./$1b/o or
+ s/^(\s+)ret/$1bx\tlr/o;
+
+ print $_,"\n";
+ }
+}
+
+close STDOUT; # enforce flush
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index 250063d..79ebb66 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -642,7 +642,7 @@
#endif
-#if TABLE_BITS==4 && defined(GHASH_ASM)
+#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
(defined(__i386) || defined(__i386__) || \
defined(__x86_64) || defined(__x86_64__) || \
@@ -663,13 +663,21 @@
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif
-# elif defined(__arm__) || defined(__arm)
+# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h"
# if __ARM_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
+# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
+# if defined(__arm__) || defined(__arm)
+# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
+# endif
+void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif
# endif
#endif
@@ -739,10 +747,21 @@
ctx->ghash = gcm_ghash_4bit;
# endif
# elif defined(GHASH_ASM_ARM)
- if (OPENSSL_armcap_P & ARMV7_NEON) {
+# ifdef PMULL_CAPABLE
+ if (PMULL_CAPABLE) {
+ gcm_init_v8(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_v8;
+ ctx->ghash = gcm_ghash_v8;
+ } else
+# endif
+# ifdef NEON_CAPABLE
+ if (NEON_CAPABLE) {
+ gcm_init_neon(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_neon;
ctx->ghash = gcm_ghash_neon;
- } else {
+ } else
+# endif
+ {
gcm_init_4bit(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_4bit;
ctx->ghash = gcm_ghash_4bit;
@@ -810,7 +829,11 @@
GCM_MUL(ctx,Yi);
if (is_endian.little)
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
ctr = GETU32(ctx->Yi.c+12);
+#endif
else
ctr = ctx->Yi.d[3];
}
@@ -818,7 +841,11 @@
(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
}
@@ -913,7 +940,11 @@
}
if (is_endian.little)
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
ctr = GETU32(ctx->Yi.c+12);
+#endif
else
ctr = ctx->Yi.d[3];
@@ -947,7 +978,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i)
@@ -969,7 +1004,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i)
@@ -988,7 +1027,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i)
@@ -1004,7 +1047,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
@@ -1022,7 +1069,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
}
@@ -1066,7 +1117,11 @@
}
if (is_endian.little)
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
ctr = GETU32(ctx->Yi.c+12);
+#endif
else
ctr = ctx->Yi.d[3];
@@ -1103,7 +1158,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i)
@@ -1123,7 +1182,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i)
@@ -1141,7 +1204,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
for (i=0; i<16/sizeof(size_t); ++i) {
@@ -1159,7 +1226,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
@@ -1180,7 +1251,11 @@
(*block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
}
@@ -1225,7 +1300,11 @@
}
if (is_endian.little)
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
ctr = GETU32(ctx->Yi.c+12);
+#endif
else
ctr = ctx->Yi.d[3];
@@ -1247,7 +1326,11 @@
(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
ctr += GHASH_CHUNK/16;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
GHASH(ctx,out,GHASH_CHUNK);
@@ -1262,7 +1345,11 @@
(*stream)(in,out,j,key,ctx->Yi.c);
ctr += (unsigned int)j;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
in += i;
@@ -1282,7 +1369,11 @@
(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
@@ -1324,7 +1415,11 @@
}
if (is_endian.little)
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
ctr = GETU32(ctx->Yi.c+12);
+#endif
else
ctr = ctx->Yi.d[3];
@@ -1349,7 +1444,11 @@
(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
ctr += GHASH_CHUNK/16;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
out += GHASH_CHUNK;
@@ -1375,7 +1474,11 @@
(*stream)(in,out,j,key,ctx->Yi.c);
ctr += (unsigned int)j;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
out += i;
@@ -1386,7 +1489,11 @@
(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
++ctr;
if (is_endian.little)
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
PUTU32(ctx->Yi.c+12,ctr);
+#endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
diff --git a/crypto/ocsp/ocsp.h b/crypto/ocsp/ocsp.h
index 31e4574..f14e9f7 100644
--- a/crypto/ocsp/ocsp.h
+++ b/crypto/ocsp/ocsp.h
@@ -90,6 +90,13 @@
#define OCSP_RESPID_KEY 0x400
#define OCSP_NOTIME 0x800
+#ifdef OPENSSL_SYS_WIN32
+ /* Under Win32 these are defined in wincrypt.h */
+#undef OCSP_REQUEST
+#undef X509_NAME
+#undef OCSP_RESPONSE
+#endif
+
/* CertID ::= SEQUENCE {
* hashAlgorithm AlgorithmIdentifier,
* issuerNameHash OCTET STRING, -- Hash of Issuer's DN
diff --git a/crypto/opensslconf-32.h b/crypto/opensslconf-32.h
index d662548..caf6f1b 100644
--- a/crypto/opensslconf-32.h
+++ b/crypto/opensslconf-32.h
@@ -53,6 +53,9 @@
#ifndef OPENSSL_NO_RFC3779
# define OPENSSL_NO_RFC3779
#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
#ifndef OPENSSL_NO_RSAX
# define OPENSSL_NO_RSAX
#endif
@@ -137,6 +140,9 @@
# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
# define NO_RFC3779
# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
# define NO_RSAX
# endif
diff --git a/crypto/opensslconf-64.h b/crypto/opensslconf-64.h
index 70c5a2c..88fb041 100644
--- a/crypto/opensslconf-64.h
+++ b/crypto/opensslconf-64.h
@@ -53,6 +53,9 @@
#ifndef OPENSSL_NO_RFC3779
# define OPENSSL_NO_RFC3779
#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
#ifndef OPENSSL_NO_RSAX
# define OPENSSL_NO_RSAX
#endif
@@ -137,6 +140,9 @@
# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
# define NO_RFC3779
# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
# define NO_RSAX
# endif
diff --git a/crypto/opensslconf-static-32.h b/crypto/opensslconf-static-32.h
new file mode 100644
index 0000000..caf6f1b
--- /dev/null
+++ b/crypto/opensslconf-static-32.h
@@ -0,0 +1,322 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_THREADS
+# define OPENSSL_THREADS
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <unistd.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned char
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#define RC4_CHUNK unsigned long
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned int
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#define BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#define BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#define DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/crypto/opensslconf-static-64.h b/crypto/opensslconf-static-64.h
new file mode 100644
index 0000000..88fb041
--- /dev/null
+++ b/crypto/opensslconf-static-64.h
@@ -0,0 +1,322 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_THREADS
+# define OPENSSL_THREADS
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <unistd.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned char
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#define RC4_CHUNK unsigned long
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned int
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#define SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#undef THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#define BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#define DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/crypto/opensslconf-static-trusty.h b/crypto/opensslconf-static-trusty.h
new file mode 100644
index 0000000..06f9f98
--- /dev/null
+++ b/crypto/opensslconf-static-trusty.h
@@ -0,0 +1,448 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_CMS
+# define OPENSSL_NO_CMS
+#endif
+#ifndef OPENSSL_NO_COMP
+# define OPENSSL_NO_COMP
+#endif
+#ifndef OPENSSL_NO_CONF
+# define OPENSSL_NO_CONF
+#endif
+#ifndef OPENSSL_NO_DES
+# define OPENSSL_NO_DES
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_LOCKING
+# define OPENSSL_NO_LOCKING
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MD4
+# define OPENSSL_NO_MD4
+#endif
+#ifndef OPENSSL_NO_MD5
+# define OPENSSL_NO_MD5
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_OCSP
+# define OPENSSL_NO_OCSP
+#endif
+#ifndef OPENSSL_NO_PEM
+# define OPENSSL_NO_PEM
+#endif
+#ifndef OPENSSL_NO_PKCS12
+# define OPENSSL_NO_PKCS12
+#endif
+#ifndef OPENSSL_NO_PQUEUE
+# define OPENSSL_NO_PQUEUE
+#endif
+#ifndef OPENSSL_NO_RC2
+# define OPENSSL_NO_RC2
+#endif
+#ifndef OPENSSL_NO_RC4
+# define OPENSSL_NO_RC4
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_SRP
+# define OPENSSL_NO_SRP
+#endif
+#ifndef OPENSSL_NO_SSL2
+# define OPENSSL_NO_SSL2
+#endif
+#ifndef OPENSSL_NO_SSL3
+# define OPENSSL_NO_SSL3
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_TLS1
+# define OPENSSL_NO_TLS1
+#endif
+#ifndef OPENSSL_NO_TLSEXT
+# define OPENSSL_NO_TLSEXT
+#endif
+#ifndef OPENSSL_NO_TS
+# define OPENSSL_NO_TS
+#endif
+#ifndef OPENSSL_NO_TXT_DB
+# define OPENSSL_NO_TXT_DB
+#endif
+#ifndef OPENSSL_NO_UI
+# define OPENSSL_NO_UI
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_NO_ERR
+# define OPENSSL_NO_ERR
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS)
+# define NO_CMS
+# endif
+# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP)
+# define NO_COMP
+# endif
+# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF)
+# define NO_CONF
+# endif
+# if defined(OPENSSL_NO_DES) && !defined(NO_DES)
+# define NO_DES
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING)
+# define NO_LOCKING
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4)
+# define NO_MD4
+# endif
+# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5)
+# define NO_MD5
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP)
+# define NO_OCSP
+# endif
+# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM)
+# define NO_PEM
+# endif
+# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12)
+# define NO_PKCS12
+# endif
+# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE)
+# define NO_PQUEUE
+# endif
+# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2)
+# define NO_RC2
+# endif
+# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4)
+# define NO_RC4
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP)
+# define NO_SRP
+# endif
+# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2)
+# define NO_SSL2
+# endif
+# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3)
+# define NO_SSL3
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1)
+# define NO_TLS1
+# endif
+# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT)
+# define NO_TLSEXT
+# endif
+# if defined(OPENSSL_NO_TS) && !defined(NO_TS)
+# define NO_TS
+# endif
+# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB)
+# define NO_TXT_DB
+# endif
+# if defined(OPENSSL_NO_UI) && !defined(NO_UI)
+# define NO_UI
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <trusty_std.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned int
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#undef RC4_CHUNK
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned long
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#undef BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#undef DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/crypto/opensslconf-static.h b/crypto/opensslconf-static.h
new file mode 100644
index 0000000..f63a6e0
--- /dev/null
+++ b/crypto/opensslconf-static.h
@@ -0,0 +1,6 @@
+// Auto-generated - DO NOT EDIT!
+#if defined(__LP64__)
+#include "opensslconf-static-64.h"
+#else
+#include "opensslconf-static-32.h"
+#endif
diff --git a/crypto/opensslconf-trusty.h b/crypto/opensslconf-trusty.h
new file mode 100644
index 0000000..06f9f98
--- /dev/null
+++ b/crypto/opensslconf-trusty.h
@@ -0,0 +1,448 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_CMS
+# define OPENSSL_NO_CMS
+#endif
+#ifndef OPENSSL_NO_COMP
+# define OPENSSL_NO_COMP
+#endif
+#ifndef OPENSSL_NO_CONF
+# define OPENSSL_NO_CONF
+#endif
+#ifndef OPENSSL_NO_DES
+# define OPENSSL_NO_DES
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_LOCKING
+# define OPENSSL_NO_LOCKING
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MD4
+# define OPENSSL_NO_MD4
+#endif
+#ifndef OPENSSL_NO_MD5
+# define OPENSSL_NO_MD5
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_OCSP
+# define OPENSSL_NO_OCSP
+#endif
+#ifndef OPENSSL_NO_PEM
+# define OPENSSL_NO_PEM
+#endif
+#ifndef OPENSSL_NO_PKCS12
+# define OPENSSL_NO_PKCS12
+#endif
+#ifndef OPENSSL_NO_PQUEUE
+# define OPENSSL_NO_PQUEUE
+#endif
+#ifndef OPENSSL_NO_RC2
+# define OPENSSL_NO_RC2
+#endif
+#ifndef OPENSSL_NO_RC4
+# define OPENSSL_NO_RC4
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_SRP
+# define OPENSSL_NO_SRP
+#endif
+#ifndef OPENSSL_NO_SSL2
+# define OPENSSL_NO_SSL2
+#endif
+#ifndef OPENSSL_NO_SSL3
+# define OPENSSL_NO_SSL3
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_TLS1
+# define OPENSSL_NO_TLS1
+#endif
+#ifndef OPENSSL_NO_TLSEXT
+# define OPENSSL_NO_TLSEXT
+#endif
+#ifndef OPENSSL_NO_TS
+# define OPENSSL_NO_TS
+#endif
+#ifndef OPENSSL_NO_TXT_DB
+# define OPENSSL_NO_TXT_DB
+#endif
+#ifndef OPENSSL_NO_UI
+# define OPENSSL_NO_UI
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_NO_ERR
+# define OPENSSL_NO_ERR
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS)
+# define NO_CMS
+# endif
+# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP)
+# define NO_COMP
+# endif
+# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF)
+# define NO_CONF
+# endif
+# if defined(OPENSSL_NO_DES) && !defined(NO_DES)
+# define NO_DES
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING)
+# define NO_LOCKING
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4)
+# define NO_MD4
+# endif
+# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5)
+# define NO_MD5
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP)
+# define NO_OCSP
+# endif
+# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM)
+# define NO_PEM
+# endif
+# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12)
+# define NO_PKCS12
+# endif
+# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE)
+# define NO_PQUEUE
+# endif
+# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2)
+# define NO_RC2
+# endif
+# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4)
+# define NO_RC4
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP)
+# define NO_SRP
+# endif
+# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2)
+# define NO_SSL2
+# endif
+# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3)
+# define NO_SSL3
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1)
+# define NO_TLS1
+# endif
+# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT)
+# define NO_TLSEXT
+# endif
+# if defined(OPENSSL_NO_TS) && !defined(NO_TS)
+# define NO_TS
+# endif
+# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB)
+# define NO_TXT_DB
+# endif
+# if defined(OPENSSL_NO_UI) && !defined(NO_UI)
+# define NO_UI
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <trusty_std.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned int
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#undef RC4_CHUNK
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned long
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#undef BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#undef DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/crypto/opensslconf.h b/crypto/opensslconf.h
index d00590b..94212a0 100644
--- a/crypto/opensslconf.h
+++ b/crypto/opensslconf.h
@@ -1,6 +1,10 @@
// Auto-generated - DO NOT EDIT!
+#ifndef OPENSSL_SYS_TRUSTY
#if defined(__LP64__)
#include "opensslconf-64.h"
#else
#include "opensslconf-32.h"
#endif
+#else
+#include "opensslconf-trusty.h"
+#endif
diff --git a/crypto/opensslv.h b/crypto/opensslv.h
index b27a5bb..c3b6ace 100644
--- a/crypto/opensslv.h
+++ b/crypto/opensslv.h
@@ -25,11 +25,11 @@
* (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
* major minor fix final patch/beta)
*/
-#define OPENSSL_VERSION_NUMBER 0x1000106fL
+#define OPENSSL_VERSION_NUMBER 0x1000108fL
#ifdef OPENSSL_FIPS
-#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1f-fips 6 Jan 2014"
+#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014"
#else
-#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1f 6 Jan 2014"
+#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014"
#endif
#define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT
diff --git a/crypto/perlasm/x86asm.pl b/crypto/perlasm/x86asm.pl
index eb543db..3f190ae 100644
--- a/crypto/perlasm/x86asm.pl
+++ b/crypto/perlasm/x86asm.pl
@@ -257,4 +257,6 @@
&file($filename);
}
+sub ::hidden {}
+
1;
diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl
index 682a3a3..735c1ad 100644
--- a/crypto/perlasm/x86gas.pl
+++ b/crypto/perlasm/x86gas.pl
@@ -250,4 +250,6 @@
sub ::dataseg
{ push(@out,".data\n"); }
+*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
+
1;
diff --git a/crypto/pkcs12/p12_crt.c b/crypto/pkcs12/p12_crt.c
index a34915d..35e8a4a 100644
--- a/crypto/pkcs12/p12_crt.c
+++ b/crypto/pkcs12/p12_crt.c
@@ -96,7 +96,11 @@
nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
else
#endif
+#ifdef OPENSSL_NO_RC2
+ nid_cert = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
+#else
nid_cert = NID_pbe_WithSHA1And40BitRC2_CBC;
+#endif
}
if (!nid_key)
nid_key = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
@@ -286,7 +290,11 @@
free_safes = 0;
if (nid_safe == 0)
+#ifdef OPENSSL_NO_RC2
+ nid_safe = NID_pbe_WithSHA1And3_Key_TripleDES_CBC;
+#else
nid_safe = NID_pbe_WithSHA1And40BitRC2_CBC;
+#endif
if (nid_safe == -1)
p7 = PKCS12_pack_p7data(bags);
diff --git a/crypto/pkcs12/p12_kiss.c b/crypto/pkcs12/p12_kiss.c
index 206b1b0..c9b7ab6 100644
--- a/crypto/pkcs12/p12_kiss.c
+++ b/crypto/pkcs12/p12_kiss.c
@@ -269,7 +269,7 @@
int len, r;
unsigned char *data;
len = ASN1_STRING_to_UTF8(&data, fname);
- if(len > 0) {
+ if(len >= 0) {
r = X509_alias_set1(x509, data, len);
OPENSSL_free(data);
if (!r)
diff --git a/crypto/pkcs7/pk7_doit.c b/crypto/pkcs7/pk7_doit.c
index 77fda3b..d91aa11 100644
--- a/crypto/pkcs7/pk7_doit.c
+++ b/crypto/pkcs7/pk7_doit.c
@@ -440,6 +440,11 @@
{
case NID_pkcs7_signed:
data_body=PKCS7_get_octet_string(p7->d.sign->contents);
+ if (!PKCS7_is_detached(p7) && data_body == NULL)
+ {
+ PKCS7err(PKCS7_F_PKCS7_DATADECODE,PKCS7_R_INVALID_SIGNED_DATA_TYPE);
+ goto err;
+ }
md_sk=p7->d.sign->md_algs;
break;
case NID_pkcs7_signedAndEnveloped:
@@ -928,6 +933,7 @@
if (EVP_DigestSignUpdate(&mctx,abuf,alen) <= 0)
goto err;
OPENSSL_free(abuf);
+ abuf = NULL;
if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0)
goto err;
abuf = OPENSSL_malloc(siglen);
diff --git a/crypto/pkcs7/pkcs7.h b/crypto/pkcs7/pkcs7.h
index e4d4431..04f6037 100644
--- a/crypto/pkcs7/pkcs7.h
+++ b/crypto/pkcs7/pkcs7.h
@@ -453,6 +453,7 @@
#define PKCS7_R_ERROR_SETTING_CIPHER 121
#define PKCS7_R_INVALID_MIME_TYPE 131
#define PKCS7_R_INVALID_NULL_POINTER 143
+#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155
#define PKCS7_R_MIME_NO_CONTENT_TYPE 132
#define PKCS7_R_MIME_PARSE_ERROR 133
#define PKCS7_R_MIME_SIG_PARSE_ERROR 134
diff --git a/crypto/pkcs7/pkcs7err.c b/crypto/pkcs7/pkcs7err.c
index d0af32a..f3db08e 100644
--- a/crypto/pkcs7/pkcs7err.c
+++ b/crypto/pkcs7/pkcs7err.c
@@ -1,6 +1,6 @@
/* crypto/pkcs7/pkcs7err.c */
/* ====================================================================
- * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
+ * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -130,6 +130,7 @@
{ERR_REASON(PKCS7_R_ERROR_SETTING_CIPHER),"error setting cipher"},
{ERR_REASON(PKCS7_R_INVALID_MIME_TYPE) ,"invalid mime type"},
{ERR_REASON(PKCS7_R_INVALID_NULL_POINTER),"invalid null pointer"},
+{ERR_REASON(PKCS7_R_INVALID_SIGNED_DATA_TYPE),"invalid signed data type"},
{ERR_REASON(PKCS7_R_MIME_NO_CONTENT_TYPE),"mime no content type"},
{ERR_REASON(PKCS7_R_MIME_PARSE_ERROR) ,"mime parse error"},
{ERR_REASON(PKCS7_R_MIME_SIG_PARSE_ERROR),"mime sig parse error"},
diff --git a/crypto/rand/md_rand.c b/crypto/rand/md_rand.c
index dd29163..aee1c30 100644
--- a/crypto/rand/md_rand.c
+++ b/crypto/rand/md_rand.c
@@ -198,6 +198,9 @@
EVP_MD_CTX m;
int do_not_lock;
+ if (!num)
+ return;
+
/*
* (Based on the rand(3) manpage)
*
diff --git a/crypto/rand/rand_win.c b/crypto/rand/rand_win.c
index 5d134e1..34ffcd2 100644
--- a/crypto/rand/rand_win.c
+++ b/crypto/rand/rand_win.c
@@ -750,7 +750,7 @@
int y; /* y-coordinate of screen lines to grab */
int n = 16; /* number of screen lines to grab at a time */
- if (GetVersion() < 0x80000000 && OPENSSL_isservice()>0)
+ if (check_winnt() && OPENSSL_isservice()>0)
return;
/* Create a screen DC and a memory DC compatible to screen DC */
diff --git a/crypto/ripemd/README b/crypto/ripemd/README
deleted file mode 100644
index f1ffc8b..0000000
--- a/crypto/ripemd/README
+++ /dev/null
@@ -1,15 +0,0 @@
-RIPEMD-160
-http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html
-
-This is my implementation of RIPEMD-160. The pentium assember is a little
-off the pace since I only get 1050 cycles, while the best is 1013.
-I have a few ideas for how to get another 20 or so cycles, but at
-this point I will not bother right now. I believe the trick will be
-to remove my 'copy X array onto stack' until inside the RIP1() finctions the
-first time round. To do this I need another register and will only have one
-temporary one. A bit tricky.... I can also cleanup the saving of the 5 words
-after the first half of the calculation. I should read the origional
-value, add then write. Currently I just save the new and read the origioal.
-I then read both at the end. Bad.
-
-eric (20-Jan-1998)
diff --git a/crypto/ripemd/asm/rips.cpp b/crypto/ripemd/asm/rips.cpp
deleted file mode 100644
index f7a1367..0000000
--- a/crypto/ripemd/asm/rips.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-//
-// gettsc.inl
-//
-// gives access to the Pentium's (secret) cycle counter
-//
-// This software was written by Leonard Janke ([email protected])
-// in 1996-7 and is entered, by him, into the public domain.
-
-#if defined(__WATCOMC__)
-void GetTSC(unsigned long&);
-#pragma aux GetTSC = 0x0f 0x31 "mov [edi], eax" parm [edi] modify [edx eax];
-#elif defined(__GNUC__)
-inline
-void GetTSC(unsigned long& tsc)
-{
- asm volatile(".byte 15, 49\n\t"
- : "=eax" (tsc)
- :
- : "%edx", "%eax");
-}
-#elif defined(_MSC_VER)
-inline
-void GetTSC(unsigned long& tsc)
-{
- unsigned long a;
- __asm _emit 0fh
- __asm _emit 31h
- __asm mov a, eax;
- tsc=a;
-}
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <openssl/ripemd.h>
-
-#define ripemd160_block_x86 ripemd160_block_asm_host_order
-
-extern "C" {
-void ripemd160_block_x86(RIPEMD160_CTX *ctx, unsigned char *buffer,int num);
-}
-
-void main(int argc,char *argv[])
- {
- unsigned char buffer[64*256];
- RIPEMD160_CTX ctx;
- unsigned long s1,s2,e1,e2;
- unsigned char k[16];
- unsigned long data[2];
- unsigned char iv[8];
- int i,num=0,numm;
- int j=0;
-
- if (argc >= 2)
- num=atoi(argv[1]);
-
- if (num == 0) num=16;
- if (num > 250) num=16;
- numm=num+2;
-#if 0
- num*=64;
- numm*=64;
-#endif
-
- for (j=0; j<6; j++)
- {
- for (i=0; i<10; i++) /**/
- {
- ripemd160_block_x86(&ctx,buffer,numm);
- GetTSC(s1);
- ripemd160_block_x86(&ctx,buffer,numm);
- GetTSC(e1);
- GetTSC(s2);
- ripemd160_block_x86(&ctx,buffer,num);
- GetTSC(e2);
- ripemd160_block_x86(&ctx,buffer,num);
- }
- printf("ripemd160 (%d bytes) %d %d (%.2f)\n",num*64,
- e1-s1,e2-s2,(double)((e1-s1)-(e2-s2))/2);
- }
- }
-
diff --git a/crypto/ripemd/asm/rmd-586.pl b/crypto/ripemd/asm/rmd-586.pl
deleted file mode 100644
index e8b2bc2..0000000
--- a/crypto/ripemd/asm/rmd-586.pl
+++ /dev/null
@@ -1,591 +0,0 @@
-#!/usr/local/bin/perl
-
-# Normal is the
-# ripemd160_block_asm_data_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
-
-$normal=0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],$0);
-
-$A="ecx";
-$B="esi";
-$C="edi";
-$D="ebx";
-$E="ebp";
-$tmp1="eax";
-$tmp2="edx";
-
-$KL1=0x5A827999;
-$KL2=0x6ED9EBA1;
-$KL3=0x8F1BBCDC;
-$KL4=0xA953FD4E;
-$KR0=0x50A28BE6;
-$KR1=0x5C4DD124;
-$KR2=0x6D703EF3;
-$KR3=0x7A6D76E9;
-
-
-@wl=( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
- 7, 4,13, 1,10, 6,15, 3,12, 0, 9, 5, 2,14,11, 8,
- 3,10,14, 4, 9,15, 8, 1, 2, 7, 0, 6,13,11, 5,12,
- 1, 9,11,10, 0, 8,12, 4,13, 3, 7,15,14, 5, 6, 2,
- 4, 0, 5, 9, 7,12, 2,10,14, 1, 3, 8,11, 6,15,13,
- );
-
-@wr=( 5,14, 7, 0, 9, 2,11, 4,13, 6,15, 8, 1,10, 3,12,
- 6,11, 3, 7, 0,13, 5,10,14,15, 8,12, 4, 9, 1, 2,
- 15, 5, 1, 3, 7,14, 6, 9,11, 8,12, 2,10, 0, 4,13,
- 8, 6, 4, 1, 3,11,15, 0, 5,12, 2,13, 9, 7,10,14,
- 12,15,10, 4, 1, 5, 8, 7, 6, 2,13,14, 0, 3, 9,11,
- );
-
-@sl=( 11,14,15,12, 5, 8, 7, 9,11,13,14,15, 6, 7, 9, 8,
- 7, 6, 8,13,11, 9, 7,15, 7,12,15, 9,11, 7,13,12,
- 11,13, 6, 7,14, 9,13,15,14, 8,13, 6, 5,12, 7, 5,
- 11,12,14,15,14,15, 9, 8, 9,14, 5, 6, 8, 6, 5,12,
- 9,15, 5,11, 6, 8,13,12, 5,12,13,14,11, 8, 5, 6,
- );
-
-@sr=( 8, 9, 9,11,13,15,15, 5, 7, 7, 8,11,14,14,12, 6,
- 9,13,15, 7,12, 8, 9,11, 7, 7,12, 7, 6,15,13,11,
- 9, 7,15,11, 8, 6, 6,14,12,13, 5,14,13,13, 7, 5,
- 15, 5, 8,11,14,14, 6,14, 6, 9,12, 9,12, 5,15, 8,
- 8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11,
- );
-
-&ripemd160_block("ripemd160_block_asm_data_order");
-&asm_finish();
-
-sub Xv
- {
- local($n)=@_;
- return(&swtmp($n));
- # tmp on stack
- }
-
-sub Np
- {
- local($p)=@_;
- local(%n)=($A,$E,$B,$A,$C,$B,$D,$C,$E,$D);
- return($n{$p});
- }
-
-sub RIP1
- {
- local($a,$b,$c,$d,$e,$pos,$s,$o,$pos2)=@_;
-
- &comment($p++);
- if ($p & 1)
- {
- #&mov($tmp1, $c) if $o == -1;
- &xor($tmp1, $d) if $o == -1;
- &mov($tmp2, &Xv($pos));
- &xor($tmp1, $b);
- &add($a, $tmp2);
- &rotl($c, 10);
- &add($a, $tmp1);
- &mov($tmp1, &Np($c)); # NEXT
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- else
- {
- &xor($tmp1, $d);
- &mov($tmp2, &Xv($pos));
- &xor($tmp1, $b);
- &add($a, $tmp1);
- &mov($tmp1, &Np($c)) if $o <= 0;
- &mov($tmp1, -1) if $o == 1;
- # XXX if $o == 2;
- &rotl($c, 10);
- &add($a, $tmp2);
- &xor($tmp1, &Np($d)) if $o <= 0;
- &mov($tmp2, &Xv($pos2)) if $o == 1;
- &mov($tmp2, &wparam(0)) if $o == 2;
- &rotl($a, $s);
- &add($a, $e);
- }
- }
-
-sub RIP2
- {
- local($a,$b,$c,$d,$e,$pos,$pos2,$s,$K,$o)=@_;
-
-# XXXXXX
- &comment($p++);
- if ($p & 1)
- {
-# &mov($tmp2, &Xv($pos)) if $o < -1;
-# &mov($tmp1, -1) if $o < -1;
-
- &add($a, $tmp2);
- &mov($tmp2, $c);
- &sub($tmp1, $b);
- &and($tmp2, $b);
- &and($tmp1, $d);
- &or($tmp2, $tmp1);
- &mov($tmp1, &Xv($pos2)) if $o <= 0; # XXXXXXXXXXXXXX
- # XXX
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2,1));
- &mov($tmp2, -1) if $o <= 0;
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- else
- {
- # XXX
- &add($a, $tmp1);
- &mov($tmp1, $c);
- &sub($tmp2, $b);
- &and($tmp1, $b);
- &and($tmp2, $d);
- if ($o != 2)
- {
- &or($tmp1, $tmp2);
- &mov($tmp2, &Xv($pos2)) if $o <= 0;
- &mov($tmp2, -1) if $o == 1;
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp1,1));
- &mov($tmp1, -1) if $o <= 0;
- &sub($tmp2, &Np($c)) if $o == 1;
- } else {
- &or($tmp2, $tmp1);
- &mov($tmp1, &Np($c));
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2,1));
- &xor($tmp1, &Np($d));
- }
- &rotl($a, $s);
- &add($a, $e);
- }
- }
-
-sub RIP3
- {
- local($a,$b,$c,$d,$e,$pos,$s,$K,$o,$pos2)=@_;
-
- &comment($p++);
- if ($p & 1)
- {
-# &mov($tmp2, -1) if $o < -1;
-# &sub($tmp2, $c) if $o < -1;
- &mov($tmp1, &Xv($pos));
- &or($tmp2, $b);
- &add($a, $tmp1);
- &xor($tmp2, $d);
- &mov($tmp1, -1) if $o <= 0; # NEXT
- # XXX
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2,1));
- &sub($tmp1, &Np($c)) if $o <= 0; # NEXT
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- else
- {
- &mov($tmp2, &Xv($pos));
- &or($tmp1, $b);
- &add($a, $tmp2);
- &xor($tmp1, $d);
- &mov($tmp2, -1) if $o <= 0; # NEXT
- &mov($tmp2, -1) if $o == 1;
- &mov($tmp2, &Xv($pos2)) if $o == 2;
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp1,1));
- &sub($tmp2, &Np($c)) if $o <= 0; # NEXT
- &mov($tmp1, &Np($d)) if $o == 1;
- &mov($tmp1, -1) if $o == 2;
- &rotl($a, $s);
- &add($a, $e);
- }
- }
-
-sub RIP4
- {
- local($a,$b,$c,$d,$e,$pos,$s,$K,$o)=@_;
-
- &comment($p++);
- if ($p & 1)
- {
-# &mov($tmp2, -1) if $o == -2;
-# &mov($tmp1, $d) if $o == -2;
- &sub($tmp2, $d);
- &and($tmp1, $b);
- &and($tmp2, $c);
- &or($tmp2, $tmp1);
- &mov($tmp1, &Xv($pos));
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2));
- &mov($tmp2, -1) unless $o > 0; # NEXT
- # XXX
- &add($a, $tmp1);
- &mov($tmp1, &Np($d)) unless $o > 0; # NEXT
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- else
- {
- &sub($tmp2, $d);
- &and($tmp1, $b);
- &and($tmp2, $c);
- &or($tmp2, $tmp1);
- &mov($tmp1, &Xv($pos));
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2));
- &mov($tmp2, -1) if $o == 0; # NEXT
- &mov($tmp2, -1) if $o == 1;
- &mov($tmp2, -1) if $o == 2;
- # XXX
- &add($a, $tmp1);
- &mov($tmp1, &Np($d)) if $o == 0; # NEXT
- &sub($tmp2, &Np($d)) if $o == 1;
- &sub($tmp2, &Np($c)) if $o == 2;
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- }
-
-sub RIP5
- {
- local($a,$b,$c,$d,$e,$pos,$s,$K,$o)=@_;
-
- &comment($p++);
- if ($p & 1)
- {
- &mov($tmp2, -1) if $o == -2;
- &sub($tmp2, $d) if $o == -2;
- &mov($tmp1, &Xv($pos));
- &or($tmp2, $c);
- &add($a, $tmp1);
- &xor($tmp2, $b);
- &mov($tmp1, -1) if $o <= 0;
- # XXX
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp2,1));
- &sub($tmp1, &Np($d)) if $o <= 0;
- # XXX
- &rotl($a, $s);
- &add($a, $e);
- }
- else
- {
- &mov($tmp2, &Xv($pos));
- &or($tmp1, $c);
- &add($a, $tmp2);
- &xor($tmp1, $b);
- &mov($tmp2, -1) if $o <= 0;
- &mov($tmp2, &wparam(0)) if $o == 1; # Middle code
- &mov($tmp2, -1) if $o == 2;
- &rotl($c, 10);
- &lea($a, &DWP($K,$a,$tmp1,1));
- &sub($tmp2, &Np($d)) if $o <= 0;
- &mov(&swtmp(16), $A) if $o == 1;
- &mov($tmp1, &Np($d)) if $o == 2;
- &rotl($a, $s);
- &add($a, $e);
- }
- }
-
-sub ripemd160_block
- {
- local($name)=@_;
-
- &function_begin_B($name,"",3);
-
- # parameter 1 is the RIPEMD160_CTX structure.
- # A 0
- # B 4
- # C 8
- # D 12
- # E 16
-
- &mov($tmp2, &wparam(0));
- &mov($tmp1, &wparam(1));
- &push("esi");
- &mov($A, &DWP( 0,$tmp2,"",0));
- &push("edi");
- &mov($B, &DWP( 4,$tmp2,"",0));
- &push("ebp");
- &mov($C, &DWP( 8,$tmp2,"",0));
- &push("ebx");
- &stack_push(16+5+6);
- # Special comment about the figure of 6.
- # Idea is to pad the current frame so
- # that the top of the stack gets fairly
- # aligned. Well, as you realize it would
- # always depend on how the frame below is
- # aligned. The good news are that gcc-2.95
- # and later does keep first argument at
- # least double-wise aligned.
- # <[email protected]>
-
- &set_label("start") unless $normal;
- &comment("");
-
- # &mov($tmp1, &wparam(1)); # Done at end of loop
- # &mov($tmp2, &wparam(0)); # Done at end of loop
-
- for ($z=0; $z<16; $z+=2)
- {
- &mov($D, &DWP( $z*4,$tmp1,"",0));
- &mov($E, &DWP( ($z+1)*4,$tmp1,"",0));
- &mov(&swtmp($z), $D);
- &mov(&swtmp($z+1), $E);
- }
- &mov($tmp1, $C);
- &mov($D, &DWP(12,$tmp2,"",0));
- &mov($E, &DWP(16,$tmp2,"",0));
-
- &RIP1($A,$B,$C,$D,$E,$wl[ 0],$sl[ 0],-1);
- &RIP1($E,$A,$B,$C,$D,$wl[ 1],$sl[ 1],0);
- &RIP1($D,$E,$A,$B,$C,$wl[ 2],$sl[ 2],0);
- &RIP1($C,$D,$E,$A,$B,$wl[ 3],$sl[ 3],0);
- &RIP1($B,$C,$D,$E,$A,$wl[ 4],$sl[ 4],0);
- &RIP1($A,$B,$C,$D,$E,$wl[ 5],$sl[ 5],0);
- &RIP1($E,$A,$B,$C,$D,$wl[ 6],$sl[ 6],0);
- &RIP1($D,$E,$A,$B,$C,$wl[ 7],$sl[ 7],0);
- &RIP1($C,$D,$E,$A,$B,$wl[ 8],$sl[ 8],0);
- &RIP1($B,$C,$D,$E,$A,$wl[ 9],$sl[ 9],0);
- &RIP1($A,$B,$C,$D,$E,$wl[10],$sl[10],0);
- &RIP1($E,$A,$B,$C,$D,$wl[11],$sl[11],0);
- &RIP1($D,$E,$A,$B,$C,$wl[12],$sl[12],0);
- &RIP1($C,$D,$E,$A,$B,$wl[13],$sl[13],0);
- &RIP1($B,$C,$D,$E,$A,$wl[14],$sl[14],0);
- &RIP1($A,$B,$C,$D,$E,$wl[15],$sl[15],1,$wl[16]);
-
- &RIP2($E,$A,$B,$C,$D,$wl[16],$wl[17],$sl[16],$KL1,-1);
- &RIP2($D,$E,$A,$B,$C,$wl[17],$wl[18],$sl[17],$KL1,0);
- &RIP2($C,$D,$E,$A,$B,$wl[18],$wl[19],$sl[18],$KL1,0);
- &RIP2($B,$C,$D,$E,$A,$wl[19],$wl[20],$sl[19],$KL1,0);
- &RIP2($A,$B,$C,$D,$E,$wl[20],$wl[21],$sl[20],$KL1,0);
- &RIP2($E,$A,$B,$C,$D,$wl[21],$wl[22],$sl[21],$KL1,0);
- &RIP2($D,$E,$A,$B,$C,$wl[22],$wl[23],$sl[22],$KL1,0);
- &RIP2($C,$D,$E,$A,$B,$wl[23],$wl[24],$sl[23],$KL1,0);
- &RIP2($B,$C,$D,$E,$A,$wl[24],$wl[25],$sl[24],$KL1,0);
- &RIP2($A,$B,$C,$D,$E,$wl[25],$wl[26],$sl[25],$KL1,0);
- &RIP2($E,$A,$B,$C,$D,$wl[26],$wl[27],$sl[26],$KL1,0);
- &RIP2($D,$E,$A,$B,$C,$wl[27],$wl[28],$sl[27],$KL1,0);
- &RIP2($C,$D,$E,$A,$B,$wl[28],$wl[29],$sl[28],$KL1,0);
- &RIP2($B,$C,$D,$E,$A,$wl[29],$wl[30],$sl[29],$KL1,0);
- &RIP2($A,$B,$C,$D,$E,$wl[30],$wl[31],$sl[30],$KL1,0);
- &RIP2($E,$A,$B,$C,$D,$wl[31],$wl[32],$sl[31],$KL1,1);
-
- &RIP3($D,$E,$A,$B,$C,$wl[32],$sl[32],$KL2,-1);
- &RIP3($C,$D,$E,$A,$B,$wl[33],$sl[33],$KL2,0);
- &RIP3($B,$C,$D,$E,$A,$wl[34],$sl[34],$KL2,0);
- &RIP3($A,$B,$C,$D,$E,$wl[35],$sl[35],$KL2,0);
- &RIP3($E,$A,$B,$C,$D,$wl[36],$sl[36],$KL2,0);
- &RIP3($D,$E,$A,$B,$C,$wl[37],$sl[37],$KL2,0);
- &RIP3($C,$D,$E,$A,$B,$wl[38],$sl[38],$KL2,0);
- &RIP3($B,$C,$D,$E,$A,$wl[39],$sl[39],$KL2,0);
- &RIP3($A,$B,$C,$D,$E,$wl[40],$sl[40],$KL2,0);
- &RIP3($E,$A,$B,$C,$D,$wl[41],$sl[41],$KL2,0);
- &RIP3($D,$E,$A,$B,$C,$wl[42],$sl[42],$KL2,0);
- &RIP3($C,$D,$E,$A,$B,$wl[43],$sl[43],$KL2,0);
- &RIP3($B,$C,$D,$E,$A,$wl[44],$sl[44],$KL2,0);
- &RIP3($A,$B,$C,$D,$E,$wl[45],$sl[45],$KL2,0);
- &RIP3($E,$A,$B,$C,$D,$wl[46],$sl[46],$KL2,0);
- &RIP3($D,$E,$A,$B,$C,$wl[47],$sl[47],$KL2,1);
-
- &RIP4($C,$D,$E,$A,$B,$wl[48],$sl[48],$KL3,-1);
- &RIP4($B,$C,$D,$E,$A,$wl[49],$sl[49],$KL3,0);
- &RIP4($A,$B,$C,$D,$E,$wl[50],$sl[50],$KL3,0);
- &RIP4($E,$A,$B,$C,$D,$wl[51],$sl[51],$KL3,0);
- &RIP4($D,$E,$A,$B,$C,$wl[52],$sl[52],$KL3,0);
- &RIP4($C,$D,$E,$A,$B,$wl[53],$sl[53],$KL3,0);
- &RIP4($B,$C,$D,$E,$A,$wl[54],$sl[54],$KL3,0);
- &RIP4($A,$B,$C,$D,$E,$wl[55],$sl[55],$KL3,0);
- &RIP4($E,$A,$B,$C,$D,$wl[56],$sl[56],$KL3,0);
- &RIP4($D,$E,$A,$B,$C,$wl[57],$sl[57],$KL3,0);
- &RIP4($C,$D,$E,$A,$B,$wl[58],$sl[58],$KL3,0);
- &RIP4($B,$C,$D,$E,$A,$wl[59],$sl[59],$KL3,0);
- &RIP4($A,$B,$C,$D,$E,$wl[60],$sl[60],$KL3,0);
- &RIP4($E,$A,$B,$C,$D,$wl[61],$sl[61],$KL3,0);
- &RIP4($D,$E,$A,$B,$C,$wl[62],$sl[62],$KL3,0);
- &RIP4($C,$D,$E,$A,$B,$wl[63],$sl[63],$KL3,1);
-
- &RIP5($B,$C,$D,$E,$A,$wl[64],$sl[64],$KL4,-1);
- &RIP5($A,$B,$C,$D,$E,$wl[65],$sl[65],$KL4,0);
- &RIP5($E,$A,$B,$C,$D,$wl[66],$sl[66],$KL4,0);
- &RIP5($D,$E,$A,$B,$C,$wl[67],$sl[67],$KL4,0);
- &RIP5($C,$D,$E,$A,$B,$wl[68],$sl[68],$KL4,0);
- &RIP5($B,$C,$D,$E,$A,$wl[69],$sl[69],$KL4,0);
- &RIP5($A,$B,$C,$D,$E,$wl[70],$sl[70],$KL4,0);
- &RIP5($E,$A,$B,$C,$D,$wl[71],$sl[71],$KL4,0);
- &RIP5($D,$E,$A,$B,$C,$wl[72],$sl[72],$KL4,0);
- &RIP5($C,$D,$E,$A,$B,$wl[73],$sl[73],$KL4,0);
- &RIP5($B,$C,$D,$E,$A,$wl[74],$sl[74],$KL4,0);
- &RIP5($A,$B,$C,$D,$E,$wl[75],$sl[75],$KL4,0);
- &RIP5($E,$A,$B,$C,$D,$wl[76],$sl[76],$KL4,0);
- &RIP5($D,$E,$A,$B,$C,$wl[77],$sl[77],$KL4,0);
- &RIP5($C,$D,$E,$A,$B,$wl[78],$sl[78],$KL4,0);
- &RIP5($B,$C,$D,$E,$A,$wl[79],$sl[79],$KL4,1);
-
- # &mov($tmp2, &wparam(0)); # moved into last RIP5
- # &mov(&swtmp(16), $A);
- &mov($A, &DWP( 0,$tmp2,"",0));
- &mov(&swtmp(16+1), $B);
- &mov(&swtmp(16+2), $C);
- &mov($B, &DWP( 4,$tmp2,"",0));
- &mov(&swtmp(16+3), $D);
- &mov($C, &DWP( 8,$tmp2,"",0));
- &mov(&swtmp(16+4), $E);
- &mov($D, &DWP(12,$tmp2,"",0));
- &mov($E, &DWP(16,$tmp2,"",0));
-
- &RIP5($A,$B,$C,$D,$E,$wr[ 0],$sr[ 0],$KR0,-2);
- &RIP5($E,$A,$B,$C,$D,$wr[ 1],$sr[ 1],$KR0,0);
- &RIP5($D,$E,$A,$B,$C,$wr[ 2],$sr[ 2],$KR0,0);
- &RIP5($C,$D,$E,$A,$B,$wr[ 3],$sr[ 3],$KR0,0);
- &RIP5($B,$C,$D,$E,$A,$wr[ 4],$sr[ 4],$KR0,0);
- &RIP5($A,$B,$C,$D,$E,$wr[ 5],$sr[ 5],$KR0,0);
- &RIP5($E,$A,$B,$C,$D,$wr[ 6],$sr[ 6],$KR0,0);
- &RIP5($D,$E,$A,$B,$C,$wr[ 7],$sr[ 7],$KR0,0);
- &RIP5($C,$D,$E,$A,$B,$wr[ 8],$sr[ 8],$KR0,0);
- &RIP5($B,$C,$D,$E,$A,$wr[ 9],$sr[ 9],$KR0,0);
- &RIP5($A,$B,$C,$D,$E,$wr[10],$sr[10],$KR0,0);
- &RIP5($E,$A,$B,$C,$D,$wr[11],$sr[11],$KR0,0);
- &RIP5($D,$E,$A,$B,$C,$wr[12],$sr[12],$KR0,0);
- &RIP5($C,$D,$E,$A,$B,$wr[13],$sr[13],$KR0,0);
- &RIP5($B,$C,$D,$E,$A,$wr[14],$sr[14],$KR0,0);
- &RIP5($A,$B,$C,$D,$E,$wr[15],$sr[15],$KR0,2);
-
- &RIP4($E,$A,$B,$C,$D,$wr[16],$sr[16],$KR1,-2);
- &RIP4($D,$E,$A,$B,$C,$wr[17],$sr[17],$KR1,0);
- &RIP4($C,$D,$E,$A,$B,$wr[18],$sr[18],$KR1,0);
- &RIP4($B,$C,$D,$E,$A,$wr[19],$sr[19],$KR1,0);
- &RIP4($A,$B,$C,$D,$E,$wr[20],$sr[20],$KR1,0);
- &RIP4($E,$A,$B,$C,$D,$wr[21],$sr[21],$KR1,0);
- &RIP4($D,$E,$A,$B,$C,$wr[22],$sr[22],$KR1,0);
- &RIP4($C,$D,$E,$A,$B,$wr[23],$sr[23],$KR1,0);
- &RIP4($B,$C,$D,$E,$A,$wr[24],$sr[24],$KR1,0);
- &RIP4($A,$B,$C,$D,$E,$wr[25],$sr[25],$KR1,0);
- &RIP4($E,$A,$B,$C,$D,$wr[26],$sr[26],$KR1,0);
- &RIP4($D,$E,$A,$B,$C,$wr[27],$sr[27],$KR1,0);
- &RIP4($C,$D,$E,$A,$B,$wr[28],$sr[28],$KR1,0);
- &RIP4($B,$C,$D,$E,$A,$wr[29],$sr[29],$KR1,0);
- &RIP4($A,$B,$C,$D,$E,$wr[30],$sr[30],$KR1,0);
- &RIP4($E,$A,$B,$C,$D,$wr[31],$sr[31],$KR1,2);
-
- &RIP3($D,$E,$A,$B,$C,$wr[32],$sr[32],$KR2,-2);
- &RIP3($C,$D,$E,$A,$B,$wr[33],$sr[33],$KR2,0);
- &RIP3($B,$C,$D,$E,$A,$wr[34],$sr[34],$KR2,0);
- &RIP3($A,$B,$C,$D,$E,$wr[35],$sr[35],$KR2,0);
- &RIP3($E,$A,$B,$C,$D,$wr[36],$sr[36],$KR2,0);
- &RIP3($D,$E,$A,$B,$C,$wr[37],$sr[37],$KR2,0);
- &RIP3($C,$D,$E,$A,$B,$wr[38],$sr[38],$KR2,0);
- &RIP3($B,$C,$D,$E,$A,$wr[39],$sr[39],$KR2,0);
- &RIP3($A,$B,$C,$D,$E,$wr[40],$sr[40],$KR2,0);
- &RIP3($E,$A,$B,$C,$D,$wr[41],$sr[41],$KR2,0);
- &RIP3($D,$E,$A,$B,$C,$wr[42],$sr[42],$KR2,0);
- &RIP3($C,$D,$E,$A,$B,$wr[43],$sr[43],$KR2,0);
- &RIP3($B,$C,$D,$E,$A,$wr[44],$sr[44],$KR2,0);
- &RIP3($A,$B,$C,$D,$E,$wr[45],$sr[45],$KR2,0);
- &RIP3($E,$A,$B,$C,$D,$wr[46],$sr[46],$KR2,0);
- &RIP3($D,$E,$A,$B,$C,$wr[47],$sr[47],$KR2,2,$wr[48]);
-
- &RIP2($C,$D,$E,$A,$B,$wr[48],$wr[49],$sr[48],$KR3,-2);
- &RIP2($B,$C,$D,$E,$A,$wr[49],$wr[50],$sr[49],$KR3,0);
- &RIP2($A,$B,$C,$D,$E,$wr[50],$wr[51],$sr[50],$KR3,0);
- &RIP2($E,$A,$B,$C,$D,$wr[51],$wr[52],$sr[51],$KR3,0);
- &RIP2($D,$E,$A,$B,$C,$wr[52],$wr[53],$sr[52],$KR3,0);
- &RIP2($C,$D,$E,$A,$B,$wr[53],$wr[54],$sr[53],$KR3,0);
- &RIP2($B,$C,$D,$E,$A,$wr[54],$wr[55],$sr[54],$KR3,0);
- &RIP2($A,$B,$C,$D,$E,$wr[55],$wr[56],$sr[55],$KR3,0);
- &RIP2($E,$A,$B,$C,$D,$wr[56],$wr[57],$sr[56],$KR3,0);
- &RIP2($D,$E,$A,$B,$C,$wr[57],$wr[58],$sr[57],$KR3,0);
- &RIP2($C,$D,$E,$A,$B,$wr[58],$wr[59],$sr[58],$KR3,0);
- &RIP2($B,$C,$D,$E,$A,$wr[59],$wr[60],$sr[59],$KR3,0);
- &RIP2($A,$B,$C,$D,$E,$wr[60],$wr[61],$sr[60],$KR3,0);
- &RIP2($E,$A,$B,$C,$D,$wr[61],$wr[62],$sr[61],$KR3,0);
- &RIP2($D,$E,$A,$B,$C,$wr[62],$wr[63],$sr[62],$KR3,0);
- &RIP2($C,$D,$E,$A,$B,$wr[63],$wr[64],$sr[63],$KR3,2);
-
- &RIP1($B,$C,$D,$E,$A,$wr[64],$sr[64],-2);
- &RIP1($A,$B,$C,$D,$E,$wr[65],$sr[65],0);
- &RIP1($E,$A,$B,$C,$D,$wr[66],$sr[66],0);
- &RIP1($D,$E,$A,$B,$C,$wr[67],$sr[67],0);
- &RIP1($C,$D,$E,$A,$B,$wr[68],$sr[68],0);
- &RIP1($B,$C,$D,$E,$A,$wr[69],$sr[69],0);
- &RIP1($A,$B,$C,$D,$E,$wr[70],$sr[70],0);
- &RIP1($E,$A,$B,$C,$D,$wr[71],$sr[71],0);
- &RIP1($D,$E,$A,$B,$C,$wr[72],$sr[72],0);
- &RIP1($C,$D,$E,$A,$B,$wr[73],$sr[73],0);
- &RIP1($B,$C,$D,$E,$A,$wr[74],$sr[74],0);
- &RIP1($A,$B,$C,$D,$E,$wr[75],$sr[75],0);
- &RIP1($E,$A,$B,$C,$D,$wr[76],$sr[76],0);
- &RIP1($D,$E,$A,$B,$C,$wr[77],$sr[77],0);
- &RIP1($C,$D,$E,$A,$B,$wr[78],$sr[78],0);
- &RIP1($B,$C,$D,$E,$A,$wr[79],$sr[79],2);
-
- # &mov($tmp2, &wparam(0)); # Moved into last round
-
- &mov($tmp1, &DWP( 4,$tmp2,"",0)); # ctx->B
- &add($D, $tmp1);
- &mov($tmp1, &swtmp(16+2)); # $c
- &add($D, $tmp1);
-
- &mov($tmp1, &DWP( 8,$tmp2,"",0)); # ctx->C
- &add($E, $tmp1);
- &mov($tmp1, &swtmp(16+3)); # $d
- &add($E, $tmp1);
-
- &mov($tmp1, &DWP(12,$tmp2,"",0)); # ctx->D
- &add($A, $tmp1);
- &mov($tmp1, &swtmp(16+4)); # $e
- &add($A, $tmp1);
-
-
- &mov($tmp1, &DWP(16,$tmp2,"",0)); # ctx->E
- &add($B, $tmp1);
- &mov($tmp1, &swtmp(16+0)); # $a
- &add($B, $tmp1);
-
- &mov($tmp1, &DWP( 0,$tmp2,"",0)); # ctx->A
- &add($C, $tmp1);
- &mov($tmp1, &swtmp(16+1)); # $b
- &add($C, $tmp1);
-
- &mov($tmp1, &wparam(2));
-
- &mov(&DWP( 0,$tmp2,"",0), $D);
- &mov(&DWP( 4,$tmp2,"",0), $E);
- &mov(&DWP( 8,$tmp2,"",0), $A);
- &sub($tmp1,1);
- &mov(&DWP(12,$tmp2,"",0), $B);
- &mov(&DWP(16,$tmp2,"",0), $C);
-
- &jle(&label("get_out"));
-
- &mov(&wparam(2),$tmp1);
- &mov($C, $A);
- &mov($tmp1, &wparam(1));
- &mov($A, $D);
- &add($tmp1, 64);
- &mov($B, $E);
- &mov(&wparam(1),$tmp1);
-
- &jmp(&label("start"));
-
- &set_label("get_out");
-
- &stack_pop(16+5+6);
-
- &pop("ebx");
- &pop("ebp");
- &pop("edi");
- &pop("esi");
- &ret();
- &function_end_B($name);
- }
-
diff --git a/crypto/ripemd/ripemd.h b/crypto/ripemd/ripemd.h
deleted file mode 100644
index 189bd8c..0000000
--- a/crypto/ripemd/ripemd.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* crypto/ripemd/ripemd.h */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#ifndef HEADER_RIPEMD_H
-#define HEADER_RIPEMD_H
-
-#include <openssl/e_os2.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef OPENSSL_NO_RIPEMD
-#error RIPEMD is disabled.
-#endif
-
-#if defined(__LP32__)
-#define RIPEMD160_LONG unsigned long
-#elif defined(OPENSSL_SYS_CRAY) || defined(__ILP64__)
-#define RIPEMD160_LONG unsigned long
-#define RIPEMD160_LONG_LOG2 3
-#else
-#define RIPEMD160_LONG unsigned int
-#endif
-
-#define RIPEMD160_CBLOCK 64
-#define RIPEMD160_LBLOCK (RIPEMD160_CBLOCK/4)
-#define RIPEMD160_DIGEST_LENGTH 20
-
-typedef struct RIPEMD160state_st
- {
- RIPEMD160_LONG A,B,C,D,E;
- RIPEMD160_LONG Nl,Nh;
- RIPEMD160_LONG data[RIPEMD160_LBLOCK];
- unsigned int num;
- } RIPEMD160_CTX;
-
-#ifdef OPENSSL_FIPS
-int private_RIPEMD160_Init(RIPEMD160_CTX *c);
-#endif
-int RIPEMD160_Init(RIPEMD160_CTX *c);
-int RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len);
-int RIPEMD160_Final(unsigned char *md, RIPEMD160_CTX *c);
-unsigned char *RIPEMD160(const unsigned char *d, size_t n,
- unsigned char *md);
-void RIPEMD160_Transform(RIPEMD160_CTX *c, const unsigned char *b);
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/crypto/ripemd/rmd160.c b/crypto/ripemd/rmd160.c
deleted file mode 100644
index b0ec574..0000000
--- a/crypto/ripemd/rmd160.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* crypto/ripemd/rmd160.c */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <openssl/ripemd.h>
-
-#define BUFSIZE 1024*16
-
-void do_fp(FILE *f);
-void pt(unsigned char *md);
-#if !defined(_OSD_POSIX) && !defined(__DJGPP__)
-int read(int, void *, unsigned int);
-#endif
-
-int main(int argc, char **argv)
- {
- int i,err=0;
- FILE *IN;
-
- if (argc == 1)
- {
- do_fp(stdin);
- }
- else
- {
- for (i=1; i<argc; i++)
- {
- IN=fopen(argv[i],"r");
- if (IN == NULL)
- {
- perror(argv[i]);
- err++;
- continue;
- }
- printf("RIPEMD160(%s)= ",argv[i]);
- do_fp(IN);
- fclose(IN);
- }
- }
- exit(err);
- }
-
-void do_fp(FILE *f)
- {
- RIPEMD160_CTX c;
- unsigned char md[RIPEMD160_DIGEST_LENGTH];
- int fd;
- int i;
- static unsigned char buf[BUFSIZE];
-
- fd=fileno(f);
- RIPEMD160_Init(&c);
- for (;;)
- {
- i=read(fd,buf,BUFSIZE);
- if (i <= 0) break;
- RIPEMD160_Update(&c,buf,(unsigned long)i);
- }
- RIPEMD160_Final(&(md[0]),&c);
- pt(md);
- }
-
-void pt(unsigned char *md)
- {
- int i;
-
- for (i=0; i<RIPEMD160_DIGEST_LENGTH; i++)
- printf("%02x",md[i]);
- printf("\n");
- }
-
diff --git a/crypto/ripemd/rmd_dgst.c b/crypto/ripemd/rmd_dgst.c
deleted file mode 100644
index d8e72da..0000000
--- a/crypto/ripemd/rmd_dgst.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/* crypto/ripemd/rmd_dgst.c */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdio.h>
-#include "rmd_locl.h"
-#include <openssl/opensslv.h>
-#include <openssl/crypto.h>
-
-const char RMD160_version[]="RIPE-MD160" OPENSSL_VERSION_PTEXT;
-
-# ifdef RMD160_ASM
- void ripemd160_block_x86(RIPEMD160_CTX *c, unsigned long *p,size_t num);
-# define ripemd160_block ripemd160_block_x86
-# else
- void ripemd160_block(RIPEMD160_CTX *c, unsigned long *p,size_t num);
-# endif
-
-fips_md_init(RIPEMD160)
- {
- memset (c,0,sizeof(*c));
- c->A=RIPEMD160_A;
- c->B=RIPEMD160_B;
- c->C=RIPEMD160_C;
- c->D=RIPEMD160_D;
- c->E=RIPEMD160_E;
- return 1;
- }
-
-#ifndef ripemd160_block_data_order
-#ifdef X
-#undef X
-#endif
-void ripemd160_block_data_order (RIPEMD160_CTX *ctx, const void *p, size_t num)
- {
- const unsigned char *data=p;
- register unsigned MD32_REG_T A,B,C,D,E;
- unsigned MD32_REG_T a,b,c,d,e,l;
-#ifndef MD32_XARRAY
- /* See comment in crypto/sha/sha_locl.h for details. */
- unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
- XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
-# define X(i) XX##i
-#else
- RIPEMD160_LONG XX[16];
-# define X(i) XX[i]
-#endif
-
- for (;num--;)
- {
-
- A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
- (void)HOST_c2l(data,l); X( 0)=l;(void)HOST_c2l(data,l); X( 1)=l;
- RIP1(A,B,C,D,E,WL00,SL00); (void)HOST_c2l(data,l); X( 2)=l;
- RIP1(E,A,B,C,D,WL01,SL01); (void)HOST_c2l(data,l); X( 3)=l;
- RIP1(D,E,A,B,C,WL02,SL02); (void)HOST_c2l(data,l); X( 4)=l;
- RIP1(C,D,E,A,B,WL03,SL03); (void)HOST_c2l(data,l); X( 5)=l;
- RIP1(B,C,D,E,A,WL04,SL04); (void)HOST_c2l(data,l); X( 6)=l;
- RIP1(A,B,C,D,E,WL05,SL05); (void)HOST_c2l(data,l); X( 7)=l;
- RIP1(E,A,B,C,D,WL06,SL06); (void)HOST_c2l(data,l); X( 8)=l;
- RIP1(D,E,A,B,C,WL07,SL07); (void)HOST_c2l(data,l); X( 9)=l;
- RIP1(C,D,E,A,B,WL08,SL08); (void)HOST_c2l(data,l); X(10)=l;
- RIP1(B,C,D,E,A,WL09,SL09); (void)HOST_c2l(data,l); X(11)=l;
- RIP1(A,B,C,D,E,WL10,SL10); (void)HOST_c2l(data,l); X(12)=l;
- RIP1(E,A,B,C,D,WL11,SL11); (void)HOST_c2l(data,l); X(13)=l;
- RIP1(D,E,A,B,C,WL12,SL12); (void)HOST_c2l(data,l); X(14)=l;
- RIP1(C,D,E,A,B,WL13,SL13); (void)HOST_c2l(data,l); X(15)=l;
- RIP1(B,C,D,E,A,WL14,SL14);
- RIP1(A,B,C,D,E,WL15,SL15);
-
- RIP2(E,A,B,C,D,WL16,SL16,KL1);
- RIP2(D,E,A,B,C,WL17,SL17,KL1);
- RIP2(C,D,E,A,B,WL18,SL18,KL1);
- RIP2(B,C,D,E,A,WL19,SL19,KL1);
- RIP2(A,B,C,D,E,WL20,SL20,KL1);
- RIP2(E,A,B,C,D,WL21,SL21,KL1);
- RIP2(D,E,A,B,C,WL22,SL22,KL1);
- RIP2(C,D,E,A,B,WL23,SL23,KL1);
- RIP2(B,C,D,E,A,WL24,SL24,KL1);
- RIP2(A,B,C,D,E,WL25,SL25,KL1);
- RIP2(E,A,B,C,D,WL26,SL26,KL1);
- RIP2(D,E,A,B,C,WL27,SL27,KL1);
- RIP2(C,D,E,A,B,WL28,SL28,KL1);
- RIP2(B,C,D,E,A,WL29,SL29,KL1);
- RIP2(A,B,C,D,E,WL30,SL30,KL1);
- RIP2(E,A,B,C,D,WL31,SL31,KL1);
-
- RIP3(D,E,A,B,C,WL32,SL32,KL2);
- RIP3(C,D,E,A,B,WL33,SL33,KL2);
- RIP3(B,C,D,E,A,WL34,SL34,KL2);
- RIP3(A,B,C,D,E,WL35,SL35,KL2);
- RIP3(E,A,B,C,D,WL36,SL36,KL2);
- RIP3(D,E,A,B,C,WL37,SL37,KL2);
- RIP3(C,D,E,A,B,WL38,SL38,KL2);
- RIP3(B,C,D,E,A,WL39,SL39,KL2);
- RIP3(A,B,C,D,E,WL40,SL40,KL2);
- RIP3(E,A,B,C,D,WL41,SL41,KL2);
- RIP3(D,E,A,B,C,WL42,SL42,KL2);
- RIP3(C,D,E,A,B,WL43,SL43,KL2);
- RIP3(B,C,D,E,A,WL44,SL44,KL2);
- RIP3(A,B,C,D,E,WL45,SL45,KL2);
- RIP3(E,A,B,C,D,WL46,SL46,KL2);
- RIP3(D,E,A,B,C,WL47,SL47,KL2);
-
- RIP4(C,D,E,A,B,WL48,SL48,KL3);
- RIP4(B,C,D,E,A,WL49,SL49,KL3);
- RIP4(A,B,C,D,E,WL50,SL50,KL3);
- RIP4(E,A,B,C,D,WL51,SL51,KL3);
- RIP4(D,E,A,B,C,WL52,SL52,KL3);
- RIP4(C,D,E,A,B,WL53,SL53,KL3);
- RIP4(B,C,D,E,A,WL54,SL54,KL3);
- RIP4(A,B,C,D,E,WL55,SL55,KL3);
- RIP4(E,A,B,C,D,WL56,SL56,KL3);
- RIP4(D,E,A,B,C,WL57,SL57,KL3);
- RIP4(C,D,E,A,B,WL58,SL58,KL3);
- RIP4(B,C,D,E,A,WL59,SL59,KL3);
- RIP4(A,B,C,D,E,WL60,SL60,KL3);
- RIP4(E,A,B,C,D,WL61,SL61,KL3);
- RIP4(D,E,A,B,C,WL62,SL62,KL3);
- RIP4(C,D,E,A,B,WL63,SL63,KL3);
-
- RIP5(B,C,D,E,A,WL64,SL64,KL4);
- RIP5(A,B,C,D,E,WL65,SL65,KL4);
- RIP5(E,A,B,C,D,WL66,SL66,KL4);
- RIP5(D,E,A,B,C,WL67,SL67,KL4);
- RIP5(C,D,E,A,B,WL68,SL68,KL4);
- RIP5(B,C,D,E,A,WL69,SL69,KL4);
- RIP5(A,B,C,D,E,WL70,SL70,KL4);
- RIP5(E,A,B,C,D,WL71,SL71,KL4);
- RIP5(D,E,A,B,C,WL72,SL72,KL4);
- RIP5(C,D,E,A,B,WL73,SL73,KL4);
- RIP5(B,C,D,E,A,WL74,SL74,KL4);
- RIP5(A,B,C,D,E,WL75,SL75,KL4);
- RIP5(E,A,B,C,D,WL76,SL76,KL4);
- RIP5(D,E,A,B,C,WL77,SL77,KL4);
- RIP5(C,D,E,A,B,WL78,SL78,KL4);
- RIP5(B,C,D,E,A,WL79,SL79,KL4);
-
- a=A; b=B; c=C; d=D; e=E;
- /* Do other half */
- A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
- RIP5(A,B,C,D,E,WR00,SR00,KR0);
- RIP5(E,A,B,C,D,WR01,SR01,KR0);
- RIP5(D,E,A,B,C,WR02,SR02,KR0);
- RIP5(C,D,E,A,B,WR03,SR03,KR0);
- RIP5(B,C,D,E,A,WR04,SR04,KR0);
- RIP5(A,B,C,D,E,WR05,SR05,KR0);
- RIP5(E,A,B,C,D,WR06,SR06,KR0);
- RIP5(D,E,A,B,C,WR07,SR07,KR0);
- RIP5(C,D,E,A,B,WR08,SR08,KR0);
- RIP5(B,C,D,E,A,WR09,SR09,KR0);
- RIP5(A,B,C,D,E,WR10,SR10,KR0);
- RIP5(E,A,B,C,D,WR11,SR11,KR0);
- RIP5(D,E,A,B,C,WR12,SR12,KR0);
- RIP5(C,D,E,A,B,WR13,SR13,KR0);
- RIP5(B,C,D,E,A,WR14,SR14,KR0);
- RIP5(A,B,C,D,E,WR15,SR15,KR0);
-
- RIP4(E,A,B,C,D,WR16,SR16,KR1);
- RIP4(D,E,A,B,C,WR17,SR17,KR1);
- RIP4(C,D,E,A,B,WR18,SR18,KR1);
- RIP4(B,C,D,E,A,WR19,SR19,KR1);
- RIP4(A,B,C,D,E,WR20,SR20,KR1);
- RIP4(E,A,B,C,D,WR21,SR21,KR1);
- RIP4(D,E,A,B,C,WR22,SR22,KR1);
- RIP4(C,D,E,A,B,WR23,SR23,KR1);
- RIP4(B,C,D,E,A,WR24,SR24,KR1);
- RIP4(A,B,C,D,E,WR25,SR25,KR1);
- RIP4(E,A,B,C,D,WR26,SR26,KR1);
- RIP4(D,E,A,B,C,WR27,SR27,KR1);
- RIP4(C,D,E,A,B,WR28,SR28,KR1);
- RIP4(B,C,D,E,A,WR29,SR29,KR1);
- RIP4(A,B,C,D,E,WR30,SR30,KR1);
- RIP4(E,A,B,C,D,WR31,SR31,KR1);
-
- RIP3(D,E,A,B,C,WR32,SR32,KR2);
- RIP3(C,D,E,A,B,WR33,SR33,KR2);
- RIP3(B,C,D,E,A,WR34,SR34,KR2);
- RIP3(A,B,C,D,E,WR35,SR35,KR2);
- RIP3(E,A,B,C,D,WR36,SR36,KR2);
- RIP3(D,E,A,B,C,WR37,SR37,KR2);
- RIP3(C,D,E,A,B,WR38,SR38,KR2);
- RIP3(B,C,D,E,A,WR39,SR39,KR2);
- RIP3(A,B,C,D,E,WR40,SR40,KR2);
- RIP3(E,A,B,C,D,WR41,SR41,KR2);
- RIP3(D,E,A,B,C,WR42,SR42,KR2);
- RIP3(C,D,E,A,B,WR43,SR43,KR2);
- RIP3(B,C,D,E,A,WR44,SR44,KR2);
- RIP3(A,B,C,D,E,WR45,SR45,KR2);
- RIP3(E,A,B,C,D,WR46,SR46,KR2);
- RIP3(D,E,A,B,C,WR47,SR47,KR2);
-
- RIP2(C,D,E,A,B,WR48,SR48,KR3);
- RIP2(B,C,D,E,A,WR49,SR49,KR3);
- RIP2(A,B,C,D,E,WR50,SR50,KR3);
- RIP2(E,A,B,C,D,WR51,SR51,KR3);
- RIP2(D,E,A,B,C,WR52,SR52,KR3);
- RIP2(C,D,E,A,B,WR53,SR53,KR3);
- RIP2(B,C,D,E,A,WR54,SR54,KR3);
- RIP2(A,B,C,D,E,WR55,SR55,KR3);
- RIP2(E,A,B,C,D,WR56,SR56,KR3);
- RIP2(D,E,A,B,C,WR57,SR57,KR3);
- RIP2(C,D,E,A,B,WR58,SR58,KR3);
- RIP2(B,C,D,E,A,WR59,SR59,KR3);
- RIP2(A,B,C,D,E,WR60,SR60,KR3);
- RIP2(E,A,B,C,D,WR61,SR61,KR3);
- RIP2(D,E,A,B,C,WR62,SR62,KR3);
- RIP2(C,D,E,A,B,WR63,SR63,KR3);
-
- RIP1(B,C,D,E,A,WR64,SR64);
- RIP1(A,B,C,D,E,WR65,SR65);
- RIP1(E,A,B,C,D,WR66,SR66);
- RIP1(D,E,A,B,C,WR67,SR67);
- RIP1(C,D,E,A,B,WR68,SR68);
- RIP1(B,C,D,E,A,WR69,SR69);
- RIP1(A,B,C,D,E,WR70,SR70);
- RIP1(E,A,B,C,D,WR71,SR71);
- RIP1(D,E,A,B,C,WR72,SR72);
- RIP1(C,D,E,A,B,WR73,SR73);
- RIP1(B,C,D,E,A,WR74,SR74);
- RIP1(A,B,C,D,E,WR75,SR75);
- RIP1(E,A,B,C,D,WR76,SR76);
- RIP1(D,E,A,B,C,WR77,SR77);
- RIP1(C,D,E,A,B,WR78,SR78);
- RIP1(B,C,D,E,A,WR79,SR79);
-
- D =ctx->B+c+D;
- ctx->B=ctx->C+d+E;
- ctx->C=ctx->D+e+A;
- ctx->D=ctx->E+a+B;
- ctx->E=ctx->A+b+C;
- ctx->A=D;
-
- }
- }
-#endif
diff --git a/crypto/ripemd/rmd_locl.h b/crypto/ripemd/rmd_locl.h
deleted file mode 100644
index 2bd8957..0000000
--- a/crypto/ripemd/rmd_locl.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* crypto/ripemd/rmd_locl.h */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <openssl/opensslconf.h>
-#include <openssl/ripemd.h>
-
-#ifndef RIPEMD160_LONG_LOG2
-#define RIPEMD160_LONG_LOG2 2 /* default to 32 bits */
-#endif
-
-/*
- * DO EXAMINE COMMENTS IN crypto/md5/md5_locl.h & crypto/md5/md5_dgst.c
- * FOR EXPLANATIONS ON FOLLOWING "CODE."
- * <[email protected]>
- */
-#ifdef RMD160_ASM
-# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
-# define ripemd160_block_data_order ripemd160_block_asm_data_order
-# endif
-#endif
-
-void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
-
-#define DATA_ORDER_IS_LITTLE_ENDIAN
-
-#define HASH_LONG RIPEMD160_LONG
-#define HASH_CTX RIPEMD160_CTX
-#define HASH_CBLOCK RIPEMD160_CBLOCK
-#define HASH_UPDATE RIPEMD160_Update
-#define HASH_TRANSFORM RIPEMD160_Transform
-#define HASH_FINAL RIPEMD160_Final
-#define HASH_MAKE_STRING(c,s) do { \
- unsigned long ll; \
- ll=(c)->A; (void)HOST_l2c(ll,(s)); \
- ll=(c)->B; (void)HOST_l2c(ll,(s)); \
- ll=(c)->C; (void)HOST_l2c(ll,(s)); \
- ll=(c)->D; (void)HOST_l2c(ll,(s)); \
- ll=(c)->E; (void)HOST_l2c(ll,(s)); \
- } while (0)
-#define HASH_BLOCK_DATA_ORDER ripemd160_block_data_order
-
-#include "md32_common.h"
-
-#if 0
-#define F1(x,y,z) ((x)^(y)^(z))
-#define F2(x,y,z) (((x)&(y))|((~x)&z))
-#define F3(x,y,z) (((x)|(~y))^(z))
-#define F4(x,y,z) (((x)&(z))|((y)&(~(z))))
-#define F5(x,y,z) ((x)^((y)|(~(z))))
-#else
-/*
- * Transformed F2 and F4 are courtesy of Wei Dai <[email protected]>
- */
-#define F1(x,y,z) ((x) ^ (y) ^ (z))
-#define F2(x,y,z) ((((y) ^ (z)) & (x)) ^ (z))
-#define F3(x,y,z) (((~(y)) | (x)) ^ (z))
-#define F4(x,y,z) ((((x) ^ (y)) & (z)) ^ (y))
-#define F5(x,y,z) (((~(z)) | (y)) ^ (x))
-#endif
-
-#define RIPEMD160_A 0x67452301L
-#define RIPEMD160_B 0xEFCDAB89L
-#define RIPEMD160_C 0x98BADCFEL
-#define RIPEMD160_D 0x10325476L
-#define RIPEMD160_E 0xC3D2E1F0L
-
-#include "rmdconst.h"
-
-#define RIP1(a,b,c,d,e,w,s) { \
- a+=F1(b,c,d)+X(w); \
- a=ROTATE(a,s)+e; \
- c=ROTATE(c,10); }
-
-#define RIP2(a,b,c,d,e,w,s,K) { \
- a+=F2(b,c,d)+X(w)+K; \
- a=ROTATE(a,s)+e; \
- c=ROTATE(c,10); }
-
-#define RIP3(a,b,c,d,e,w,s,K) { \
- a+=F3(b,c,d)+X(w)+K; \
- a=ROTATE(a,s)+e; \
- c=ROTATE(c,10); }
-
-#define RIP4(a,b,c,d,e,w,s,K) { \
- a+=F4(b,c,d)+X(w)+K; \
- a=ROTATE(a,s)+e; \
- c=ROTATE(c,10); }
-
-#define RIP5(a,b,c,d,e,w,s,K) { \
- a+=F5(b,c,d)+X(w)+K; \
- a=ROTATE(a,s)+e; \
- c=ROTATE(c,10); }
-
diff --git a/crypto/ripemd/rmd_one.c b/crypto/ripemd/rmd_one.c
deleted file mode 100644
index 3efb137..0000000
--- a/crypto/ripemd/rmd_one.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* crypto/ripemd/rmd_one.c */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <openssl/ripemd.h>
-#include <openssl/crypto.h>
-
-unsigned char *RIPEMD160(const unsigned char *d, size_t n,
- unsigned char *md)
- {
- RIPEMD160_CTX c;
- static unsigned char m[RIPEMD160_DIGEST_LENGTH];
-
- if (md == NULL) md=m;
- if (!RIPEMD160_Init(&c))
- return NULL;
- RIPEMD160_Update(&c,d,n);
- RIPEMD160_Final(md,&c);
- OPENSSL_cleanse(&c,sizeof(c)); /* security consideration */
- return(md);
- }
-
diff --git a/crypto/ripemd/rmdconst.h b/crypto/ripemd/rmdconst.h
deleted file mode 100644
index 59c48de..0000000
--- a/crypto/ripemd/rmdconst.h
+++ /dev/null
@@ -1,399 +0,0 @@
-/* crypto/ripemd/rmdconst.h */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-#define KL0 0x00000000L
-#define KL1 0x5A827999L
-#define KL2 0x6ED9EBA1L
-#define KL3 0x8F1BBCDCL
-#define KL4 0xA953FD4EL
-
-#define KR0 0x50A28BE6L
-#define KR1 0x5C4DD124L
-#define KR2 0x6D703EF3L
-#define KR3 0x7A6D76E9L
-#define KR4 0x00000000L
-
-#define WL00 0
-#define SL00 11
-#define WL01 1
-#define SL01 14
-#define WL02 2
-#define SL02 15
-#define WL03 3
-#define SL03 12
-#define WL04 4
-#define SL04 5
-#define WL05 5
-#define SL05 8
-#define WL06 6
-#define SL06 7
-#define WL07 7
-#define SL07 9
-#define WL08 8
-#define SL08 11
-#define WL09 9
-#define SL09 13
-#define WL10 10
-#define SL10 14
-#define WL11 11
-#define SL11 15
-#define WL12 12
-#define SL12 6
-#define WL13 13
-#define SL13 7
-#define WL14 14
-#define SL14 9
-#define WL15 15
-#define SL15 8
-
-#define WL16 7
-#define SL16 7
-#define WL17 4
-#define SL17 6
-#define WL18 13
-#define SL18 8
-#define WL19 1
-#define SL19 13
-#define WL20 10
-#define SL20 11
-#define WL21 6
-#define SL21 9
-#define WL22 15
-#define SL22 7
-#define WL23 3
-#define SL23 15
-#define WL24 12
-#define SL24 7
-#define WL25 0
-#define SL25 12
-#define WL26 9
-#define SL26 15
-#define WL27 5
-#define SL27 9
-#define WL28 2
-#define SL28 11
-#define WL29 14
-#define SL29 7
-#define WL30 11
-#define SL30 13
-#define WL31 8
-#define SL31 12
-
-#define WL32 3
-#define SL32 11
-#define WL33 10
-#define SL33 13
-#define WL34 14
-#define SL34 6
-#define WL35 4
-#define SL35 7
-#define WL36 9
-#define SL36 14
-#define WL37 15
-#define SL37 9
-#define WL38 8
-#define SL38 13
-#define WL39 1
-#define SL39 15
-#define WL40 2
-#define SL40 14
-#define WL41 7
-#define SL41 8
-#define WL42 0
-#define SL42 13
-#define WL43 6
-#define SL43 6
-#define WL44 13
-#define SL44 5
-#define WL45 11
-#define SL45 12
-#define WL46 5
-#define SL46 7
-#define WL47 12
-#define SL47 5
-
-#define WL48 1
-#define SL48 11
-#define WL49 9
-#define SL49 12
-#define WL50 11
-#define SL50 14
-#define WL51 10
-#define SL51 15
-#define WL52 0
-#define SL52 14
-#define WL53 8
-#define SL53 15
-#define WL54 12
-#define SL54 9
-#define WL55 4
-#define SL55 8
-#define WL56 13
-#define SL56 9
-#define WL57 3
-#define SL57 14
-#define WL58 7
-#define SL58 5
-#define WL59 15
-#define SL59 6
-#define WL60 14
-#define SL60 8
-#define WL61 5
-#define SL61 6
-#define WL62 6
-#define SL62 5
-#define WL63 2
-#define SL63 12
-
-#define WL64 4
-#define SL64 9
-#define WL65 0
-#define SL65 15
-#define WL66 5
-#define SL66 5
-#define WL67 9
-#define SL67 11
-#define WL68 7
-#define SL68 6
-#define WL69 12
-#define SL69 8
-#define WL70 2
-#define SL70 13
-#define WL71 10
-#define SL71 12
-#define WL72 14
-#define SL72 5
-#define WL73 1
-#define SL73 12
-#define WL74 3
-#define SL74 13
-#define WL75 8
-#define SL75 14
-#define WL76 11
-#define SL76 11
-#define WL77 6
-#define SL77 8
-#define WL78 15
-#define SL78 5
-#define WL79 13
-#define SL79 6
-
-#define WR00 5
-#define SR00 8
-#define WR01 14
-#define SR01 9
-#define WR02 7
-#define SR02 9
-#define WR03 0
-#define SR03 11
-#define WR04 9
-#define SR04 13
-#define WR05 2
-#define SR05 15
-#define WR06 11
-#define SR06 15
-#define WR07 4
-#define SR07 5
-#define WR08 13
-#define SR08 7
-#define WR09 6
-#define SR09 7
-#define WR10 15
-#define SR10 8
-#define WR11 8
-#define SR11 11
-#define WR12 1
-#define SR12 14
-#define WR13 10
-#define SR13 14
-#define WR14 3
-#define SR14 12
-#define WR15 12
-#define SR15 6
-
-#define WR16 6
-#define SR16 9
-#define WR17 11
-#define SR17 13
-#define WR18 3
-#define SR18 15
-#define WR19 7
-#define SR19 7
-#define WR20 0
-#define SR20 12
-#define WR21 13
-#define SR21 8
-#define WR22 5
-#define SR22 9
-#define WR23 10
-#define SR23 11
-#define WR24 14
-#define SR24 7
-#define WR25 15
-#define SR25 7
-#define WR26 8
-#define SR26 12
-#define WR27 12
-#define SR27 7
-#define WR28 4
-#define SR28 6
-#define WR29 9
-#define SR29 15
-#define WR30 1
-#define SR30 13
-#define WR31 2
-#define SR31 11
-
-#define WR32 15
-#define SR32 9
-#define WR33 5
-#define SR33 7
-#define WR34 1
-#define SR34 15
-#define WR35 3
-#define SR35 11
-#define WR36 7
-#define SR36 8
-#define WR37 14
-#define SR37 6
-#define WR38 6
-#define SR38 6
-#define WR39 9
-#define SR39 14
-#define WR40 11
-#define SR40 12
-#define WR41 8
-#define SR41 13
-#define WR42 12
-#define SR42 5
-#define WR43 2
-#define SR43 14
-#define WR44 10
-#define SR44 13
-#define WR45 0
-#define SR45 13
-#define WR46 4
-#define SR46 7
-#define WR47 13
-#define SR47 5
-
-#define WR48 8
-#define SR48 15
-#define WR49 6
-#define SR49 5
-#define WR50 4
-#define SR50 8
-#define WR51 1
-#define SR51 11
-#define WR52 3
-#define SR52 14
-#define WR53 11
-#define SR53 14
-#define WR54 15
-#define SR54 6
-#define WR55 0
-#define SR55 14
-#define WR56 5
-#define SR56 6
-#define WR57 12
-#define SR57 9
-#define WR58 2
-#define SR58 12
-#define WR59 13
-#define SR59 9
-#define WR60 9
-#define SR60 12
-#define WR61 7
-#define SR61 5
-#define WR62 10
-#define SR62 15
-#define WR63 14
-#define SR63 8
-
-#define WR64 12
-#define SR64 8
-#define WR65 15
-#define SR65 5
-#define WR66 10
-#define SR66 12
-#define WR67 4
-#define SR67 9
-#define WR68 1
-#define SR68 12
-#define WR69 5
-#define SR69 5
-#define WR70 8
-#define SR70 14
-#define WR71 7
-#define SR71 6
-#define WR72 6
-#define SR72 8
-#define WR73 2
-#define SR73 13
-#define WR74 13
-#define SR74 6
-#define WR75 14
-#define SR75 5
-#define WR76 0
-#define SR76 15
-#define WR77 3
-#define SR77 13
-#define WR78 9
-#define SR78 11
-#define WR79 11
-#define SR79 11
-
diff --git a/crypto/ripemd/rmdtest.c b/crypto/ripemd/rmdtest.c
deleted file mode 100644
index fb34e0e..0000000
--- a/crypto/ripemd/rmdtest.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/* crypto/ripemd/rmdtest.c */
-/* Copyright (C) 1995-1998 Eric Young ([email protected])
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young ([email protected]).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson ([email protected]).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young ([email protected])"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson ([email protected])"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "../e_os.h"
-
-#ifdef OPENSSL_NO_RIPEMD
-int main(int argc, char *argv[])
-{
- printf("No ripemd support\n");
- return(0);
-}
-#else
-#include <openssl/ripemd.h>
-#include <openssl/evp.h>
-
-#ifdef CHARSET_EBCDIC
-#include <openssl/ebcdic.h>
-#endif
-
-static char *test[]={
- "",
- "a",
- "abc",
- "message digest",
- "abcdefghijklmnopqrstuvwxyz",
- "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
- "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
- NULL,
- };
-
-static char *ret[]={
- "9c1185a5c5e9fc54612808977ee8f548b2258d31",
- "0bdc9d2d256b3ee9daae347be6f4dc835a467ffe",
- "8eb208f7e05d987a9b044a8e98c6b087f15a0bfc",
- "5d0689ef49d2fae572b881b123a85ffa21595f36",
- "f71c27109c692c1b56bbdceb5b9d2865b3708dbc",
- "12a053384a9c0c88e405a06c27dcf49ada62eb2b",
- "b0e20b6e3116640286ed3a87a5713079b21f5189",
- "9b752e45573d4b39f4dbd3323cab82bf63326bfb",
- };
-
-static char *pt(unsigned char *md);
-int main(int argc, char *argv[])
- {
- int i,err=0;
- char **P,**R;
- char *p;
- unsigned char md[RIPEMD160_DIGEST_LENGTH];
-
- P=test;
- R=ret;
- i=1;
- while (*P != NULL)
- {
-#ifdef CHARSET_EBCDIC
- ebcdic2ascii((char *)*P, (char *)*P, strlen((char *)*P));
-#endif
- EVP_Digest(&(P[0][0]),strlen((char *)*P),md,NULL,EVP_ripemd160(), NULL);
- p=pt(md);
- if (strcmp(p,(char *)*R) != 0)
- {
- printf("error calculating RIPEMD160 on '%s'\n",*P);
- printf("got %s instead of %s\n",p,*R);
- err++;
- }
- else
- printf("test %d ok\n",i);
- i++;
- R++;
- P++;
- }
- EXIT(err);
- return(0);
- }
-
-static char *pt(unsigned char *md)
- {
- int i;
- static char buf[80];
-
- for (i=0; i<RIPEMD160_DIGEST_LENGTH; i++)
- sprintf(&(buf[i*2]),"%02x",md[i]);
- return(buf);
- }
-#endif
diff --git a/crypto/rsa/rsa_ameth.c b/crypto/rsa/rsa_ameth.c
index 5a2062f..4c8ecd9 100644
--- a/crypto/rsa/rsa_ameth.c
+++ b/crypto/rsa/rsa_ameth.c
@@ -358,7 +358,7 @@
if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0)
goto err;
}
- else if (BIO_puts(bp, "0x14 (default)") <= 0)
+ else if (BIO_puts(bp, "14 (default)") <= 0)
goto err;
BIO_puts(bp, "\n");
diff --git a/crypto/sha/asm/sha1-586.S b/crypto/sha/asm/sha1-586.S
index e77f654..47bef2a 100644
--- a/crypto/sha/asm/sha1-586.S
+++ b/crypto/sha/asm/sha1-586.S
@@ -9,6 +9,21 @@
pushl %ebx
pushl %esi
pushl %edi
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L000pic_point](%ebp),%esi
+ movl OPENSSL_ia32cap_P@GOT(%esi),%esi
+ leal .LK_XX_XX-.L000pic_point(%ebp),%ebp
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ testl $512,%edx
+ jz .L001x86
+ testl $16777216,%eax
+ jz .L001x86
+ jmp .Lssse3_shortcut
+.align 16
+.L001x86:
movl 20(%esp),%ebp
movl 24(%esp),%esi
movl 28(%esp),%eax
@@ -17,9 +32,9 @@
addl %esi,%eax
movl %eax,104(%esp)
movl 16(%ebp),%edi
- jmp .L000loop
+ jmp .L002loop
.align 16
-.L000loop:
+.L002loop:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@@ -1366,7 +1381,7 @@
movl %ebx,12(%ebp)
movl %edx,%esi
movl %ecx,16(%ebp)
- jb .L000loop
+ jb .L002loop
addl $76,%esp
popl %edi
popl %esi
@@ -1374,7 +1389,1251 @@
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L003pic_point
+.L003pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lssse3_shortcut:
+ movdqa (%ebp),%xmm7
+ movdqa 16(%ebp),%xmm0
+ movdqa 32(%ebp),%xmm1
+ movdqa 48(%ebp),%xmm2
+ movdqa 64(%ebp),%xmm6
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebp
+ movl 28(%esp),%edx
+ movl %esp,%esi
+ subl $208,%esp
+ andl $-64,%esp
+ movdqa %xmm0,112(%esp)
+ movdqa %xmm1,128(%esp)
+ movdqa %xmm2,144(%esp)
+ shll $6,%edx
+ movdqa %xmm7,160(%esp)
+ addl %ebp,%edx
+ movdqa %xmm6,176(%esp)
+ addl $64,%ebp
+ movl %edi,192(%esp)
+ movl %ebp,196(%esp)
+ movl %edx,200(%esp)
+ movl %esi,204(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ movl 16(%edi),%edi
+ movl %ebx,%esi
+ movdqu -64(%ebp),%xmm0
+ movdqu -48(%ebp),%xmm1
+ movdqu -32(%ebp),%xmm2
+ movdqu -16(%ebp),%xmm3
+.byte 102,15,56,0,198
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+ movdqa %xmm7,96(%esp)
+.byte 102,15,56,0,222
+ paddd %xmm7,%xmm0
+ paddd %xmm7,%xmm1
+ paddd %xmm7,%xmm2
+ movdqa %xmm0,(%esp)
+ psubd %xmm7,%xmm0
+ movdqa %xmm1,16(%esp)
+ psubd %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ psubd %xmm7,%xmm2
+ movdqa %xmm1,%xmm4
+ jmp .L004loop
+.align 16
+.L004loop:
+ addl (%esp),%edi
+ xorl %edx,%ecx
+.byte 102,15,58,15,224,8
+ movdqa %xmm3,%xmm6
+ movl %eax,%ebp
+ roll $5,%eax
+ paddd %xmm3,%xmm7
+ movdqa %xmm0,64(%esp)
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm6
+ xorl %edx,%esi
+ addl %eax,%edi
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%edi
+ pxor %xmm2,%xmm6
+ addl 4(%esp),%edx
+ xorl %ecx,%ebx
+ movl %edi,%esi
+ roll $5,%edi
+ pxor %xmm6,%xmm4
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa %xmm7,48(%esp)
+ xorl %ecx,%ebp
+ addl %edi,%edx
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm6
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 8(%esp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm0
+ paddd %xmm4,%xmm4
+ movl %edx,%ebp
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm6
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm0,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ psrld $30,%xmm0
+ por %xmm6,%xmm4
+ addl 12(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm7
+ pxor %xmm0,%xmm4
+ andl %edi,%ebp
+ xorl %eax,%edi
+ movdqa 96(%esp),%xmm0
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ pxor %xmm7,%xmm4
+ movdqa %xmm2,%xmm5
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 16(%esp),%eax
+ xorl %edi,%edx
+.byte 102,15,58,15,233,8
+ movdqa %xmm4,%xmm7
+ movl %ebx,%ebp
+ roll $5,%ebx
+ paddd %xmm4,%xmm0
+ movdqa %xmm1,80(%esp)
+ andl %edx,%esi
+ xorl %edi,%edx
+ psrldq $4,%xmm7
+ xorl %edi,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm7
+ addl 20(%esp),%edi
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm7,%xmm5
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa %xmm0,(%esp)
+ xorl %edx,%ebp
+ addl %eax,%edi
+ movdqa %xmm5,%xmm1
+ movdqa %xmm5,%xmm7
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 24(%esp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm1
+ paddd %xmm5,%xmm5
+ movl %edi,%ebp
+ roll $5,%edi
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm7
+ xorl %ecx,%esi
+ addl %edi,%edx
+ movdqa %xmm1,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm1
+ por %xmm7,%xmm5
+ addl 28(%esp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm0
+ pxor %xmm1,%xmm5
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ movdqa 112(%esp),%xmm1
+ xorl %ebx,%ebp
+ addl %edx,%ecx
+ pxor %xmm0,%xmm5
+ movdqa %xmm3,%xmm6
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 32(%esp),%ebx
+ xorl %eax,%edi
+.byte 102,15,58,15,242,8
+ movdqa %xmm5,%xmm0
+ movl %ecx,%ebp
+ roll $5,%ecx
+ paddd %xmm5,%xmm1
+ movdqa %xmm2,96(%esp)
+ andl %edi,%esi
+ xorl %eax,%edi
+ psrldq $4,%xmm0
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm0
+ addl 36(%esp),%eax
+ xorl %edi,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm0,%xmm6
+ andl %edx,%ebp
+ xorl %edi,%edx
+ movdqa %xmm1,16(%esp)
+ xorl %edi,%ebp
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,%xmm0
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 40(%esp),%edi
+ xorl %edx,%ecx
+ pslldq $12,%xmm2
+ paddd %xmm6,%xmm6
+ movl %eax,%ebp
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm0
+ xorl %edx,%esi
+ addl %eax,%edi
+ movdqa %xmm2,%xmm1
+ rorl $7,%ebx
+ addl %esi,%edi
+ psrld $30,%xmm2
+ por %xmm0,%xmm6
+ addl 44(%esp),%edx
+ xorl %ecx,%ebx
+ movdqa 64(%esp),%xmm0
+ movl %edi,%esi
+ roll $5,%edi
+ pslld $2,%xmm1
+ pxor %xmm2,%xmm6
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa 112(%esp),%xmm2
+ xorl %ecx,%ebp
+ addl %edi,%edx
+ pxor %xmm1,%xmm6
+ movdqa %xmm4,%xmm7
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 48(%esp),%ecx
+ xorl %ebx,%eax
+.byte 102,15,58,15,251,8
+ movdqa %xmm6,%xmm1
+ movl %edx,%ebp
+ roll $5,%edx
+ paddd %xmm6,%xmm2
+ movdqa %xmm3,64(%esp)
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm1
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ pxor %xmm5,%xmm1
+ addl 52(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm1,%xmm7
+ andl %edi,%ebp
+ xorl %eax,%edi
+ movdqa %xmm2,32(%esp)
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm3
+ movdqa %xmm7,%xmm1
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 56(%esp),%eax
+ xorl %edi,%edx
+ pslldq $12,%xmm3
+ paddd %xmm7,%xmm7
+ movl %ebx,%ebp
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %edi,%edx
+ psrld $31,%xmm1
+ xorl %edi,%esi
+ addl %ebx,%eax
+ movdqa %xmm3,%xmm2
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm3
+ por %xmm1,%xmm7
+ addl 60(%esp),%edi
+ xorl %edx,%ecx
+ movdqa 80(%esp),%xmm1
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm2
+ pxor %xmm3,%xmm7
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa 112(%esp),%xmm3
+ xorl %edx,%ebp
+ addl %eax,%edi
+ pxor %xmm2,%xmm7
+ rorl $7,%ebx
+ addl %ebp,%edi
+ movdqa %xmm7,%xmm2
+ addl (%esp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,214,8
+ xorl %ecx,%ebx
+ movl %edi,%ebp
+ roll $5,%edi
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,80(%esp)
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm3,%xmm4
+ paddd %xmm7,%xmm3
+ xorl %ecx,%esi
+ addl %edi,%edx
+ pxor %xmm2,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%esp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%ebp
+ addl %edx,%ecx
+ psrld $30,%xmm2
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 8(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ por %xmm2,%xmm0
+ andl %edi,%esi
+ xorl %eax,%edi
+ movdqa 96(%esp),%xmm2
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%esp),%eax
+ movdqa %xmm0,%xmm3
+ xorl %edi,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
+ xorl %edi,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 16(%esp),%edi
+ pxor %xmm5,%xmm1
+.byte 102,15,58,15,223,8
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,96(%esp)
+ xorl %ecx,%esi
+ addl %eax,%edi
+ movdqa %xmm4,%xmm5
+ paddd %xmm0,%xmm4
+ rorl $7,%ebx
+ addl %esi,%edi
+ pxor %xmm3,%xmm1
+ addl 20(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm1
+ addl 24(%esp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm3
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ por %xmm3,%xmm1
+ addl 28(%esp),%ebx
+ xorl %eax,%ebp
+ movdqa 64(%esp),%xmm3
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ movdqa %xmm1,%xmm4
+ addl %ebp,%ebx
+ addl 32(%esp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,15,58,15,224,8
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,64(%esp)
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 128(%esp),%xmm6
+ paddd %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm4,%xmm2
+ addl 36(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ pslld $2,%xmm2
+ addl 40(%esp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm4
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm4,%xmm2
+ addl 44(%esp),%ecx
+ xorl %ebx,%ebp
+ movdqa 80(%esp),%xmm4
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ movdqa %xmm2,%xmm5
+ addl %ebp,%ecx
+ addl 48(%esp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,15,58,15,233,8
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,80(%esp)
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ movdqa %xmm6,%xmm7
+ paddd %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ pslld $2,%xmm3
+ addl 56(%esp),%edi
+ xorl %edx,%esi
+ psrld $30,%xmm5
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ por %xmm5,%xmm3
+ addl 60(%esp),%edx
+ xorl %ecx,%ebp
+ movdqa 96(%esp),%xmm5
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ movdqa %xmm3,%xmm6
+ addl %ebp,%edx
+ addl (%esp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,15,58,15,242,8
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ movdqa %xmm0,96(%esp)
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm7,%xmm0
+ paddd %xmm3,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ pxor %xmm6,%xmm4
+ addl 4(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm6
+ movdqa %xmm7,48(%esp)
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ pslld $2,%xmm4
+ addl 8(%esp),%eax
+ xorl %edi,%esi
+ psrld $30,%xmm6
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm6,%xmm4
+ addl 12(%esp),%edi
+ xorl %edx,%ebp
+ movdqa 64(%esp),%xmm6
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ movdqa %xmm4,%xmm7
+ addl %ebp,%edi
+ addl 16(%esp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,15,58,15,251,8
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ pxor %xmm6,%xmm5
+ movdqa %xmm1,64(%esp)
+ xorl %ebx,%esi
+ addl %edi,%edx
+ movdqa %xmm0,%xmm1
+ paddd %xmm4,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm7,%xmm5
+ addl 20(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm7
+ movdqa %xmm0,(%esp)
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ pslld $2,%xmm5
+ addl 24(%esp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm7
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm7,%xmm5
+ addl 28(%esp),%eax
+ xorl %edi,%ebp
+ movdqa 80(%esp),%xmm7
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ movdqa %xmm5,%xmm0
+ addl %ebp,%eax
+ movl %ecx,%ebp
+ pxor %xmm2,%xmm6
+.byte 102,15,58,15,196,8
+ xorl %edx,%ecx
+ addl 32(%esp),%edi
+ andl %edx,%ebp
+ pxor %xmm7,%xmm6
+ movdqa %xmm2,80(%esp)
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm1,%xmm2
+ paddd %xmm5,%xmm1
+ addl %ebp,%edi
+ movl %eax,%ebp
+ pxor %xmm0,%xmm6
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movdqa %xmm6,%xmm0
+ movdqa %xmm1,16(%esp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%esp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%ebp
+ rorl $7,%eax
+ psrld $30,%xmm0
+ addl %esi,%edx
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ por %xmm0,%xmm6
+ movl %eax,%ebp
+ xorl %ebx,%eax
+ movdqa 96(%esp),%xmm0
+ addl 40(%esp),%ecx
+ andl %ebx,%ebp
+ andl %eax,%esi
+ rorl $7,%edi
+ addl %ebp,%ecx
+ movdqa %xmm6,%xmm1
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %edi,%esi
+ xorl %eax,%edi
+ addl 44(%esp),%ebx
+ andl %eax,%esi
+ andl %edi,%ebp
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movl %edx,%ebp
+ pxor %xmm3,%xmm7
+.byte 102,15,58,15,205,8
+ xorl %edi,%edx
+ addl 48(%esp),%eax
+ andl %edi,%ebp
+ pxor %xmm0,%xmm7
+ movdqa %xmm3,96(%esp)
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 144(%esp),%xmm3
+ paddd %xmm6,%xmm2
+ addl %ebp,%eax
+ movl %ebx,%ebp
+ pxor %xmm1,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%esp),%edi
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%ebp
+ rorl $7,%ebx
+ psrld $30,%xmm1
+ addl %esi,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ por %xmm1,%xmm7
+ movl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa 64(%esp),%xmm1
+ addl 56(%esp),%edx
+ andl %ecx,%ebp
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ movdqa %xmm7,%xmm2
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%esp),%ecx
+ andl %ebx,%esi
+ andl %eax,%ebp
+ rorl $7,%edi
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %edi,%ebp
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,214,8
+ xorl %eax,%edi
+ addl (%esp),%ebx
+ andl %eax,%ebp
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,64(%esp)
+ andl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm3,%xmm4
+ paddd %xmm7,%xmm3
+ addl %ebp,%ebx
+ movl %ecx,%ebp
+ pxor %xmm2,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ xorl %edi,%edx
+ addl 4(%esp),%eax
+ andl %edi,%esi
+ pslld $2,%xmm0
+ andl %edx,%ebp
+ rorl $7,%ecx
+ psrld $30,%xmm2
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ por %xmm2,%xmm0
+ movl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa 80(%esp),%xmm2
+ addl 8(%esp),%edi
+ andl %edx,%ebp
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ movdqa %xmm0,%xmm3
+ movl %eax,%ebp
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%esp),%edx
+ andl %ecx,%esi
+ andl %ebx,%ebp
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movl %eax,%ebp
+ pxor %xmm5,%xmm1
+.byte 102,15,58,15,223,8
+ xorl %ebx,%eax
+ addl 16(%esp),%ecx
+ andl %ebx,%ebp
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,80(%esp)
+ andl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm4,%xmm5
+ paddd %xmm0,%xmm4
+ addl %ebp,%ecx
+ movl %edx,%ebp
+ pxor %xmm3,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ movl %edi,%esi
+ xorl %eax,%edi
+ addl 20(%esp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %edi,%ebp
+ rorl $7,%edx
+ psrld $30,%xmm3
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ por %xmm3,%xmm1
+ movl %edx,%ebp
+ xorl %edi,%edx
+ movdqa 96(%esp),%xmm3
+ addl 24(%esp),%eax
+ andl %edi,%ebp
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movdqa %xmm1,%xmm4
+ movl %ebx,%ebp
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%esp),%edi
+ andl %edx,%esi
+ andl %ecx,%ebp
+ rorl $7,%ebx
+ addl %esi,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movl %ebx,%ebp
+ pxor %xmm6,%xmm2
+.byte 102,15,58,15,224,8
+ xorl %ecx,%ebx
+ addl 32(%esp),%edx
+ andl %ecx,%ebp
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,96(%esp)
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm5,%xmm6
+ paddd %xmm1,%xmm5
+ addl %ebp,%edx
+ movl %edi,%ebp
+ pxor %xmm4,%xmm2
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%esp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%ebp
+ rorl $7,%edi
+ psrld $30,%xmm4
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm4,%xmm2
+ movl %edi,%ebp
+ xorl %eax,%edi
+ movdqa 64(%esp),%xmm4
+ addl 40(%esp),%ebx
+ andl %eax,%ebp
+ andl %edi,%esi
+ rorl $7,%edx
+ addl %ebp,%ebx
+ movdqa %xmm2,%xmm5
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %edi,%edx
+ addl 44(%esp),%eax
+ andl %edi,%esi
+ andl %edx,%ebp
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ addl 48(%esp),%edi
+ pxor %xmm7,%xmm3
+.byte 102,15,58,15,233,8
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,64(%esp)
+ xorl %ecx,%esi
+ addl %eax,%edi
+ movdqa %xmm6,%xmm7
+ paddd %xmm2,%xmm6
+ rorl $7,%ebx
+ addl %esi,%edi
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm3
+ addl 56(%esp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm5
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ por %xmm5,%xmm3
+ addl 60(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl (%esp),%eax
+ paddd %xmm3,%xmm7
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm7,48(%esp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 8(%esp),%edx
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ movl 196(%esp),%ebp
+ cmpl 200(%esp),%ebp
+ je .L005done
+ movdqa 160(%esp),%xmm7
+ movdqa 176(%esp),%xmm6
+ movdqu (%ebp),%xmm0
+ movdqu 16(%ebp),%xmm1
+ movdqu 32(%ebp),%xmm2
+ movdqu 48(%ebp),%xmm3
+ addl $64,%ebp
+.byte 102,15,56,0,198
+ movl %ebp,196(%esp)
+ movdqa %xmm7,96(%esp)
+ addl 16(%esp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%ebp
+ roll $5,%ecx
+ paddd %xmm7,%xmm0
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,(%esp)
+ addl 20(%esp),%eax
+ xorl %edi,%ebp
+ psubd %xmm7,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 24(%esp),%edi
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ addl 28(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%esp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%ebp
+ roll $5,%edx
+ paddd %xmm7,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ movdqa %xmm1,16(%esp)
+ addl 36(%esp),%ebx
+ xorl %eax,%ebp
+ psubd %xmm7,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 40(%esp),%eax
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 48(%esp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %edi,%ebp
+ roll $5,%edi
+ paddd %xmm7,%xmm2
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%esp)
+ addl 52(%esp),%ecx
+ xorl %ebx,%ebp
+ psubd %xmm7,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 56(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %esi,%ebx
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ movdqa %xmm1,%xmm4
+ jmp .L004loop
+.align 16
+.L005done:
+ addl 16(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 24(%esp),%edi
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ addl 28(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%esp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ addl 36(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 40(%esp),%eax
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 48(%esp),%edx
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 56(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ movl 204(%esp),%esp
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
+.align 64
+.LK_XX_XX:
+.long 1518500249,1518500249,1518500249,1518500249
+.long 1859775393,1859775393,1859775393,1859775393
+.long 2400959708,2400959708,2400959708,2400959708
+.long 3395469782,3395469782,3395469782,3395469782
+.long 66051,67438087,134810123,202182159
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.comm OPENSSL_ia32cap_P,8,4
diff --git a/crypto/sha/asm/sha1-armv4-large.S b/crypto/sha/asm/sha1-armv4-large.S
index 639ae78..a156288 100644
--- a/crypto/sha/asm/sha1-armv4-large.S
+++ b/crypto/sha/asm/sha1-armv4-large.S
@@ -1,12 +1,22 @@
#include "arm_arch.h"
.text
+.code 32
.global sha1_block_data_order
.type sha1_block_data_order,%function
-.align 2
+.align 5
sha1_block_data_order:
+#if __ARM_ARCH__>=7
+ sub r3,pc,#8 @ sha1_block_data_order
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA1
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
stmdb sp!,{r4-r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
@@ -442,11 +452,999 @@
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.align 2
+.size sha1_block_data_order,.-sha1_block_data_order
+
+.align 5
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
-.size sha1_block_data_order,.-sha1_block_data_order
-.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <[email protected]>"
-.align 2
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha1_block_data_order
+.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 5
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type sha1_block_data_order_neon,%function
+.align 4
+sha1_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+ add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
+ @ dmb @ errata #451034 on early Cortex A8
+ @ vstmdb sp!,{d8-d15} @ ABI specification says so
+ mov r14,sp
+ sub sp,sp,#64 @ alloca
+ adr r8,.LK_00_19
+ bic sp,sp,#15 @ align for 128-bit stores
+
+ ldmia r0,{r3,r4,r5,r6,r7} @ load context
+ mov r12,sp
+
+ vld1.8 {q0-q1},[r1]! @ handles unaligned
+ veor q15,q15,q15
+ vld1.8 {q2-q3},[r1]!
+ vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19
+ vrev32.8 q0,q0 @ yes, even on
+ vrev32.8 q1,q1 @ big-endian...
+ vrev32.8 q2,q2
+ vadd.i32 q8,q0,q14
+ vrev32.8 q3,q3
+ vadd.i32 q9,q1,q14
+ vst1.32 {q8},[r12,:128]!
+ vadd.i32 q10,q2,q14
+ vst1.32 {q9},[r12,:128]!
+ vst1.32 {q10},[r12,:128]!
+ ldr r9,[sp] @ big RAW stall
+
+.Loop_neon:
+ vext.8 q8,q0,q1,#8
+ bic r10,r6,r4
+ add r7,r7,r9
+ and r11,r5,r4
+ vadd.i32 q13,q3,q14
+ ldr r9,[sp,#4]
+ add r7,r7,r3,ror#27
+ vext.8 q12,q3,q15,#4
+ eor r11,r11,r10
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ veor q8,q8,q0
+ bic r10,r5,r3
+ add r6,r6,r9
+ veor q12,q12,q2
+ and r11,r4,r3
+ ldr r9,[sp,#8]
+ veor q12,q12,q8
+ add r6,r6,r7,ror#27
+ eor r11,r11,r10
+ vst1.32 {q13},[r12,:128]!
+ sub r12,r12,#64
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vext.8 q13,q15,q12,#4
+ bic r10,r4,r7
+ add r5,r5,r9
+ vadd.i32 q8,q12,q12
+ and r11,r3,r7
+ ldr r9,[sp,#12]
+ vsri.32 q8,q12,#31
+ add r5,r5,r6,ror#27
+ eor r11,r11,r10
+ mov r7,r7,ror#2
+ vshr.u32 q12,q13,#30
+ add r5,r5,r11
+ bic r10,r3,r6
+ vshl.u32 q13,q13,#2
+ add r4,r4,r9
+ and r11,r7,r6
+ veor q8,q8,q12
+ ldr r9,[sp,#16]
+ add r4,r4,r5,ror#27
+ veor q8,q8,q13
+ eor r11,r11,r10
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vext.8 q9,q1,q2,#8
+ bic r10,r7,r5
+ add r3,r3,r9
+ and r11,r6,r5
+ vadd.i32 q13,q8,q14
+ ldr r9,[sp,#20]
+ vld1.32 {d28[],d29[]},[r8,:32]!
+ add r3,r3,r4,ror#27
+ vext.8 q12,q8,q15,#4
+ eor r11,r11,r10
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ veor q9,q9,q1
+ bic r10,r6,r4
+ add r7,r7,r9
+ veor q12,q12,q3
+ and r11,r5,r4
+ ldr r9,[sp,#24]
+ veor q12,q12,q9
+ add r7,r7,r3,ror#27
+ eor r11,r11,r10
+ vst1.32 {q13},[r12,:128]!
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vext.8 q13,q15,q12,#4
+ bic r10,r5,r3
+ add r6,r6,r9
+ vadd.i32 q9,q12,q12
+ and r11,r4,r3
+ ldr r9,[sp,#28]
+ vsri.32 q9,q12,#31
+ add r6,r6,r7,ror#27
+ eor r11,r11,r10
+ mov r3,r3,ror#2
+ vshr.u32 q12,q13,#30
+ add r6,r6,r11
+ bic r10,r4,r7
+ vshl.u32 q13,q13,#2
+ add r5,r5,r9
+ and r11,r3,r7
+ veor q9,q9,q12
+ ldr r9,[sp,#32]
+ add r5,r5,r6,ror#27
+ veor q9,q9,q13
+ eor r11,r11,r10
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ vext.8 q10,q2,q3,#8
+ bic r10,r3,r6
+ add r4,r4,r9
+ and r11,r7,r6
+ vadd.i32 q13,q9,q14
+ ldr r9,[sp,#36]
+ add r4,r4,r5,ror#27
+ vext.8 q12,q9,q15,#4
+ eor r11,r11,r10
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ veor q10,q10,q2
+ bic r10,r7,r5
+ add r3,r3,r9
+ veor q12,q12,q8
+ and r11,r6,r5
+ ldr r9,[sp,#40]
+ veor q12,q12,q10
+ add r3,r3,r4,ror#27
+ eor r11,r11,r10
+ vst1.32 {q13},[r12,:128]!
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ vext.8 q13,q15,q12,#4
+ bic r10,r6,r4
+ add r7,r7,r9
+ vadd.i32 q10,q12,q12
+ and r11,r5,r4
+ ldr r9,[sp,#44]
+ vsri.32 q10,q12,#31
+ add r7,r7,r3,ror#27
+ eor r11,r11,r10
+ mov r4,r4,ror#2
+ vshr.u32 q12,q13,#30
+ add r7,r7,r11
+ bic r10,r5,r3
+ vshl.u32 q13,q13,#2
+ add r6,r6,r9
+ and r11,r4,r3
+ veor q10,q10,q12
+ ldr r9,[sp,#48]
+ add r6,r6,r7,ror#27
+ veor q10,q10,q13
+ eor r11,r11,r10
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vext.8 q11,q3,q8,#8
+ bic r10,r4,r7
+ add r5,r5,r9
+ and r11,r3,r7
+ vadd.i32 q13,q10,q14
+ ldr r9,[sp,#52]
+ add r5,r5,r6,ror#27
+ vext.8 q12,q10,q15,#4
+ eor r11,r11,r10
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ veor q11,q11,q3
+ bic r10,r3,r6
+ add r4,r4,r9
+ veor q12,q12,q9
+ and r11,r7,r6
+ ldr r9,[sp,#56]
+ veor q12,q12,q11
+ add r4,r4,r5,ror#27
+ eor r11,r11,r10
+ vst1.32 {q13},[r12,:128]!
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vext.8 q13,q15,q12,#4
+ bic r10,r7,r5
+ add r3,r3,r9
+ vadd.i32 q11,q12,q12
+ and r11,r6,r5
+ ldr r9,[sp,#60]
+ vsri.32 q11,q12,#31
+ add r3,r3,r4,ror#27
+ eor r11,r11,r10
+ mov r5,r5,ror#2
+ vshr.u32 q12,q13,#30
+ add r3,r3,r11
+ bic r10,r6,r4
+ vshl.u32 q13,q13,#2
+ add r7,r7,r9
+ and r11,r5,r4
+ veor q11,q11,q12
+ ldr r9,[sp,#0]
+ add r7,r7,r3,ror#27
+ veor q11,q11,q13
+ eor r11,r11,r10
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vext.8 q12,q10,q11,#8
+ bic r10,r5,r3
+ add r6,r6,r9
+ and r11,r4,r3
+ veor q0,q0,q8
+ ldr r9,[sp,#4]
+ add r6,r6,r7,ror#27
+ veor q0,q0,q1
+ eor r11,r11,r10
+ mov r3,r3,ror#2
+ vadd.i32 q13,q11,q14
+ add r6,r6,r11
+ bic r10,r4,r7
+ veor q12,q12,q0
+ add r5,r5,r9
+ and r11,r3,r7
+ vshr.u32 q0,q12,#30
+ ldr r9,[sp,#8]
+ add r5,r5,r6,ror#27
+ vst1.32 {q13},[r12,:128]!
+ sub r12,r12,#64
+ eor r11,r11,r10
+ mov r7,r7,ror#2
+ vsli.32 q0,q12,#2
+ add r5,r5,r11
+ bic r10,r3,r6
+ add r4,r4,r9
+ and r11,r7,r6
+ ldr r9,[sp,#12]
+ add r4,r4,r5,ror#27
+ eor r11,r11,r10
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ bic r10,r7,r5
+ add r3,r3,r9
+ and r11,r6,r5
+ ldr r9,[sp,#16]
+ add r3,r3,r4,ror#27
+ eor r11,r11,r10
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ vext.8 q12,q11,q0,#8
+ eor r10,r4,r6
+ add r7,r7,r9
+ ldr r9,[sp,#20]
+ veor q1,q1,q9
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ veor q1,q1,q2
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vadd.i32 q13,q0,q14
+ eor r10,r3,r5
+ add r6,r6,r9
+ veor q12,q12,q1
+ ldr r9,[sp,#24]
+ eor r11,r10,r4
+ vshr.u32 q1,q12,#30
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ vst1.32 {q13},[r12,:128]!
+ add r6,r6,r11
+ eor r10,r7,r4
+ vsli.32 q1,q12,#2
+ add r5,r5,r9
+ ldr r9,[sp,#28]
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ eor r10,r6,r3
+ add r4,r4,r9
+ ldr r9,[sp,#32]
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vext.8 q12,q0,q1,#8
+ eor r10,r5,r7
+ add r3,r3,r9
+ ldr r9,[sp,#36]
+ veor q2,q2,q10
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ veor q2,q2,q3
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ vadd.i32 q13,q1,q14
+ eor r10,r4,r6
+ vld1.32 {d28[],d29[]},[r8,:32]!
+ add r7,r7,r9
+ veor q12,q12,q2
+ ldr r9,[sp,#40]
+ eor r11,r10,r5
+ vshr.u32 q2,q12,#30
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ vst1.32 {q13},[r12,:128]!
+ add r7,r7,r11
+ eor r10,r3,r5
+ vsli.32 q2,q12,#2
+ add r6,r6,r9
+ ldr r9,[sp,#44]
+ eor r11,r10,r4
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ eor r10,r7,r4
+ add r5,r5,r9
+ ldr r9,[sp,#48]
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ vext.8 q12,q1,q2,#8
+ eor r10,r6,r3
+ add r4,r4,r9
+ ldr r9,[sp,#52]
+ veor q3,q3,q11
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ veor q3,q3,q8
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vadd.i32 q13,q2,q14
+ eor r10,r5,r7
+ add r3,r3,r9
+ veor q12,q12,q3
+ ldr r9,[sp,#56]
+ eor r11,r10,r6
+ vshr.u32 q3,q12,#30
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ vst1.32 {q13},[r12,:128]!
+ add r3,r3,r11
+ eor r10,r4,r6
+ vsli.32 q3,q12,#2
+ add r7,r7,r9
+ ldr r9,[sp,#60]
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ eor r10,r3,r5
+ add r6,r6,r9
+ ldr r9,[sp,#0]
+ eor r11,r10,r4
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vext.8 q12,q2,q3,#8
+ eor r10,r7,r4
+ add r5,r5,r9
+ ldr r9,[sp,#4]
+ veor q8,q8,q0
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ veor q8,q8,q9
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ vadd.i32 q13,q3,q14
+ eor r10,r6,r3
+ add r4,r4,r9
+ veor q12,q12,q8
+ ldr r9,[sp,#8]
+ eor r11,r10,r7
+ vshr.u32 q8,q12,#30
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ vst1.32 {q13},[r12,:128]!
+ sub r12,r12,#64
+ add r4,r4,r11
+ eor r10,r5,r7
+ vsli.32 q8,q12,#2
+ add r3,r3,r9
+ ldr r9,[sp,#12]
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ eor r10,r4,r6
+ add r7,r7,r9
+ ldr r9,[sp,#16]
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vext.8 q12,q3,q8,#8
+ eor r10,r3,r5
+ add r6,r6,r9
+ ldr r9,[sp,#20]
+ veor q9,q9,q1
+ eor r11,r10,r4
+ add r6,r6,r7,ror#27
+ veor q9,q9,q10
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vadd.i32 q13,q8,q14
+ eor r10,r7,r4
+ add r5,r5,r9
+ veor q12,q12,q9
+ ldr r9,[sp,#24]
+ eor r11,r10,r3
+ vshr.u32 q9,q12,#30
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ vst1.32 {q13},[r12,:128]!
+ add r5,r5,r11
+ eor r10,r6,r3
+ vsli.32 q9,q12,#2
+ add r4,r4,r9
+ ldr r9,[sp,#28]
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ eor r10,r5,r7
+ add r3,r3,r9
+ ldr r9,[sp,#32]
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ vext.8 q12,q8,q9,#8
+ add r7,r7,r9
+ and r10,r5,r6
+ ldr r9,[sp,#36]
+ veor q10,q10,q2
+ add r7,r7,r3,ror#27
+ eor r11,r5,r6
+ veor q10,q10,q11
+ add r7,r7,r10
+ and r11,r11,r4
+ vadd.i32 q13,q9,q14
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ veor q12,q12,q10
+ add r6,r6,r9
+ and r10,r4,r5
+ vshr.u32 q10,q12,#30
+ ldr r9,[sp,#40]
+ add r6,r6,r7,ror#27
+ vst1.32 {q13},[r12,:128]!
+ eor r11,r4,r5
+ add r6,r6,r10
+ vsli.32 q10,q12,#2
+ and r11,r11,r3
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ add r5,r5,r9
+ and r10,r3,r4
+ ldr r9,[sp,#44]
+ add r5,r5,r6,ror#27
+ eor r11,r3,r4
+ add r5,r5,r10
+ and r11,r11,r7
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ add r4,r4,r9
+ and r10,r7,r3
+ ldr r9,[sp,#48]
+ add r4,r4,r5,ror#27
+ eor r11,r7,r3
+ add r4,r4,r10
+ and r11,r11,r6
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vext.8 q12,q9,q10,#8
+ add r3,r3,r9
+ and r10,r6,r7
+ ldr r9,[sp,#52]
+ veor q11,q11,q3
+ add r3,r3,r4,ror#27
+ eor r11,r6,r7
+ veor q11,q11,q0
+ add r3,r3,r10
+ and r11,r11,r5
+ vadd.i32 q13,q10,q14
+ mov r5,r5,ror#2
+ vld1.32 {d28[],d29[]},[r8,:32]!
+ add r3,r3,r11
+ veor q12,q12,q11
+ add r7,r7,r9
+ and r10,r5,r6
+ vshr.u32 q11,q12,#30
+ ldr r9,[sp,#56]
+ add r7,r7,r3,ror#27
+ vst1.32 {q13},[r12,:128]!
+ eor r11,r5,r6
+ add r7,r7,r10
+ vsli.32 q11,q12,#2
+ and r11,r11,r4
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ add r6,r6,r9
+ and r10,r4,r5
+ ldr r9,[sp,#60]
+ add r6,r6,r7,ror#27
+ eor r11,r4,r5
+ add r6,r6,r10
+ and r11,r11,r3
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ add r5,r5,r9
+ and r10,r3,r4
+ ldr r9,[sp,#0]
+ add r5,r5,r6,ror#27
+ eor r11,r3,r4
+ add r5,r5,r10
+ and r11,r11,r7
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ vext.8 q12,q10,q11,#8
+ add r4,r4,r9
+ and r10,r7,r3
+ ldr r9,[sp,#4]
+ veor q0,q0,q8
+ add r4,r4,r5,ror#27
+ eor r11,r7,r3
+ veor q0,q0,q1
+ add r4,r4,r10
+ and r11,r11,r6
+ vadd.i32 q13,q11,q14
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ veor q12,q12,q0
+ add r3,r3,r9
+ and r10,r6,r7
+ vshr.u32 q0,q12,#30
+ ldr r9,[sp,#8]
+ add r3,r3,r4,ror#27
+ vst1.32 {q13},[r12,:128]!
+ sub r12,r12,#64
+ eor r11,r6,r7
+ add r3,r3,r10
+ vsli.32 q0,q12,#2
+ and r11,r11,r5
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ add r7,r7,r9
+ and r10,r5,r6
+ ldr r9,[sp,#12]
+ add r7,r7,r3,ror#27
+ eor r11,r5,r6
+ add r7,r7,r10
+ and r11,r11,r4
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ add r6,r6,r9
+ and r10,r4,r5
+ ldr r9,[sp,#16]
+ add r6,r6,r7,ror#27
+ eor r11,r4,r5
+ add r6,r6,r10
+ and r11,r11,r3
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vext.8 q12,q11,q0,#8
+ add r5,r5,r9
+ and r10,r3,r4
+ ldr r9,[sp,#20]
+ veor q1,q1,q9
+ add r5,r5,r6,ror#27
+ eor r11,r3,r4
+ veor q1,q1,q2
+ add r5,r5,r10
+ and r11,r11,r7
+ vadd.i32 q13,q0,q14
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ veor q12,q12,q1
+ add r4,r4,r9
+ and r10,r7,r3
+ vshr.u32 q1,q12,#30
+ ldr r9,[sp,#24]
+ add r4,r4,r5,ror#27
+ vst1.32 {q13},[r12,:128]!
+ eor r11,r7,r3
+ add r4,r4,r10
+ vsli.32 q1,q12,#2
+ and r11,r11,r6
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ add r3,r3,r9
+ and r10,r6,r7
+ ldr r9,[sp,#28]
+ add r3,r3,r4,ror#27
+ eor r11,r6,r7
+ add r3,r3,r10
+ and r11,r11,r5
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ add r7,r7,r9
+ and r10,r5,r6
+ ldr r9,[sp,#32]
+ add r7,r7,r3,ror#27
+ eor r11,r5,r6
+ add r7,r7,r10
+ and r11,r11,r4
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vext.8 q12,q0,q1,#8
+ add r6,r6,r9
+ and r10,r4,r5
+ ldr r9,[sp,#36]
+ veor q2,q2,q10
+ add r6,r6,r7,ror#27
+ eor r11,r4,r5
+ veor q2,q2,q3
+ add r6,r6,r10
+ and r11,r11,r3
+ vadd.i32 q13,q1,q14
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ veor q12,q12,q2
+ add r5,r5,r9
+ and r10,r3,r4
+ vshr.u32 q2,q12,#30
+ ldr r9,[sp,#40]
+ add r5,r5,r6,ror#27
+ vst1.32 {q13},[r12,:128]!
+ eor r11,r3,r4
+ add r5,r5,r10
+ vsli.32 q2,q12,#2
+ and r11,r11,r7
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ add r4,r4,r9
+ and r10,r7,r3
+ ldr r9,[sp,#44]
+ add r4,r4,r5,ror#27
+ eor r11,r7,r3
+ add r4,r4,r10
+ and r11,r11,r6
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ add r3,r3,r9
+ and r10,r6,r7
+ ldr r9,[sp,#48]
+ add r3,r3,r4,ror#27
+ eor r11,r6,r7
+ add r3,r3,r10
+ and r11,r11,r5
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ vext.8 q12,q1,q2,#8
+ eor r10,r4,r6
+ add r7,r7,r9
+ ldr r9,[sp,#52]
+ veor q3,q3,q11
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ veor q3,q3,q8
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vadd.i32 q13,q2,q14
+ eor r10,r3,r5
+ add r6,r6,r9
+ veor q12,q12,q3
+ ldr r9,[sp,#56]
+ eor r11,r10,r4
+ vshr.u32 q3,q12,#30
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ vst1.32 {q13},[r12,:128]!
+ add r6,r6,r11
+ eor r10,r7,r4
+ vsli.32 q3,q12,#2
+ add r5,r5,r9
+ ldr r9,[sp,#60]
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ eor r10,r6,r3
+ add r4,r4,r9
+ ldr r9,[sp,#0]
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ vadd.i32 q13,q3,q14
+ eor r10,r5,r7
+ add r3,r3,r9
+ vst1.32 {q13},[r12,:128]!
+ sub r12,r12,#64
+ teq r1,r2
+ sub r8,r8,#16
+ subeq r1,r1,#64
+ vld1.8 {q0-q1},[r1]!
+ ldr r9,[sp,#4]
+ eor r11,r10,r6
+ vld1.8 {q2-q3},[r1]!
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ vld1.32 {d28[],d29[]},[r8,:32]!
+ add r3,r3,r11
+ eor r10,r4,r6
+ vrev32.8 q0,q0
+ add r7,r7,r9
+ ldr r9,[sp,#8]
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ eor r10,r3,r5
+ add r6,r6,r9
+ ldr r9,[sp,#12]
+ eor r11,r10,r4
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ eor r10,r7,r4
+ add r5,r5,r9
+ ldr r9,[sp,#16]
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ vrev32.8 q1,q1
+ eor r10,r6,r3
+ add r4,r4,r9
+ vadd.i32 q8,q0,q14
+ ldr r9,[sp,#20]
+ eor r11,r10,r7
+ vst1.32 {q8},[r12,:128]!
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ eor r10,r5,r7
+ add r3,r3,r9
+ ldr r9,[sp,#24]
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ eor r10,r4,r6
+ add r7,r7,r9
+ ldr r9,[sp,#28]
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ eor r10,r3,r5
+ add r6,r6,r9
+ ldr r9,[sp,#32]
+ eor r11,r10,r4
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ vrev32.8 q2,q2
+ eor r10,r7,r4
+ add r5,r5,r9
+ vadd.i32 q9,q1,q14
+ ldr r9,[sp,#36]
+ eor r11,r10,r3
+ vst1.32 {q9},[r12,:128]!
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ eor r10,r6,r3
+ add r4,r4,r9
+ ldr r9,[sp,#40]
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ eor r10,r5,r7
+ add r3,r3,r9
+ ldr r9,[sp,#44]
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ eor r10,r4,r6
+ add r7,r7,r9
+ ldr r9,[sp,#48]
+ eor r11,r10,r5
+ add r7,r7,r3,ror#27
+ mov r4,r4,ror#2
+ add r7,r7,r11
+ vrev32.8 q3,q3
+ eor r10,r3,r5
+ add r6,r6,r9
+ vadd.i32 q10,q2,q14
+ ldr r9,[sp,#52]
+ eor r11,r10,r4
+ vst1.32 {q10},[r12,:128]!
+ add r6,r6,r7,ror#27
+ mov r3,r3,ror#2
+ add r6,r6,r11
+ eor r10,r7,r4
+ add r5,r5,r9
+ ldr r9,[sp,#56]
+ eor r11,r10,r3
+ add r5,r5,r6,ror#27
+ mov r7,r7,ror#2
+ add r5,r5,r11
+ eor r10,r6,r3
+ add r4,r4,r9
+ ldr r9,[sp,#60]
+ eor r11,r10,r7
+ add r4,r4,r5,ror#27
+ mov r6,r6,ror#2
+ add r4,r4,r11
+ eor r10,r5,r7
+ add r3,r3,r9
+ eor r11,r10,r6
+ add r3,r3,r4,ror#27
+ mov r5,r5,ror#2
+ add r3,r3,r11
+ ldmia r0,{r9,r10,r11,r12} @ accumulate context
+ add r3,r3,r9
+ ldr r9,[r0,#16]
+ add r4,r4,r10
+ add r5,r5,r11
+ add r6,r6,r12
+ moveq sp,r14
+ add r7,r7,r9
+ ldrne r9,[sp]
+ stmia r0,{r3,r4,r5,r6,r7}
+ addne r12,sp,#3*16
+ bne .Loop_neon
+
+ @ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r12,pc}
+.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
+#endif
+#if __ARM_ARCH__>=7
+.type sha1_block_data_order_armv8,%function
+.align 5
+sha1_block_data_order_armv8:
+.LARMv8:
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ veor q1,q1,q1
+ adr r3,.LK_00_19
+ vld1.32 {q0},[r0]!
+ vld1.32 {d2[0]},[r0]
+ sub r0,r0,#16
+ vld1.32 {d16[],d17[]},[r3,:32]!
+ vld1.32 {d18[],d19[]},[r3,:32]!
+ vld1.32 {d20[],d21[]},[r3,:32]!
+ vld1.32 {d22[],d23[]},[r3,:32]
+
+.Loop_v8:
+ vld1.8 {q4-q5},[r1]!
+ vld1.8 {q6-q7},[r1]!
+ vrev32.8 q4,q4
+ vrev32.8 q5,q5
+
+ vadd.i32 q12,q8,q4
+ vrev32.8 q6,q6
+ vmov q14,q0 @ offload
+ subs r2,r2,#1
+
+ vadd.i32 q13,q8,q5
+ vrev32.8 q7,q7
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0
+ .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12
+ vadd.i32 q12,q8,q6
+ .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1
+ .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13
+ vadd.i32 q13,q8,q7
+ .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
+ .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2
+ .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12
+ vadd.i32 q12,q8,q4
+ .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
+ .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3
+ .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13
+ vadd.i32 q13,q9,q5
+ .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
+ .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4
+ .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12
+ vadd.i32 q12,q9,q6
+ .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
+ .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+ vadd.i32 q13,q9,q7
+ .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
+ .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6
+ .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
+ vadd.i32 q12,q9,q4
+ .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
+ .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+ vadd.i32 q13,q9,q5
+ .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
+ .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8
+ .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
+ vadd.i32 q12,q10,q6
+ .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
+ .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+ vadd.i32 q13,q10,q7
+ .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
+ .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10
+ .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
+ vadd.i32 q12,q10,q4
+ .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
+ .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11
+ .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13
+ vadd.i32 q13,q10,q5
+ .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
+ .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12
+ .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
+ vadd.i32 q12,q10,q6
+ .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
+ .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13
+ .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13
+ vadd.i32 q13,q11,q7
+ .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7
+ .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14
+ .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12
+ vadd.i32 q12,q11,q4
+ .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4
+ .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+ vadd.i32 q13,q11,q5
+ .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5
+ .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16
+ .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
+ vadd.i32 q12,q11,q6
+ .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+ vadd.i32 q13,q11,q7
+
+ .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18
+ .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12
+
+ .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19
+ .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13
+
+ vadd.i32 q1,q1,q2
+ vadd.i32 q0,q0,q14
+ bne .Loop_v8
+
+ vst1.32 {q0},[r0]!
+ vst1.32 {d2[0]},[r0]
+
+ vldmia sp!,{d8-d15}
+ bx lr @ bx lr
+.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
+#endif
+.comm OPENSSL_armcap_P,4,4
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index 33da3e0..50bd07b 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
-# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -52,6 +52,20 @@
# Profiler-assisted and platform-specific optimization resulted in 10%
# improvement on Cortex A8 core and 12.2 cycles per byte.
+# September 2013.
+#
+# Add NEON implementation (see sha1-586.pl for background info). On
+# Cortex A8 it was measured to process one byte in 6.7 cycles or >80%
+# faster than integer-only code. Because [fully unrolled] NEON code
+# is ~2.5x larger and there are some redundant instructions executed
+# when processing last block, improvement is not as big for smallest
+# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per
+# byte, which is also >80% faster than integer-only code.
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
+
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
@@ -153,12 +167,22 @@
#include "arm_arch.h"
.text
+.code 32
.global sha1_block_data_order
.type sha1_block_data_order,%function
-.align 2
+.align 5
sha1_block_data_order:
+#if __ARM_ARCH__>=7
+ sub r3,pc,#8 @ sha1_block_data_order
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA1
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
stmdb sp!,{r4-r12,lr}
add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
ldmia $ctx,{$a,$b,$c,$d,$e}
@@ -233,16 +257,422 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.align 2
+.size sha1_block_data_order,.-sha1_block_data_order
+
+.align 5
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
-.size sha1_block_data_order,.-sha1_block_data_order
-.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha1_block_data_order
+.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 5
+___
+#####################################################################
+# NEON stuff
+#
+{{{
+my @V=($a,$b,$c,$d,$e);
+my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14));
+my $Xi=4;
+my @X=map("q$_",(8..11,0..3));
+my @Tx=("q12","q13");
+my ($K,$zero)=("q14","q15");
+my $j=0;
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+ my $arg = pop;
+ $arg = "#$arg" if ($arg*1 eq $arg);
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub body_00_19 () {
+ (
+ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
+ '&bic ($t0,$d,$b)',
+ '&add ($e,$e,$Ki)', # e+=X[i]+K
+ '&and ($t1,$c,$b)',
+ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
+ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
+ '&eor ($t1,$t1,$t0)', # F_00_19
+ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
+ '&add ($e,$e,$t1);'. # e+=F_00_19
+ '$j++; unshift(@V,pop(@V));'
+ )
+}
+sub body_20_39 () {
+ (
+ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
+ '&eor ($t0,$b,$d)',
+ '&add ($e,$e,$Ki)', # e+=X[i]+K
+ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)',
+ '&eor ($t1,$t0,$c)', # F_20_39
+ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
+ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
+ '&add ($e,$e,$t1);'. # e+=F_20_39
+ '$j++; unshift(@V,pop(@V));'
+ )
+}
+sub body_40_59 () {
+ (
+ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
+ '&add ($e,$e,$Ki)', # e+=X[i]+K
+ '&and ($t0,$c,$d)',
+ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
+ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
+ '&eor ($t1,$c,$d)',
+ '&add ($e,$e,$t0)',
+ '&and ($t1,$t1,$b)',
+ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
+ '&add ($e,$e,$t1);'. # e+=F_40_59
+ '$j++; unshift(@V,pop(@V));'
+ )
+}
+
+sub Xupdate_16_31 ()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e);
+
+ &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@Tx[1],@X[-1&7],$K);
+ eval(shift(@insns));
+ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
+ eval(shift(@insns));
+ &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@Tx[0],@Tx[0],@X[0]); # "X[0]"^="X[-3]"^"X[-8]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
+ &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@Tx[0],@Tx[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 (@Tx[0],@Tx[1],30);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshl_u32 (@Tx[1],@Tx[1],2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@X[0],@X[0],@Tx[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
+
+ foreach (@insns) { eval; } # remaining instructions [if any]
+
+ $Xi++; push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xupdate_32_79 ()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e);
+
+ &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@Tx[1],@X[-1&7],$K);
+ eval(shift(@insns));
+ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
+ eval(shift(@insns));
+ &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]"
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 (@X[0],@Tx[0],30);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
+ &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2
+
+ foreach (@insns) { eval; } # remaining instructions [if any]
+
+ $Xi++; push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xuplast_80 ()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e);
+
+ &vadd_i32 (@Tx[1],@X[-1&7],$K);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!");
+ &sub ($Xfer,$Xfer,64);
+
+ &teq ($inp,$len);
+ &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
+ &subeq ($inp,$inp,64); # reload last block to avoid SEGV
+ &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vrev32_8 (@X[-4&7],@X[-4&7]);
+
+ foreach (@insns) { eval; } # remaining instructions
+
+ $Xi=0;
+}
+
+sub Xloop()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e);
+
+ &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU
+
+ foreach (@insns) { eval; }
+
+ $Xi++;
+}
+
+$code.=<<___;
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type sha1_block_data_order_neon,%function
+.align 4
+sha1_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+ add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
+ @ dmb @ errata #451034 on early Cortex A8
+ @ vstmdb sp!,{d8-d15} @ ABI specification says so
+ mov $saved_sp,sp
+ sub sp,sp,#64 @ alloca
+ adr $K_XX_XX,.LK_00_19
+ bic sp,sp,#15 @ align for 128-bit stores
+
+ ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
+ mov $Xfer,sp
+
+ vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
+ veor $zero,$zero,$zero
+ vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]!
+ vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19
+ vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on
+ vrev32.8 @X[-3&7],@X[-3&7] @ big-endian...
+ vrev32.8 @X[-2&7],@X[-2&7]
+ vadd.i32 @X[0],@X[-4&7],$K
+ vrev32.8 @X[-1&7],@X[-1&7]
+ vadd.i32 @X[1],@X[-3&7],$K
+ vst1.32 {@X[0]},[$Xfer,:128]!
+ vadd.i32 @X[2],@X[-2&7],$K
+ vst1.32 {@X[1]},[$Xfer,:128]!
+ vst1.32 {@X[2]},[$Xfer,:128]!
+ ldr $Ki,[sp] @ big RAW stall
+
+.Loop_neon:
+___
+ &Xupdate_16_31(\&body_00_19);
+ &Xupdate_16_31(\&body_00_19);
+ &Xupdate_16_31(\&body_00_19);
+ &Xupdate_16_31(\&body_00_19);
+ &Xupdate_32_79(\&body_00_19);
+ &Xupdate_32_79(\&body_20_39);
+ &Xupdate_32_79(\&body_20_39);
+ &Xupdate_32_79(\&body_20_39);
+ &Xupdate_32_79(\&body_20_39);
+ &Xupdate_32_79(\&body_20_39);
+ &Xupdate_32_79(\&body_40_59);
+ &Xupdate_32_79(\&body_40_59);
+ &Xupdate_32_79(\&body_40_59);
+ &Xupdate_32_79(\&body_40_59);
+ &Xupdate_32_79(\&body_40_59);
+ &Xupdate_32_79(\&body_20_39);
+ &Xuplast_80(\&body_20_39);
+ &Xloop(\&body_20_39);
+ &Xloop(\&body_20_39);
+ &Xloop(\&body_20_39);
+$code.=<<___;
+ ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context
+ add $a,$a,$Ki
+ ldr $Ki,[$ctx,#16]
+ add $b,$b,$t0
+ add $c,$c,$t1
+ add $d,$d,$Xfer
+ moveq sp,$saved_sp
+ add $e,$e,$Ki
+ ldrne $Ki,[sp]
+ stmia $ctx,{$a,$b,$c,$d,$e}
+ addne $Xfer,sp,#3*16
+ bne .Loop_neon
+
+ @ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r12,pc}
+.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
+#endif
+___
+}}}
+#####################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3));
+my @MSG=map("q$_",(4..7));
+my @Kxx=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
+
+$code.=<<___;
+#if __ARM_ARCH__>=7
+.type sha1_block_data_order_armv8,%function
+.align 5
+sha1_block_data_order_armv8:
+.LARMv8:
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ veor $E,$E,$E
+ adr r3,.LK_00_19
+ vld1.32 {$ABCD},[$ctx]!
+ vld1.32 {$E\[0]},[$ctx]
+ sub $ctx,$ctx,#16
+ vld1.32 {@Kxx[0]\[]},[r3,:32]!
+ vld1.32 {@Kxx[1]\[]},[r3,:32]!
+ vld1.32 {@Kxx[2]\[]},[r3,:32]!
+ vld1.32 {@Kxx[3]\[]},[r3,:32]
+
+.Loop_v8:
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
+ vrev32.8 @MSG[0],@MSG[0]
+ vrev32.8 @MSG[1],@MSG[1]
+
+ vadd.i32 $W0,@Kxx[0],@MSG[0]
+ vrev32.8 @MSG[2],@MSG[2]
+ vmov $ABCD_SAVE,$ABCD @ offload
+ subs $len,$len,#1
+
+ vadd.i32 $W1,@Kxx[0],@MSG[1]
+ vrev32.8 @MSG[3],@MSG[3]
+ sha1h $E1,$ABCD @ 0
+ sha1c $ABCD,$E,$W0
+ vadd.i32 $W0,@Kxx[$j],@MSG[2]
+ sha1su0 @MSG[0],@MSG[1],@MSG[2]
+___
+for ($j=0,$i=1;$i<20-3;$i++) {
+my $f=("c","p","m","p")[$i/5];
+$code.=<<___;
+ sha1h $E0,$ABCD @ $i
+ sha1$f $ABCD,$E1,$W1
+ vadd.i32 $W1,@Kxx[$j],@MSG[3]
+ sha1su1 @MSG[0],@MSG[3]
+___
+$code.=<<___ if ($i<20-4);
+ sha1su0 @MSG[1],@MSG[2],@MSG[3]
+___
+ ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
+ push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
+}
+$code.=<<___;
+ sha1h $E0,$ABCD @ $i
+ sha1p $ABCD,$E1,$W1
+ vadd.i32 $W1,@Kxx[$j],@MSG[3]
+
+ sha1h $E1,$ABCD @ 18
+ sha1p $ABCD,$E0,$W0
+
+ sha1h $E0,$ABCD @ 19
+ sha1p $ABCD,$E1,$W1
+
+ vadd.i32 $E,$E,$E0
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
+ bne .Loop_v8
+
+ vst1.32 {$ABCD},[$ctx]!
+ vst1.32 {$E\[0]},[$ctx]
+
+ vldmia sp!,{d8-d15}
+ ret @ bx lr
+.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.comm OPENSSL_armcap_P,4,4
___
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-print $code;
+{ my %opcode = (
+ "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40,
+ "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40,
+ "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 );
+
+ sub unsha1 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+}
+
+foreach (split($/,$code)) {
+ s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or
+ s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo;
+
+ s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;
+
+ s/\bret\b/bx lr/o or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4
+
+ print $_,$/;
+}
+
close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha1-armv8.S b/crypto/sha/asm/sha1-armv8.S
new file mode 100644
index 0000000..f9d1262
--- /dev/null
+++ b/crypto/sha/asm/sha1-armv8.S
@@ -0,0 +1,1211 @@
+#include "arm_arch.h"
+
+.text
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,%function
+.align 6
+sha1_block_data_order:
+ ldr x16,.LOPENSSL_armcap_P
+ adr x17,.LOPENSSL_armcap_P
+ add x16,x16,x17
+ ldr w16,[x16]
+ tst w16,#ARMV8_SHA1
+ b.ne .Lv8_entry
+
+ stp x29,x30,[sp,#-96]!
+ add x29,sp,#0
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+
+ ldp w20,w21,[x0]
+ ldp w22,w23,[x0,#8]
+ ldr w24,[x0,#16]
+
+.Loop:
+ ldr x3,[x1],#64
+ movz w28,#0x7999
+ sub x2,x2,#1
+ movk w28,#0x5a82,lsl#16
+#ifdef __ARMEB__
+ ror x3,x3,#32
+#else
+ rev32 x3,x3
+#endif
+ add w24,w24,w28 // warm it up
+ add w24,w24,w3
+ lsr x4,x3,#32
+ ldr x5,[x1,#-56]
+ bic w25,w23,w21
+ and w26,w22,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ orr w25,w25,w26
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ add w23,w23,w4 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x5,x5,#32
+#else
+ rev32 x5,x5
+#endif
+ bic w25,w22,w20
+ and w26,w21,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ orr w25,w25,w26
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ add w22,w22,w5 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ lsr x6,x5,#32
+ ldr x7,[x1,#-48]
+ bic w25,w21,w24
+ and w26,w20,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ orr w25,w25,w26
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ add w21,w21,w6 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x7,x7,#32
+#else
+ rev32 x7,x7
+#endif
+ bic w25,w20,w23
+ and w26,w24,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ orr w25,w25,w26
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ add w20,w20,w7 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ lsr x8,x7,#32
+ ldr x9,[x1,#-40]
+ bic w25,w24,w22
+ and w26,w23,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ orr w25,w25,w26
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ add w24,w24,w8 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x9,x9,#32
+#else
+ rev32 x9,x9
+#endif
+ bic w25,w23,w21
+ and w26,w22,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ orr w25,w25,w26
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ add w23,w23,w9 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ lsr x10,x9,#32
+ ldr x11,[x1,#-32]
+ bic w25,w22,w20
+ and w26,w21,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ orr w25,w25,w26
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ add w22,w22,w10 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x11,x11,#32
+#else
+ rev32 x11,x11
+#endif
+ bic w25,w21,w24
+ and w26,w20,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ orr w25,w25,w26
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ add w21,w21,w11 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ lsr x12,x11,#32
+ ldr x13,[x1,#-24]
+ bic w25,w20,w23
+ and w26,w24,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ orr w25,w25,w26
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ add w20,w20,w12 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x13,x13,#32
+#else
+ rev32 x13,x13
+#endif
+ bic w25,w24,w22
+ and w26,w23,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ orr w25,w25,w26
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ add w24,w24,w13 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ lsr x14,x13,#32
+ ldr x15,[x1,#-16]
+ bic w25,w23,w21
+ and w26,w22,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ orr w25,w25,w26
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ add w23,w23,w14 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x15,x15,#32
+#else
+ rev32 x15,x15
+#endif
+ bic w25,w22,w20
+ and w26,w21,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ orr w25,w25,w26
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ add w22,w22,w15 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ lsr x16,x15,#32
+ ldr x17,[x1,#-8]
+ bic w25,w21,w24
+ and w26,w20,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ orr w25,w25,w26
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ add w21,w21,w16 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+#ifdef __ARMEB__
+ ror x17,x17,#32
+#else
+ rev32 x17,x17
+#endif
+ bic w25,w20,w23
+ and w26,w24,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ orr w25,w25,w26
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ add w20,w20,w17 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ lsr x19,x17,#32
+ eor w3,w3,w5
+ bic w25,w24,w22
+ and w26,w23,w22
+ ror w27,w21,#27
+ eor w3,w3,w11
+ add w24,w24,w28 // future e+=K
+ orr w25,w25,w26
+ add w20,w20,w27 // e+=rot(a,5)
+ eor w3,w3,w16
+ ror w22,w22,#2
+ add w24,w24,w19 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w3,w3,#31
+ eor w4,w4,w6
+ bic w25,w23,w21
+ and w26,w22,w21
+ ror w27,w20,#27
+ eor w4,w4,w12
+ add w23,w23,w28 // future e+=K
+ orr w25,w25,w26
+ add w24,w24,w27 // e+=rot(a,5)
+ eor w4,w4,w17
+ ror w21,w21,#2
+ add w23,w23,w3 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w4,w4,#31
+ eor w5,w5,w7
+ bic w25,w22,w20
+ and w26,w21,w20
+ ror w27,w24,#27
+ eor w5,w5,w13
+ add w22,w22,w28 // future e+=K
+ orr w25,w25,w26
+ add w23,w23,w27 // e+=rot(a,5)
+ eor w5,w5,w19
+ ror w20,w20,#2
+ add w22,w22,w4 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w5,w5,#31
+ eor w6,w6,w8
+ bic w25,w21,w24
+ and w26,w20,w24
+ ror w27,w23,#27
+ eor w6,w6,w14
+ add w21,w21,w28 // future e+=K
+ orr w25,w25,w26
+ add w22,w22,w27 // e+=rot(a,5)
+ eor w6,w6,w3
+ ror w24,w24,#2
+ add w21,w21,w5 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w6,w6,#31
+ eor w7,w7,w9
+ bic w25,w20,w23
+ and w26,w24,w23
+ ror w27,w22,#27
+ eor w7,w7,w15
+ add w20,w20,w28 // future e+=K
+ orr w25,w25,w26
+ add w21,w21,w27 // e+=rot(a,5)
+ eor w7,w7,w4
+ ror w23,w23,#2
+ add w20,w20,w6 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w7,w7,#31
+ movz w28,#0xeba1
+ movk w28,#0x6ed9,lsl#16
+ eor w8,w8,w10
+ bic w25,w24,w22
+ and w26,w23,w22
+ ror w27,w21,#27
+ eor w8,w8,w16
+ add w24,w24,w28 // future e+=K
+ orr w25,w25,w26
+ add w20,w20,w27 // e+=rot(a,5)
+ eor w8,w8,w5
+ ror w22,w22,#2
+ add w24,w24,w7 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w8,w8,#31
+ eor w9,w9,w11
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w9,w9,w17
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w9,w9,w6
+ add w23,w23,w8 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w9,w9,#31
+ eor w10,w10,w12
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w10,w10,w19
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w10,w10,w7
+ add w22,w22,w9 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w10,w10,#31
+ eor w11,w11,w13
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w11,w11,w3
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w11,w11,w8
+ add w21,w21,w10 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w11,w11,#31
+ eor w12,w12,w14
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w12,w12,w4
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w12,w12,w9
+ add w20,w20,w11 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w12,w12,#31
+ eor w13,w13,w15
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w13,w13,w5
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w13,w13,w10
+ add w24,w24,w12 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w13,w13,#31
+ eor w14,w14,w16
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w14,w14,w6
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w14,w14,w11
+ add w23,w23,w13 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w14,w14,#31
+ eor w15,w15,w17
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w15,w15,w7
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w15,w15,w12
+ add w22,w22,w14 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w15,w15,#31
+ eor w16,w16,w19
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w16,w16,w8
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w16,w16,w13
+ add w21,w21,w15 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w16,w16,#31
+ eor w17,w17,w3
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w17,w17,w9
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w17,w17,w14
+ add w20,w20,w16 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w17,w17,#31
+ eor w19,w19,w4
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w19,w19,w10
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w19,w19,w15
+ add w24,w24,w17 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w19,w19,#31
+ eor w3,w3,w5
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w3,w3,w11
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w3,w3,w16
+ add w23,w23,w19 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w3,w3,#31
+ eor w4,w4,w6
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w4,w4,w12
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w4,w4,w17
+ add w22,w22,w3 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w4,w4,#31
+ eor w5,w5,w7
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w5,w5,w13
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w5,w5,w19
+ add w21,w21,w4 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w5,w5,#31
+ eor w6,w6,w8
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w6,w6,w14
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w6,w6,w3
+ add w20,w20,w5 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w6,w6,#31
+ eor w7,w7,w9
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w7,w7,w15
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w7,w7,w4
+ add w24,w24,w6 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w7,w7,#31
+ eor w8,w8,w10
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w8,w8,w16
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w8,w8,w5
+ add w23,w23,w7 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w8,w8,#31
+ eor w9,w9,w11
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w9,w9,w17
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w9,w9,w6
+ add w22,w22,w8 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w9,w9,#31
+ eor w10,w10,w12
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w10,w10,w19
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w10,w10,w7
+ add w21,w21,w9 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w10,w10,#31
+ eor w11,w11,w13
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w11,w11,w3
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w11,w11,w8
+ add w20,w20,w10 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w11,w11,#31
+ movz w28,#0xbcdc
+ movk w28,#0x8f1b,lsl#16
+ eor w12,w12,w14
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w12,w12,w4
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w12,w12,w9
+ add w24,w24,w11 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w12,w12,#31
+ orr w25,w21,w22
+ and w26,w21,w22
+ eor w13,w13,w15
+ ror w27,w20,#27
+ and w25,w25,w23
+ add w23,w23,w28 // future e+=K
+ eor w13,w13,w5
+ add w24,w24,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w21,w21,#2
+ eor w13,w13,w10
+ add w23,w23,w12 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w13,w13,#31
+ orr w25,w20,w21
+ and w26,w20,w21
+ eor w14,w14,w16
+ ror w27,w24,#27
+ and w25,w25,w22
+ add w22,w22,w28 // future e+=K
+ eor w14,w14,w6
+ add w23,w23,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w20,w20,#2
+ eor w14,w14,w11
+ add w22,w22,w13 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w14,w14,#31
+ orr w25,w24,w20
+ and w26,w24,w20
+ eor w15,w15,w17
+ ror w27,w23,#27
+ and w25,w25,w21
+ add w21,w21,w28 // future e+=K
+ eor w15,w15,w7
+ add w22,w22,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w24,w24,#2
+ eor w15,w15,w12
+ add w21,w21,w14 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w15,w15,#31
+ orr w25,w23,w24
+ and w26,w23,w24
+ eor w16,w16,w19
+ ror w27,w22,#27
+ and w25,w25,w20
+ add w20,w20,w28 // future e+=K
+ eor w16,w16,w8
+ add w21,w21,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w23,w23,#2
+ eor w16,w16,w13
+ add w20,w20,w15 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w16,w16,#31
+ orr w25,w22,w23
+ and w26,w22,w23
+ eor w17,w17,w3
+ ror w27,w21,#27
+ and w25,w25,w24
+ add w24,w24,w28 // future e+=K
+ eor w17,w17,w9
+ add w20,w20,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w22,w22,#2
+ eor w17,w17,w14
+ add w24,w24,w16 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w17,w17,#31
+ orr w25,w21,w22
+ and w26,w21,w22
+ eor w19,w19,w4
+ ror w27,w20,#27
+ and w25,w25,w23
+ add w23,w23,w28 // future e+=K
+ eor w19,w19,w10
+ add w24,w24,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w21,w21,#2
+ eor w19,w19,w15
+ add w23,w23,w17 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w19,w19,#31
+ orr w25,w20,w21
+ and w26,w20,w21
+ eor w3,w3,w5
+ ror w27,w24,#27
+ and w25,w25,w22
+ add w22,w22,w28 // future e+=K
+ eor w3,w3,w11
+ add w23,w23,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w20,w20,#2
+ eor w3,w3,w16
+ add w22,w22,w19 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w3,w3,#31
+ orr w25,w24,w20
+ and w26,w24,w20
+ eor w4,w4,w6
+ ror w27,w23,#27
+ and w25,w25,w21
+ add w21,w21,w28 // future e+=K
+ eor w4,w4,w12
+ add w22,w22,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w24,w24,#2
+ eor w4,w4,w17
+ add w21,w21,w3 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w4,w4,#31
+ orr w25,w23,w24
+ and w26,w23,w24
+ eor w5,w5,w7
+ ror w27,w22,#27
+ and w25,w25,w20
+ add w20,w20,w28 // future e+=K
+ eor w5,w5,w13
+ add w21,w21,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w23,w23,#2
+ eor w5,w5,w19
+ add w20,w20,w4 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w5,w5,#31
+ orr w25,w22,w23
+ and w26,w22,w23
+ eor w6,w6,w8
+ ror w27,w21,#27
+ and w25,w25,w24
+ add w24,w24,w28 // future e+=K
+ eor w6,w6,w14
+ add w20,w20,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w22,w22,#2
+ eor w6,w6,w3
+ add w24,w24,w5 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w6,w6,#31
+ orr w25,w21,w22
+ and w26,w21,w22
+ eor w7,w7,w9
+ ror w27,w20,#27
+ and w25,w25,w23
+ add w23,w23,w28 // future e+=K
+ eor w7,w7,w15
+ add w24,w24,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w21,w21,#2
+ eor w7,w7,w4
+ add w23,w23,w6 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w7,w7,#31
+ orr w25,w20,w21
+ and w26,w20,w21
+ eor w8,w8,w10
+ ror w27,w24,#27
+ and w25,w25,w22
+ add w22,w22,w28 // future e+=K
+ eor w8,w8,w16
+ add w23,w23,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w20,w20,#2
+ eor w8,w8,w5
+ add w22,w22,w7 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w8,w8,#31
+ orr w25,w24,w20
+ and w26,w24,w20
+ eor w9,w9,w11
+ ror w27,w23,#27
+ and w25,w25,w21
+ add w21,w21,w28 // future e+=K
+ eor w9,w9,w17
+ add w22,w22,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w24,w24,#2
+ eor w9,w9,w6
+ add w21,w21,w8 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w9,w9,#31
+ orr w25,w23,w24
+ and w26,w23,w24
+ eor w10,w10,w12
+ ror w27,w22,#27
+ and w25,w25,w20
+ add w20,w20,w28 // future e+=K
+ eor w10,w10,w19
+ add w21,w21,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w23,w23,#2
+ eor w10,w10,w7
+ add w20,w20,w9 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w10,w10,#31
+ orr w25,w22,w23
+ and w26,w22,w23
+ eor w11,w11,w13
+ ror w27,w21,#27
+ and w25,w25,w24
+ add w24,w24,w28 // future e+=K
+ eor w11,w11,w3
+ add w20,w20,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w22,w22,#2
+ eor w11,w11,w8
+ add w24,w24,w10 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w11,w11,#31
+ orr w25,w21,w22
+ and w26,w21,w22
+ eor w12,w12,w14
+ ror w27,w20,#27
+ and w25,w25,w23
+ add w23,w23,w28 // future e+=K
+ eor w12,w12,w4
+ add w24,w24,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w21,w21,#2
+ eor w12,w12,w9
+ add w23,w23,w11 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w12,w12,#31
+ orr w25,w20,w21
+ and w26,w20,w21
+ eor w13,w13,w15
+ ror w27,w24,#27
+ and w25,w25,w22
+ add w22,w22,w28 // future e+=K
+ eor w13,w13,w5
+ add w23,w23,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w20,w20,#2
+ eor w13,w13,w10
+ add w22,w22,w12 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w13,w13,#31
+ orr w25,w24,w20
+ and w26,w24,w20
+ eor w14,w14,w16
+ ror w27,w23,#27
+ and w25,w25,w21
+ add w21,w21,w28 // future e+=K
+ eor w14,w14,w6
+ add w22,w22,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w24,w24,#2
+ eor w14,w14,w11
+ add w21,w21,w13 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w14,w14,#31
+ orr w25,w23,w24
+ and w26,w23,w24
+ eor w15,w15,w17
+ ror w27,w22,#27
+ and w25,w25,w20
+ add w20,w20,w28 // future e+=K
+ eor w15,w15,w7
+ add w21,w21,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w23,w23,#2
+ eor w15,w15,w12
+ add w20,w20,w14 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w15,w15,#31
+ movz w28,#0xc1d6
+ movk w28,#0xca62,lsl#16
+ orr w25,w22,w23
+ and w26,w22,w23
+ eor w16,w16,w19
+ ror w27,w21,#27
+ and w25,w25,w24
+ add w24,w24,w28 // future e+=K
+ eor w16,w16,w8
+ add w20,w20,w27 // e+=rot(a,5)
+ orr w25,w25,w26
+ ror w22,w22,#2
+ eor w16,w16,w13
+ add w24,w24,w15 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w16,w16,#31
+ eor w17,w17,w3
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w17,w17,w9
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w17,w17,w14
+ add w23,w23,w16 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w17,w17,#31
+ eor w19,w19,w4
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w19,w19,w10
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w19,w19,w15
+ add w22,w22,w17 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w19,w19,#31
+ eor w3,w3,w5
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w3,w3,w11
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w3,w3,w16
+ add w21,w21,w19 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w3,w3,#31
+ eor w4,w4,w6
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w4,w4,w12
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w4,w4,w17
+ add w20,w20,w3 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w4,w4,#31
+ eor w5,w5,w7
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w5,w5,w13
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w5,w5,w19
+ add w24,w24,w4 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w5,w5,#31
+ eor w6,w6,w8
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w6,w6,w14
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w6,w6,w3
+ add w23,w23,w5 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w6,w6,#31
+ eor w7,w7,w9
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w7,w7,w15
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w7,w7,w4
+ add w22,w22,w6 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w7,w7,#31
+ eor w8,w8,w10
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w8,w8,w16
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w8,w8,w5
+ add w21,w21,w7 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w8,w8,#31
+ eor w9,w9,w11
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w9,w9,w17
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w9,w9,w6
+ add w20,w20,w8 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w9,w9,#31
+ eor w10,w10,w12
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w10,w10,w19
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w10,w10,w7
+ add w24,w24,w9 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w10,w10,#31
+ eor w11,w11,w13
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w11,w11,w3
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w11,w11,w8
+ add w23,w23,w10 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w11,w11,#31
+ eor w12,w12,w14
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w12,w12,w4
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w12,w12,w9
+ add w22,w22,w11 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w12,w12,#31
+ eor w13,w13,w15
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w13,w13,w5
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w13,w13,w10
+ add w21,w21,w12 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w13,w13,#31
+ eor w14,w14,w16
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w14,w14,w6
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ eor w14,w14,w11
+ add w20,w20,w13 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ror w14,w14,#31
+ eor w15,w15,w17
+ eor w25,w24,w22
+ ror w27,w21,#27
+ add w24,w24,w28 // future e+=K
+ eor w15,w15,w7
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ eor w15,w15,w12
+ add w24,w24,w14 // future e+=X[i]
+ add w20,w20,w25 // e+=F(b,c,d)
+ ror w15,w15,#31
+ eor w16,w16,w19
+ eor w25,w23,w21
+ ror w27,w20,#27
+ add w23,w23,w28 // future e+=K
+ eor w16,w16,w8
+ eor w25,w25,w22
+ add w24,w24,w27 // e+=rot(a,5)
+ ror w21,w21,#2
+ eor w16,w16,w13
+ add w23,w23,w15 // future e+=X[i]
+ add w24,w24,w25 // e+=F(b,c,d)
+ ror w16,w16,#31
+ eor w17,w17,w3
+ eor w25,w22,w20
+ ror w27,w24,#27
+ add w22,w22,w28 // future e+=K
+ eor w17,w17,w9
+ eor w25,w25,w21
+ add w23,w23,w27 // e+=rot(a,5)
+ ror w20,w20,#2
+ eor w17,w17,w14
+ add w22,w22,w16 // future e+=X[i]
+ add w23,w23,w25 // e+=F(b,c,d)
+ ror w17,w17,#31
+ eor w19,w19,w4
+ eor w25,w21,w24
+ ror w27,w23,#27
+ add w21,w21,w28 // future e+=K
+ eor w19,w19,w10
+ eor w25,w25,w20
+ add w22,w22,w27 // e+=rot(a,5)
+ ror w24,w24,#2
+ eor w19,w19,w15
+ add w21,w21,w17 // future e+=X[i]
+ add w22,w22,w25 // e+=F(b,c,d)
+ ror w19,w19,#31
+ ldp w4,w5,[x0]
+ eor w25,w20,w23
+ ror w27,w22,#27
+ add w20,w20,w28 // future e+=K
+ eor w25,w25,w24
+ add w21,w21,w27 // e+=rot(a,5)
+ ror w23,w23,#2
+ add w20,w20,w19 // future e+=X[i]
+ add w21,w21,w25 // e+=F(b,c,d)
+ ldp w6,w7,[x0,#8]
+ eor w25,w24,w22
+ ror w27,w21,#27
+ eor w25,w25,w23
+ add w20,w20,w27 // e+=rot(a,5)
+ ror w22,w22,#2
+ ldr w8,[x0,#16]
+ add w20,w20,w25 // e+=F(b,c,d)
+ add w21,w21,w5
+ add w22,w22,w6
+ add w20,w20,w4
+ add w23,w23,w7
+ add w24,w24,w8
+ stp w20,w21,[x0]
+ stp w22,w23,[x0,#8]
+ str w24,[x0,#16]
+ cbnz x2,.Loop
+
+ ldp x19,x20,[sp,#16]
+ ldp x21,x22,[sp,#32]
+ ldp x23,x24,[sp,#48]
+ ldp x25,x26,[sp,#64]
+ ldp x27,x28,[sp,#80]
+ ldr x29,[sp],#96
+ ret
+.size sha1_block_data_order,.-sha1_block_data_order
+.type sha1_block_armv8,%function
+.align 6
+sha1_block_armv8:
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ adr x4,.Lconst
+ eor v1.16b,v1.16b,v1.16b
+ ld1 {v0.4s},[x0],#16
+ ld1 {v1.s}[0],[x0]
+ sub x0,x0,#16
+ ld1 {v16.4s-v19.4s},[x4]
+
+.Loop_hw:
+ ld1 {v4.16b-v7.16b},[x1],#64
+ sub x2,x2,#1
+ rev32 v4.16b,v4.16b
+ rev32 v5.16b,v5.16b
+
+ add v20.4s,v16.4s,v4.4s
+ rev32 v6.16b,v6.16b
+ orr v22.16b,v0.16b,v0.16b // offload
+
+ add v21.4s,v16.4s,v5.4s
+ rev32 v7.16b,v7.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b
+ .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0
+ add v20.4s,v16.4s,v6.4s
+ .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1
+ .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
+ add v21.4s,v16.4s,v7.4s
+ .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+ .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2
+ .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
+ add v20.4s,v16.4s,v4.4s
+ .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+ .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3
+ .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s
+ add v21.4s,v17.4s,v5.4s
+ .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+ .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4
+ .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s
+ add v20.4s,v17.4s,v6.4s
+ .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+ .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+ add v21.4s,v17.4s,v7.4s
+ .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+ .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6
+ .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+ add v20.4s,v17.4s,v4.4s
+ .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+ .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+ add v21.4s,v17.4s,v5.4s
+ .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+ .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8
+ .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+ add v20.4s,v18.4s,v6.4s
+ .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+ .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+ add v21.4s,v18.4s,v7.4s
+ .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+ .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10
+ .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+ add v20.4s,v18.4s,v4.4s
+ .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+ .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11
+ .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
+ add v21.4s,v18.4s,v5.4s
+ .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+ .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12
+ .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+ add v20.4s,v18.4s,v6.4s
+ .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+ .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13
+ .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s
+ add v21.4s,v19.4s,v7.4s
+ .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b
+ .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14
+ .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s
+ add v20.4s,v19.4s,v4.4s
+ .inst 0x5e281885 //sha1su1 v5.16b,v4.16b
+ .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 15
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+ add v21.4s,v19.4s,v5.4s
+ .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b
+ .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16
+ .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+ add v20.4s,v19.4s,v6.4s
+ .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+ add v21.4s,v19.4s,v7.4s
+
+ .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18
+ .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s
+
+ .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19
+ .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s
+
+ add v1.4s,v1.4s,v2.4s
+ add v0.4s,v0.4s,v22.4s
+
+ cbnz x2,.Loop_hw
+
+ st1 {v0.4s},[x0],#16
+ st1 {v1.s}[0],[x0]
+
+ ldr x29,[sp],#16
+ ret
+.size sha1_block_armv8,.-sha1_block_armv8
+.align 6
+.Lconst:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
+.LOPENSSL_armcap_P:
+.quad OPENSSL_armcap_P-.
+.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 2
+.comm OPENSSL_armcap_P,4,4
diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
new file mode 100644
index 0000000..c1f552b
--- /dev/null
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -0,0 +1,333 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA1 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+# hardware-assisted software(*)
+# Apple A7 2.31 4.13 (+14%)
+# Cortex-A5x n/a n/a
+#
+# (*) Software results are presented mostly for reference purposes.
+
+$flavour = shift;
+open STDOUT,">".shift;
+
+($ctx,$inp,$num)=("x0","x1","x2");
+@Xw=map("w$_",(3..17,19));
+@Xx=map("x$_",(3..17,19));
+@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));
+($t0,$t1,$t2,$K)=map("w$_",(25..28));
+
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i<15 && !($i&1));
+ lsr @Xx[$i+1],@Xx[$i],#32
+___
+$code.=<<___ if ($i<14 && !($i&1));
+ ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`]
+___
+$code.=<<___ if ($i<14 && ($i&1));
+#ifdef __ARMEB__
+ ror @Xx[$i+1],@Xx[$i+1],#32
+#else
+ rev32 @Xx[$i+1],@Xx[$i+1]
+#endif
+___
+$code.=<<___ if ($i<14);
+ bic $t0,$d,$b
+ and $t1,$c,$b
+ ror $t2,$a,#27
+ add $d,$d,$K // future e+=K
+ orr $t0,$t0,$t1
+ add $e,$e,$t2 // e+=rot(a,5)
+ ror $b,$b,#2
+ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
+ add $e,$e,$t0 // e+=F(b,c,d)
+___
+$code.=<<___ if ($i==19);
+ movz $K,#0xeba1
+ movk $K,#0x6ed9,lsl#16
+___
+$code.=<<___ if ($i>=14);
+ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+ bic $t0,$d,$b
+ and $t1,$c,$b
+ ror $t2,$a,#27
+ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+ add $d,$d,$K // future e+=K
+ orr $t0,$t0,$t1
+ add $e,$e,$t2 // e+=rot(a,5)
+ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+ ror $b,$b,#2
+ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
+ add $e,$e,$t0 // e+=F(b,c,d)
+ ror @Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_40_59 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i==59);
+ movz $K,#0xc1d6
+ movk $K,#0xca62,lsl#16
+___
+$code.=<<___;
+ orr $t0,$b,$c
+ and $t1,$b,$c
+ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+ ror $t2,$a,#27
+ and $t0,$t0,$d
+ add $d,$d,$K // future e+=K
+ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+ add $e,$e,$t2 // e+=rot(a,5)
+ orr $t0,$t0,$t1
+ ror $b,$b,#2
+ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
+ add $e,$e,$t0 // e+=F(b,c,d)
+ ror @Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_20_39 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i==39);
+ movz $K,#0xbcdc
+ movk $K,#0x8f1b,lsl#16
+___
+$code.=<<___ if ($i<78);
+ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+ eor $t0,$d,$b
+ ror $t2,$a,#27
+ add $d,$d,$K // future e+=K
+ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+ eor $t0,$t0,$c
+ add $e,$e,$t2 // e+=rot(a,5)
+ ror $b,$b,#2
+ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
+ add $e,$e,$t0 // e+=F(b,c,d)
+ ror @Xw[$j],@Xw[$j],#31
+___
+$code.=<<___ if ($i==78);
+ ldp @Xw[1],@Xw[2],[$ctx]
+ eor $t0,$d,$b
+ ror $t2,$a,#27
+ add $d,$d,$K // future e+=K
+ eor $t0,$t0,$c
+ add $e,$e,$t2 // e+=rot(a,5)
+ ror $b,$b,#2
+ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
+ add $e,$e,$t0 // e+=F(b,c,d)
+___
+$code.=<<___ if ($i==79);
+ ldp @Xw[3],@Xw[4],[$ctx,#8]
+ eor $t0,$d,$b
+ ror $t2,$a,#27
+ eor $t0,$t0,$c
+ add $e,$e,$t2 // e+=rot(a,5)
+ ror $b,$b,#2
+ ldr @Xw[5],[$ctx,#16]
+ add $e,$e,$t0 // e+=F(b,c,d)
+___
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,%function
+.align 6
+sha1_block_data_order:
+ ldr x16,.LOPENSSL_armcap_P
+ adr x17,.LOPENSSL_armcap_P
+ add x16,x16,x17
+ ldr w16,[x16]
+ tst w16,#ARMV8_SHA1
+ b.ne .Lv8_entry
+
+ stp x29,x30,[sp,#-96]!
+ add x29,sp,#0
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+
+ ldp $A,$B,[$ctx]
+ ldp $C,$D,[$ctx,#8]
+ ldr $E,[$ctx,#16]
+
+.Loop:
+ ldr @Xx[0],[$inp],#64
+ movz $K,#0x7999
+ sub $num,$num,#1
+ movk $K,#0x5a82,lsl#16
+#ifdef __ARMEB__
+ ror $Xx[0],@Xx[0],#32
+#else
+ rev32 @Xx[0],@Xx[0]
+#endif
+ add $E,$E,$K // warm it up
+ add $E,$E,@Xw[0]
+___
+for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ add $B,$B,@Xw[2]
+ add $C,$C,@Xw[3]
+ add $A,$A,@Xw[1]
+ add $D,$D,@Xw[4]
+ add $E,$E,@Xw[5]
+ stp $A,$B,[$ctx]
+ stp $C,$D,[$ctx,#8]
+ str $E,[$ctx,#16]
+ cbnz $num,.Loop
+
+ ldp x19,x20,[sp,#16]
+ ldp x21,x22,[sp,#32]
+ ldp x23,x24,[sp,#48]
+ ldp x25,x26,[sp,#64]
+ ldp x27,x28,[sp,#80]
+ ldr x29,[sp],#96
+ ret
+.size sha1_block_data_order,.-sha1_block_data_order
+___
+{{{
+my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
+my @MSG=map("v$_.16b",(4..7));
+my @Kxx=map("v$_.4s",(16..19));
+my ($W0,$W1)=("v20.4s","v21.4s");
+my $ABCD_SAVE="v22.16b";
+
+$code.=<<___;
+.type sha1_block_armv8,%function
+.align 6
+sha1_block_armv8:
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ adr x4,.Lconst
+ eor $E,$E,$E
+ ld1.32 {$ABCD},[$ctx],#16
+ ld1.32 {$E}[0],[$ctx]
+ sub $ctx,$ctx,#16
+ ld1.32 {@Kxx[0]-@Kxx[3]},[x4]
+
+.Loop_hw:
+ ld1 {@MSG[0]-@MSG[3]},[$inp],#64
+ sub $num,$num,#1
+ rev32 @MSG[0],@MSG[0]
+ rev32 @MSG[1],@MSG[1]
+
+ add.i32 $W0,@Kxx[0],@MSG[0]
+ rev32 @MSG[2],@MSG[2]
+ orr $ABCD_SAVE,$ABCD,$ABCD // offload
+
+ add.i32 $W1,@Kxx[0],@MSG[1]
+ rev32 @MSG[3],@MSG[3]
+ sha1h $E1,$ABCD
+ sha1c $ABCD,$E,$W0 // 0
+ add.i32 $W0,@Kxx[$j],@MSG[2]
+ sha1su0 @MSG[0],@MSG[1],@MSG[2]
+___
+for ($j=0,$i=1;$i<20-3;$i++) {
+my $f=("c","p","m","p")[$i/5];
+$code.=<<___;
+ sha1h $E0,$ABCD // $i
+ sha1$f $ABCD,$E1,$W1
+ add.i32 $W1,@Kxx[$j],@MSG[3]
+ sha1su1 @MSG[0],@MSG[3]
+___
+$code.=<<___ if ($i<20-4);
+ sha1su0 @MSG[1],@MSG[2],@MSG[3]
+___
+ ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
+ push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
+}
+$code.=<<___;
+ sha1h $E0,$ABCD // $i
+ sha1p $ABCD,$E1,$W1
+ add.i32 $W1,@Kxx[$j],@MSG[3]
+
+ sha1h $E1,$ABCD // 18
+ sha1p $ABCD,$E0,$W0
+
+ sha1h $E0,$ABCD // 19
+ sha1p $ABCD,$E1,$W1
+
+ add.i32 $E,$E,$E0
+ add.i32 $ABCD,$ABCD,$ABCD_SAVE
+
+ cbnz $num,.Loop_hw
+
+ st1.32 {$ABCD},[$ctx],#16
+ st1.32 {$E}[0],[$ctx]
+
+ ldr x29,[sp],#16
+ ret
+.size sha1_block_armv8,.-sha1_block_armv8
+.align 6
+.Lconst:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
+.LOPENSSL_armcap_P:
+.quad OPENSSL_armcap_P-.
+.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+.comm OPENSSL_armcap_P,4,4
+___
+}}}
+
+{ my %opcode = (
+ "sha1c" => 0x5e000000, "sha1p" => 0x5e001000,
+ "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000,
+ "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 );
+
+ sub unsha1 {
+ my ($mnemonic,$arg)=@_;
+
+ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+ &&
+ sprintf ".inst\t0x%08x\t//%s %s",
+ $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+ $mnemonic,$arg;
+ }
+}
+
+foreach(split("\n",$code)) {
+
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo;
+
+ s/\.\w?32\b//o and s/\.16b/\.4s/go;
+ m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
+
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/crypto/sha/asm/sha256-armv4.S b/crypto/sha/asm/sha256-armv4.S
index 9c20a63..853d7da 100644
--- a/crypto/sha/asm/sha256-armv4.S
+++ b/crypto/sha/asm/sha256-armv4.S
@@ -23,1463 +23,1721 @@
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
+.word 0 @ terminator
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+.align 5
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order
add r2,r1,r2,lsl#6 @ len to point at the end of inp
+#if __ARM_ARCH__>=7
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
stmdb sp!,{r0,r1,r2,r4-r11,lr}
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
- sub r14,r3,#256 @ K256
+ sub r14,r3,#256+32 @ K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ magic
+ eor r12,r12,r12
#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
+ @ ldr r2,[r1],#4 @ 0
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ rev r2,r2
#else
- ldrb r3,[r1,#3] @ 0
+ @ ldrb r2,[r1,#3] @ 0
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
#endif
- mov r0,r8,ror#6
ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r8,ror#11
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
eor r2,r9,r10
-#if 0>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 0==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r8,ror#25 @ Sigma1(e)
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
and r2,r2,r8
- str r3,[sp,#0*4]
- add r3,r3,r0
+ add r11,r11,r12 @ h+=K256[i]
eor r2,r2,r10 @ Ch(e,f,g)
- add r3,r3,r11
- mov r11,r4,ror#2
- add r3,r3,r2
- eor r11,r11,r4,ror#13
- add r3,r3,r12
- eor r11,r11,r4,ror#22 @ Sigma0(a)
-#if 0>=15
- ldr r1,[sp,#2*4] @ from BODY_16_xx
-#endif
- orr r0,r4,r5
- and r2,r4,r5
- and r0,r0,r6
- add r11,r11,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r7,r7,r3
- add r11,r11,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 1
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r7,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r7,ror#11
- eor r2,r8,r9
-#if 1>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 1==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r7,ror#25 @ Sigma1(e)
- and r2,r2,r7
- str r3,[sp,#1*4]
- add r3,r3,r0
- eor r2,r2,r9 @ Ch(e,f,g)
- add r3,r3,r10
- mov r10,r11,ror#2
- add r3,r3,r2
- eor r10,r10,r11,ror#13
- add r3,r3,r12
- eor r10,r10,r11,ror#22 @ Sigma0(a)
-#if 1>=15
- ldr r1,[sp,#3*4] @ from BODY_16_xx
-#endif
- orr r0,r11,r4
- and r2,r11,r4
- and r0,r0,r5
- add r10,r10,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r6,r6,r3
- add r10,r10,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 2
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r6,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r6,ror#11
- eor r2,r7,r8
-#if 2>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 2==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r6,ror#25 @ Sigma1(e)
- and r2,r2,r6
- str r3,[sp,#2*4]
- add r3,r3,r0
- eor r2,r2,r8 @ Ch(e,f,g)
- add r3,r3,r9
- mov r9,r10,ror#2
- add r3,r3,r2
- eor r9,r9,r10,ror#13
- add r3,r3,r12
- eor r9,r9,r10,ror#22 @ Sigma0(a)
-#if 2>=15
- ldr r1,[sp,#4*4] @ from BODY_16_xx
-#endif
- orr r0,r10,r11
- and r2,r10,r11
- and r0,r0,r4
- add r9,r9,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r5,r5,r3
- add r9,r9,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 3
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r5,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r5,ror#11
- eor r2,r6,r7
-#if 3>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 3==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r5,ror#25 @ Sigma1(e)
- and r2,r2,r5
- str r3,[sp,#3*4]
- add r3,r3,r0
- eor r2,r2,r7 @ Ch(e,f,g)
- add r3,r3,r8
- mov r8,r9,ror#2
- add r3,r3,r2
- eor r8,r8,r9,ror#13
- add r3,r3,r12
- eor r8,r8,r9,ror#22 @ Sigma0(a)
-#if 3>=15
- ldr r1,[sp,#5*4] @ from BODY_16_xx
-#endif
- orr r0,r9,r10
- and r2,r9,r10
- and r0,r0,r11
- add r8,r8,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r4,r4,r3
- add r8,r8,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 4
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r4,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r4,ror#11
- eor r2,r5,r6
-#if 4>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 4==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r4,ror#25 @ Sigma1(e)
- and r2,r2,r4
- str r3,[sp,#4*4]
- add r3,r3,r0
- eor r2,r2,r6 @ Ch(e,f,g)
- add r3,r3,r7
- mov r7,r8,ror#2
- add r3,r3,r2
- eor r7,r7,r8,ror#13
- add r3,r3,r12
- eor r7,r7,r8,ror#22 @ Sigma0(a)
-#if 4>=15
- ldr r1,[sp,#6*4] @ from BODY_16_xx
-#endif
- orr r0,r8,r9
- and r2,r8,r9
- and r0,r0,r10
- add r7,r7,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r11,r11,r3
- add r7,r7,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 5
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r11,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r11,ror#11
- eor r2,r4,r5
-#if 5>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 5==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r11,ror#25 @ Sigma1(e)
- and r2,r2,r11
- str r3,[sp,#5*4]
- add r3,r3,r0
- eor r2,r2,r5 @ Ch(e,f,g)
- add r3,r3,r6
- mov r6,r7,ror#2
- add r3,r3,r2
- eor r6,r6,r7,ror#13
- add r3,r3,r12
- eor r6,r6,r7,ror#22 @ Sigma0(a)
-#if 5>=15
- ldr r1,[sp,#7*4] @ from BODY_16_xx
-#endif
- orr r0,r7,r8
- and r2,r7,r8
- and r0,r0,r9
- add r6,r6,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r10,r10,r3
- add r6,r6,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 6
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r10,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r10,ror#11
- eor r2,r11,r4
-#if 6>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 6==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r10,ror#25 @ Sigma1(e)
- and r2,r2,r10
- str r3,[sp,#6*4]
- add r3,r3,r0
- eor r2,r2,r4 @ Ch(e,f,g)
- add r3,r3,r5
- mov r5,r6,ror#2
- add r3,r3,r2
- eor r5,r5,r6,ror#13
- add r3,r3,r12
- eor r5,r5,r6,ror#22 @ Sigma0(a)
-#if 6>=15
- ldr r1,[sp,#8*4] @ from BODY_16_xx
-#endif
- orr r0,r6,r7
- and r2,r6,r7
- and r0,r0,r8
- add r5,r5,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r9,r9,r3
- add r5,r5,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 7
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r9,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r9,ror#11
- eor r2,r10,r11
-#if 7>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 7==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r9,ror#25 @ Sigma1(e)
- and r2,r2,r9
- str r3,[sp,#7*4]
- add r3,r3,r0
- eor r2,r2,r11 @ Ch(e,f,g)
- add r3,r3,r4
- mov r4,r5,ror#2
- add r3,r3,r2
- eor r4,r4,r5,ror#13
- add r3,r3,r12
- eor r4,r4,r5,ror#22 @ Sigma0(a)
-#if 7>=15
- ldr r1,[sp,#9*4] @ from BODY_16_xx
-#endif
- orr r0,r5,r6
- and r2,r5,r6
- and r0,r0,r7
- add r4,r4,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r8,r8,r3
- add r4,r4,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 8
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r8,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r8,ror#11
- eor r2,r9,r10
-#if 8>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 8==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r8,ror#25 @ Sigma1(e)
- and r2,r2,r8
- str r3,[sp,#8*4]
- add r3,r3,r0
- eor r2,r2,r10 @ Ch(e,f,g)
- add r3,r3,r11
- mov r11,r4,ror#2
- add r3,r3,r2
- eor r11,r11,r4,ror#13
- add r3,r3,r12
- eor r11,r11,r4,ror#22 @ Sigma0(a)
-#if 8>=15
- ldr r1,[sp,#10*4] @ from BODY_16_xx
-#endif
- orr r0,r4,r5
- and r2,r4,r5
- and r0,r0,r6
- add r11,r11,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r7,r7,r3
- add r11,r11,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 9
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r7,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r7,ror#11
- eor r2,r8,r9
-#if 9>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 9==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r7,ror#25 @ Sigma1(e)
- and r2,r2,r7
- str r3,[sp,#9*4]
- add r3,r3,r0
- eor r2,r2,r9 @ Ch(e,f,g)
- add r3,r3,r10
- mov r10,r11,ror#2
- add r3,r3,r2
- eor r10,r10,r11,ror#13
- add r3,r3,r12
- eor r10,r10,r11,ror#22 @ Sigma0(a)
-#if 9>=15
- ldr r1,[sp,#11*4] @ from BODY_16_xx
-#endif
- orr r0,r11,r4
- and r2,r11,r4
- and r0,r0,r5
- add r10,r10,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r6,r6,r3
- add r10,r10,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 10
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r6,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r6,ror#11
- eor r2,r7,r8
-#if 10>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 10==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r6,ror#25 @ Sigma1(e)
- and r2,r2,r6
- str r3,[sp,#10*4]
- add r3,r3,r0
- eor r2,r2,r8 @ Ch(e,f,g)
- add r3,r3,r9
- mov r9,r10,ror#2
- add r3,r3,r2
- eor r9,r9,r10,ror#13
- add r3,r3,r12
- eor r9,r9,r10,ror#22 @ Sigma0(a)
-#if 10>=15
- ldr r1,[sp,#12*4] @ from BODY_16_xx
-#endif
- orr r0,r10,r11
- and r2,r10,r11
- and r0,r0,r4
- add r9,r9,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r5,r5,r3
- add r9,r9,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 11
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r5,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r5,ror#11
- eor r2,r6,r7
-#if 11>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 11==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r5,ror#25 @ Sigma1(e)
- and r2,r2,r5
- str r3,[sp,#11*4]
- add r3,r3,r0
- eor r2,r2,r7 @ Ch(e,f,g)
- add r3,r3,r8
- mov r8,r9,ror#2
- add r3,r3,r2
- eor r8,r8,r9,ror#13
- add r3,r3,r12
- eor r8,r8,r9,ror#22 @ Sigma0(a)
-#if 11>=15
- ldr r1,[sp,#13*4] @ from BODY_16_xx
-#endif
- orr r0,r9,r10
- and r2,r9,r10
- and r0,r0,r11
- add r8,r8,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r4,r4,r3
- add r8,r8,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 12
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r4,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r4,ror#11
- eor r2,r5,r6
-#if 12>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 12==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r4,ror#25 @ Sigma1(e)
- and r2,r2,r4
- str r3,[sp,#12*4]
- add r3,r3,r0
- eor r2,r2,r6 @ Ch(e,f,g)
- add r3,r3,r7
- mov r7,r8,ror#2
- add r3,r3,r2
- eor r7,r7,r8,ror#13
- add r3,r3,r12
- eor r7,r7,r8,ror#22 @ Sigma0(a)
-#if 12>=15
- ldr r1,[sp,#14*4] @ from BODY_16_xx
-#endif
- orr r0,r8,r9
- and r2,r8,r9
- and r0,r0,r10
- add r7,r7,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r11,r11,r3
- add r7,r7,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 13
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r11,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r11,ror#11
- eor r2,r4,r5
-#if 13>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 13==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r11,ror#25 @ Sigma1(e)
- and r2,r2,r11
- str r3,[sp,#13*4]
- add r3,r3,r0
- eor r2,r2,r5 @ Ch(e,f,g)
- add r3,r3,r6
- mov r6,r7,ror#2
- add r3,r3,r2
- eor r6,r6,r7,ror#13
- add r3,r3,r12
- eor r6,r6,r7,ror#22 @ Sigma0(a)
-#if 13>=15
- ldr r1,[sp,#15*4] @ from BODY_16_xx
-#endif
- orr r0,r7,r8
- and r2,r7,r8
- and r0,r0,r9
- add r6,r6,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r10,r10,r3
- add r6,r6,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 14
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r10,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r10,ror#11
- eor r2,r11,r4
-#if 14>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 14==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r10,ror#25 @ Sigma1(e)
- and r2,r2,r10
- str r3,[sp,#14*4]
- add r3,r3,r0
- eor r2,r2,r4 @ Ch(e,f,g)
- add r3,r3,r5
- mov r5,r6,ror#2
- add r3,r3,r2
- eor r5,r5,r6,ror#13
- add r3,r3,r12
- eor r5,r5,r6,ror#22 @ Sigma0(a)
-#if 14>=15
- ldr r1,[sp,#0*4] @ from BODY_16_xx
-#endif
- orr r0,r6,r7
- and r2,r6,r7
- and r0,r0,r8
- add r5,r5,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r9,r9,r3
- add r5,r5,r0
-#if __ARM_ARCH__>=7
- ldr r3,[r1],#4
-#else
- ldrb r3,[r1,#3] @ 15
- ldrb r12,[r1,#2]
- ldrb r2,[r1,#1]
- ldrb r0,[r1],#4
- orr r3,r3,r12,lsl#8
- orr r3,r3,r2,lsl#16
- orr r3,r3,r0,lsl#24
-#endif
- mov r0,r9,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r9,ror#11
- eor r2,r10,r11
-#if 15>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 15==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r9,ror#25 @ Sigma1(e)
- and r2,r2,r9
- str r3,[sp,#15*4]
- add r3,r3,r0
- eor r2,r2,r11 @ Ch(e,f,g)
- add r3,r3,r4
- mov r4,r5,ror#2
- add r3,r3,r2
- eor r4,r4,r5,ror#13
- add r3,r3,r12
- eor r4,r4,r5,ror#22 @ Sigma0(a)
-#if 15>=15
- ldr r1,[sp,#1*4] @ from BODY_16_xx
-#endif
- orr r0,r5,r6
- and r2,r5,r6
- and r0,r0,r7
- add r4,r4,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r8,r8,r3
- add r4,r4,r0
-.Lrounds_16_xx:
- @ ldr r1,[sp,#1*4] @ 16
- ldr r12,[sp,#14*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#0*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#9*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r8,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r8,ror#11
- eor r2,r9,r10
-#if 16>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 16==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r8,ror#25 @ Sigma1(e)
- and r2,r2,r8
- str r3,[sp,#0*4]
- add r3,r3,r0
- eor r2,r2,r10 @ Ch(e,f,g)
- add r3,r3,r11
- mov r11,r4,ror#2
- add r3,r3,r2
- eor r11,r11,r4,ror#13
- add r3,r3,r12
- eor r11,r11,r4,ror#22 @ Sigma0(a)
-#if 16>=15
- ldr r1,[sp,#2*4] @ from BODY_16_xx
-#endif
- orr r0,r4,r5
- and r2,r4,r5
- and r0,r0,r6
- add r11,r11,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r7,r7,r3
- add r11,r11,r0
- @ ldr r1,[sp,#2*4] @ 17
- ldr r12,[sp,#15*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#1*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#10*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r7,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r7,ror#11
- eor r2,r8,r9
-#if 17>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 17==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r7,ror#25 @ Sigma1(e)
- and r2,r2,r7
- str r3,[sp,#1*4]
- add r3,r3,r0
- eor r2,r2,r9 @ Ch(e,f,g)
- add r3,r3,r10
- mov r10,r11,ror#2
- add r3,r3,r2
- eor r10,r10,r11,ror#13
- add r3,r3,r12
- eor r10,r10,r11,ror#22 @ Sigma0(a)
-#if 17>=15
- ldr r1,[sp,#3*4] @ from BODY_16_xx
-#endif
- orr r0,r11,r4
- and r2,r11,r4
- and r0,r0,r5
- add r10,r10,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r6,r6,r3
- add r10,r10,r0
- @ ldr r1,[sp,#3*4] @ 18
- ldr r12,[sp,#0*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#2*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#11*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r6,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r6,ror#11
- eor r2,r7,r8
-#if 18>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 18==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r6,ror#25 @ Sigma1(e)
- and r2,r2,r6
- str r3,[sp,#2*4]
- add r3,r3,r0
- eor r2,r2,r8 @ Ch(e,f,g)
- add r3,r3,r9
- mov r9,r10,ror#2
- add r3,r3,r2
- eor r9,r9,r10,ror#13
- add r3,r3,r12
- eor r9,r9,r10,ror#22 @ Sigma0(a)
-#if 18>=15
- ldr r1,[sp,#4*4] @ from BODY_16_xx
-#endif
- orr r0,r10,r11
- and r2,r10,r11
- and r0,r0,r4
- add r9,r9,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r5,r5,r3
- add r9,r9,r0
- @ ldr r1,[sp,#4*4] @ 19
- ldr r12,[sp,#1*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#3*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#12*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r5,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r5,ror#11
- eor r2,r6,r7
-#if 19>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 19==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r5,ror#25 @ Sigma1(e)
- and r2,r2,r5
- str r3,[sp,#3*4]
- add r3,r3,r0
- eor r2,r2,r7 @ Ch(e,f,g)
- add r3,r3,r8
- mov r8,r9,ror#2
- add r3,r3,r2
- eor r8,r8,r9,ror#13
- add r3,r3,r12
- eor r8,r8,r9,ror#22 @ Sigma0(a)
-#if 19>=15
- ldr r1,[sp,#5*4] @ from BODY_16_xx
-#endif
- orr r0,r9,r10
- and r2,r9,r10
- and r0,r0,r11
- add r8,r8,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r4,r4,r3
- add r8,r8,r0
- @ ldr r1,[sp,#5*4] @ 20
- ldr r12,[sp,#2*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#4*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#13*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r4,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r4,ror#11
- eor r2,r5,r6
-#if 20>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 20==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r4,ror#25 @ Sigma1(e)
- and r2,r2,r4
- str r3,[sp,#4*4]
- add r3,r3,r0
- eor r2,r2,r6 @ Ch(e,f,g)
- add r3,r3,r7
- mov r7,r8,ror#2
- add r3,r3,r2
- eor r7,r7,r8,ror#13
- add r3,r3,r12
- eor r7,r7,r8,ror#22 @ Sigma0(a)
-#if 20>=15
- ldr r1,[sp,#6*4] @ from BODY_16_xx
-#endif
- orr r0,r8,r9
- and r2,r8,r9
- and r0,r0,r10
- add r7,r7,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r11,r11,r3
- add r7,r7,r0
- @ ldr r1,[sp,#6*4] @ 21
- ldr r12,[sp,#3*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#5*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#14*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r11,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r11,ror#11
- eor r2,r4,r5
-#if 21>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 21==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r11,ror#25 @ Sigma1(e)
- and r2,r2,r11
- str r3,[sp,#5*4]
- add r3,r3,r0
- eor r2,r2,r5 @ Ch(e,f,g)
- add r3,r3,r6
- mov r6,r7,ror#2
- add r3,r3,r2
- eor r6,r6,r7,ror#13
- add r3,r3,r12
- eor r6,r6,r7,ror#22 @ Sigma0(a)
-#if 21>=15
- ldr r1,[sp,#7*4] @ from BODY_16_xx
-#endif
- orr r0,r7,r8
- and r2,r7,r8
- and r0,r0,r9
- add r6,r6,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r10,r10,r3
- add r6,r6,r0
- @ ldr r1,[sp,#7*4] @ 22
- ldr r12,[sp,#4*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#6*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#15*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r10,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r10,ror#11
- eor r2,r11,r4
-#if 22>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 22==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r10,ror#25 @ Sigma1(e)
- and r2,r2,r10
- str r3,[sp,#6*4]
- add r3,r3,r0
- eor r2,r2,r4 @ Ch(e,f,g)
- add r3,r3,r5
- mov r5,r6,ror#2
- add r3,r3,r2
- eor r5,r5,r6,ror#13
- add r3,r3,r12
- eor r5,r5,r6,ror#22 @ Sigma0(a)
-#if 22>=15
- ldr r1,[sp,#8*4] @ from BODY_16_xx
-#endif
- orr r0,r6,r7
- and r2,r6,r7
- and r0,r0,r8
- add r5,r5,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r9,r9,r3
- add r5,r5,r0
- @ ldr r1,[sp,#8*4] @ 23
- ldr r12,[sp,#5*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#7*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#0*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r9,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r9,ror#11
- eor r2,r10,r11
-#if 23>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 23==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r9,ror#25 @ Sigma1(e)
- and r2,r2,r9
- str r3,[sp,#7*4]
- add r3,r3,r0
- eor r2,r2,r11 @ Ch(e,f,g)
- add r3,r3,r4
- mov r4,r5,ror#2
- add r3,r3,r2
- eor r4,r4,r5,ror#13
- add r3,r3,r12
- eor r4,r4,r5,ror#22 @ Sigma0(a)
-#if 23>=15
- ldr r1,[sp,#9*4] @ from BODY_16_xx
-#endif
- orr r0,r5,r6
- and r2,r5,r6
- and r0,r0,r7
- add r4,r4,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r8,r8,r3
- add r4,r4,r0
- @ ldr r1,[sp,#9*4] @ 24
- ldr r12,[sp,#6*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#8*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#1*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r8,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r8,ror#11
- eor r2,r9,r10
-#if 24>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 24==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r8,ror#25 @ Sigma1(e)
- and r2,r2,r8
- str r3,[sp,#8*4]
- add r3,r3,r0
- eor r2,r2,r10 @ Ch(e,f,g)
- add r3,r3,r11
- mov r11,r4,ror#2
- add r3,r3,r2
- eor r11,r11,r4,ror#13
- add r3,r3,r12
- eor r11,r11,r4,ror#22 @ Sigma0(a)
-#if 24>=15
- ldr r1,[sp,#10*4] @ from BODY_16_xx
-#endif
- orr r0,r4,r5
- and r2,r4,r5
- and r0,r0,r6
- add r11,r11,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r7,r7,r3
- add r11,r11,r0
- @ ldr r1,[sp,#10*4] @ 25
- ldr r12,[sp,#7*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#9*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#2*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r7,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r7,ror#11
- eor r2,r8,r9
-#if 25>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 25==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r7,ror#25 @ Sigma1(e)
- and r2,r2,r7
- str r3,[sp,#9*4]
- add r3,r3,r0
- eor r2,r2,r9 @ Ch(e,f,g)
- add r3,r3,r10
- mov r10,r11,ror#2
- add r3,r3,r2
- eor r10,r10,r11,ror#13
- add r3,r3,r12
- eor r10,r10,r11,ror#22 @ Sigma0(a)
-#if 25>=15
- ldr r1,[sp,#11*4] @ from BODY_16_xx
-#endif
- orr r0,r11,r4
- and r2,r11,r4
- and r0,r0,r5
- add r10,r10,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r6,r6,r3
- add r10,r10,r0
- @ ldr r1,[sp,#11*4] @ 26
- ldr r12,[sp,#8*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#10*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#3*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r6,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r6,ror#11
- eor r2,r7,r8
-#if 26>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 26==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r6,ror#25 @ Sigma1(e)
- and r2,r2,r6
- str r3,[sp,#10*4]
- add r3,r3,r0
- eor r2,r2,r8 @ Ch(e,f,g)
- add r3,r3,r9
- mov r9,r10,ror#2
- add r3,r3,r2
- eor r9,r9,r10,ror#13
- add r3,r3,r12
- eor r9,r9,r10,ror#22 @ Sigma0(a)
-#if 26>=15
- ldr r1,[sp,#12*4] @ from BODY_16_xx
-#endif
- orr r0,r10,r11
- and r2,r10,r11
- and r0,r0,r4
- add r9,r9,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r5,r5,r3
- add r9,r9,r0
- @ ldr r1,[sp,#12*4] @ 27
- ldr r12,[sp,#9*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#11*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#4*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r5,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r5,ror#11
- eor r2,r6,r7
-#if 27>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 27==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r5,ror#25 @ Sigma1(e)
- and r2,r2,r5
- str r3,[sp,#11*4]
- add r3,r3,r0
- eor r2,r2,r7 @ Ch(e,f,g)
- add r3,r3,r8
- mov r8,r9,ror#2
- add r3,r3,r2
- eor r8,r8,r9,ror#13
- add r3,r3,r12
- eor r8,r8,r9,ror#22 @ Sigma0(a)
-#if 27>=15
- ldr r1,[sp,#13*4] @ from BODY_16_xx
-#endif
- orr r0,r9,r10
- and r2,r9,r10
- and r0,r0,r11
- add r8,r8,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r4,r4,r3
- add r8,r8,r0
- @ ldr r1,[sp,#13*4] @ 28
- ldr r12,[sp,#10*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#12*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#5*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r4,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r4,ror#11
- eor r2,r5,r6
-#if 28>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 28==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r4,ror#25 @ Sigma1(e)
- and r2,r2,r4
- str r3,[sp,#12*4]
- add r3,r3,r0
- eor r2,r2,r6 @ Ch(e,f,g)
- add r3,r3,r7
- mov r7,r8,ror#2
- add r3,r3,r2
- eor r7,r7,r8,ror#13
- add r3,r3,r12
- eor r7,r7,r8,ror#22 @ Sigma0(a)
-#if 28>=15
- ldr r1,[sp,#14*4] @ from BODY_16_xx
-#endif
- orr r0,r8,r9
- and r2,r8,r9
- and r0,r0,r10
- add r7,r7,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r11,r11,r3
- add r7,r7,r0
- @ ldr r1,[sp,#14*4] @ 29
- ldr r12,[sp,#11*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#13*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#6*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r11,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r11,ror#11
- eor r2,r4,r5
-#if 29>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 29==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r11,ror#25 @ Sigma1(e)
- and r2,r2,r11
- str r3,[sp,#13*4]
- add r3,r3,r0
- eor r2,r2,r5 @ Ch(e,f,g)
- add r3,r3,r6
- mov r6,r7,ror#2
- add r3,r3,r2
- eor r6,r6,r7,ror#13
- add r3,r3,r12
- eor r6,r6,r7,ror#22 @ Sigma0(a)
-#if 29>=15
- ldr r1,[sp,#15*4] @ from BODY_16_xx
-#endif
- orr r0,r7,r8
- and r2,r7,r8
- and r0,r0,r9
- add r6,r6,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r10,r10,r3
- add r6,r6,r0
- @ ldr r1,[sp,#15*4] @ 30
- ldr r12,[sp,#12*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#14*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#7*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r10,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r10,ror#11
- eor r2,r11,r4
-#if 30>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 30==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r10,ror#25 @ Sigma1(e)
- and r2,r2,r10
- str r3,[sp,#14*4]
- add r3,r3,r0
- eor r2,r2,r4 @ Ch(e,f,g)
- add r3,r3,r5
- mov r5,r6,ror#2
- add r3,r3,r2
- eor r5,r5,r6,ror#13
- add r3,r3,r12
- eor r5,r5,r6,ror#22 @ Sigma0(a)
-#if 30>=15
- ldr r1,[sp,#0*4] @ from BODY_16_xx
-#endif
- orr r0,r6,r7
- and r2,r6,r7
- and r0,r0,r8
- add r5,r5,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r9,r9,r3
- add r5,r5,r0
- @ ldr r1,[sp,#0*4] @ 31
- ldr r12,[sp,#13*4]
- mov r0,r1,ror#7
- ldr r3,[sp,#15*4]
- eor r0,r0,r1,ror#18
- ldr r2,[sp,#8*4]
- eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
- mov r1,r12,ror#17
- add r3,r3,r0
- eor r1,r1,r12,ror#19
- add r3,r3,r2
- eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
- @ add r3,r3,r1
- mov r0,r9,ror#6
- ldr r12,[r14],#4 @ *K256++
- eor r0,r0,r9,ror#11
- eor r2,r10,r11
-#if 31>=16
- add r3,r3,r1 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev r3,r3
-#endif
-#if 31==15
- str r1,[sp,#17*4] @ leave room for r1
-#endif
- eor r0,r0,r9,ror#25 @ Sigma1(e)
- and r2,r2,r9
- str r3,[sp,#15*4]
- add r3,r3,r0
- eor r2,r2,r11 @ Ch(e,f,g)
- add r3,r3,r4
- mov r4,r5,ror#2
- add r3,r3,r2
- eor r4,r4,r5,ror#13
- add r3,r3,r12
- eor r4,r4,r5,ror#22 @ Sigma0(a)
-#if 31>=15
- ldr r1,[sp,#1*4] @ from BODY_16_xx
-#endif
- orr r0,r5,r6
- and r2,r5,r6
- and r0,r0,r7
- add r4,r4,r3
- orr r0,r0,r2 @ Maj(a,b,c)
- add r8,r8,r3
- add r4,r4,r0
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 0==31
and r12,r12,#0xff
- cmp r12,#0xf2
+ cmp r12,#0xf2 @ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 1
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 1
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 1==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 2
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 2
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 2==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 3
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 3
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 3==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 4
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 4
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 4==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 5
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 5==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 6
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 6
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 6==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 7
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 7==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 8
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 8
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 8==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 9
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 9
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 9==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 10
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 10
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 10==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 11
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 11
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 11==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 12
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 12
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 12==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 13
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 13
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 13==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 14
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 14
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 14==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 15
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ rev r2,r2
+#else
+ @ ldrb r2,[r1,#3] @ 15
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 15==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+.Lrounds_16_xx:
+ @ ldr r2,[sp,#1*4] @ 16
+ @ ldr r1,[sp,#14*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#0*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#9*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 16==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#2*4] @ 17
+ @ ldr r1,[sp,#15*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#1*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#10*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 17==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#3*4] @ 18
+ @ ldr r1,[sp,#0*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#2*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#11*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 18==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#4*4] @ 19
+ @ ldr r1,[sp,#1*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#3*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#12*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 19==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#5*4] @ 20
+ @ ldr r1,[sp,#2*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#4*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#13*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 20==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#6*4] @ 21
+ @ ldr r1,[sp,#3*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#5*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#14*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 21==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#7*4] @ 22
+ @ ldr r1,[sp,#4*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#6*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#15*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 22==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#8*4] @ 23
+ @ ldr r1,[sp,#5*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#7*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#0*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 23==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#9*4] @ 24
+ @ ldr r1,[sp,#6*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#8*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#1*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 24==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#10*4] @ 25
+ @ ldr r1,[sp,#7*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#9*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#2*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 25==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#11*4] @ 26
+ @ ldr r1,[sp,#8*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#10*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#3*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 26==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#12*4] @ 27
+ @ ldr r1,[sp,#9*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#11*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#4*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 27==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#13*4] @ 28
+ @ ldr r1,[sp,#10*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#12*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#5*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 28==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#14*4] @ 29
+ @ ldr r1,[sp,#11*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#13*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#6*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 29==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#15*4] @ 30
+ @ ldr r1,[sp,#12*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#14*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#7*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 30==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#0*4] @ 31
+ @ ldr r1,[sp,#13*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#15*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#8*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 31==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ ldreq r3,[sp,#16*4] @ pull ctx
bne .Lrounds_16_xx
- ldr r3,[sp,#16*4] @ pull ctx
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
ldr r0,[r3,#0]
ldr r2,[r3,#4]
ldr r12,[r3,#8]
@@ -1512,6 +1770,921 @@
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <[email protected]>"
+.size sha256_block_data_order,.-sha256_block_data_order
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ mov r12,sp
+ sub sp,sp,#16*4+16 @ alloca
+ sub r14,r3,#256+32 @ K256
+ bic sp,sp,#15 @ align for 128-bit stores
+
+ vld1.8 {q0},[r1]!
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ vld1.32 {q8},[r14,:128]!
+ vld1.32 {q9},[r14,:128]!
+ vld1.32 {q10},[r14,:128]!
+ vld1.32 {q11},[r14,:128]!
+ vrev32.8 q0,q0 @ yes, even on
+ str r0,[sp,#64]
+ vrev32.8 q1,q1 @ big-endian
+ str r1,[sp,#68]
+ mov r1,sp
+ vrev32.8 q2,q2
+ str r2,[sp,#72]
+ vrev32.8 q3,q3
+ str r12,[sp,#76] @ save original sp
+ vadd.i32 q8,q8,q0
+ vadd.i32 q9,q9,q1
+ vst1.32 {q8},[r1,:128]!
+ vadd.i32 q10,q10,q2
+ vst1.32 {q9},[r1,:128]!
+ vadd.i32 q11,q11,q3
+ vst1.32 {q10},[r1,:128]!
+ vst1.32 {q11},[r1,:128]!
+
+ ldmia r0,{r4-r11}
+ sub r1,r1,#64
+ ldr r2,[sp,#0]
+ eor r12,r12,r12
+ eor r3,r5,r6
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ vext.8 q8,q0,q1,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q2,q3,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q0,q0,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d7,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d7,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d7,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q0,q0,q9
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d7,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d7,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d0,d0,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d0,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d0,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d0,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ vshr.u32 d24,d0,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d0,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d1,d1,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q0
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q1,q2,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q3,q0,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q1,q1,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d1,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d1,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d1,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q1,q1,q9
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d1,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d1,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d2,d2,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d2,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d2,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d2,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ vshr.u32 d24,d2,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d2,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d3,d3,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q1
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vext.8 q8,q2,q3,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q0,q1,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q2,q2,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d3,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d3,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d3,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q2,q2,q9
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d3,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d3,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d4,d4,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d4,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d4,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d4,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ vshr.u32 d24,d4,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d4,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d5,d5,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q2
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q3,q0,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q1,q2,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q3,q3,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d5,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d5,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d5,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q3,q3,q9
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d5,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d5,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d6,d6,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d6,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d6,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d6,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ vshr.u32 d24,d6,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d6,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d7,d7,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q3
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[r14]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ teq r2,#0 @ check for K256 terminator
+ ldr r2,[sp,#0]
+ sub r1,r1,#64
+ bne .L_00_48
+
+ ldr r1,[sp,#68]
+ ldr r0,[sp,#72]
+ sub r14,r14,#256 @ rewind r14
+ teq r1,r0
+ subeq r1,r1,#64 @ avoid SEGV
+ vld1.8 {q0},[r1]! @ load next input block
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ strne r1,[sp,#68]
+ mov r1,sp
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q0,q0
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q0
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q1,q1
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q1
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q2,q2
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q2
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q3,q3
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q3
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#64]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ ldr r0,[r2,#0]
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r12,[r2,#4]
+ ldr r3,[r2,#8]
+ ldr r1,[r2,#12]
+ add r4,r4,r0 @ accumulate
+ ldr r0,[r2,#16]
+ add r5,r5,r12
+ ldr r12,[r2,#20]
+ add r6,r6,r3
+ ldr r3,[r2,#24]
+ add r7,r7,r1
+ ldr r1,[r2,#28]
+ add r8,r8,r0
+ str r4,[r2],#4
+ add r9,r9,r12
+ str r5,[r2],#4
+ add r10,r10,r3
+ str r6,[r2],#4
+ add r11,r11,r1
+ str r7,[r2],#4
+ stmia r2,{r8-r11}
+
+ movne r1,sp
+ ldrne r2,[sp,#0]
+ eorne r12,r12,r12
+ ldreq sp,[sp,#76] @ restore original sp
+ eorne r3,r5,r6
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+#if __ARM_ARCH__>=7
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {q0,q1},[r0]
+ sub r3,r3,#sha256_block_data_order-K256
+
+.Loop_v8:
+ vld1.8 {q8-q9},[r1]!
+ vld1.8 {q10-q11},[r1]!
+ vld1.32 {q12},[r3]!
+ vrev32.8 q8,q8
+ vrev32.8 q9,q9
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vmov q14,q0 @ offload
+ vmov q15,q1
+ teq r1,r2
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+ .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+ .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+
+ vld1.32 {q13},[r3]
+ vadd.i32 q12,q12,q10
+ sub r3,r3,#256-16 @ rewind
+ vmov q2,q0
+ .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12
+ .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12
+
+ vadd.i32 q13,q13,q11
+ vmov q2,q0
+ .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13
+ .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13
+
+ vadd.i32 q0,q0,q14
+ vadd.i32 q1,q1,q15
+ bne .Loop_v8
+
+ vst1.32 {q0,q1},[r0]
+
+ bx lr @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <[email protected]>"
.align 2
+.comm OPENSSL_armcap_P,4,4
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index 9c84e8d..505ca8f 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
-# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -21,15 +21,27 @@
# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
-# improvement on Cortex A8 core and ~17 cycles per processed byte.
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that latter performs sub-optimally, nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$ctx="r0"; $t0="r0";
-$inp="r1"; $t3="r1";
+$inp="r1"; $t4="r1";
$len="r2"; $t1="r2";
-$T1="r3";
+$T1="r3"; $t3="r3";
$A="r4";
$B="r5";
$C="r6";
@@ -52,71 +64,88 @@
$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
- ldr $T1,[$inp],#4
+ @ ldr $t1,[$inp],#4 @ $i
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ rev $t1,$t1
#else
- ldrb $T1,[$inp,#3] @ $i
+ @ ldrb $t1,[$inp,#3] @ $i
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
ldrb $t2,[$inp,#2]
- ldrb $t1,[$inp,#1]
- ldrb $t0,[$inp],#4
- orr $T1,$T1,$t2,lsl#8
- orr $T1,$T1,$t1,lsl#16
- orr $T1,$T1,$t0,lsl#24
+ ldrb $t0,[$inp,#1]
+ orr $t1,$t1,$t2,lsl#8
+ ldrb $t2,[$inp],#4
+ orr $t1,$t1,$t0,lsl#16
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ orr $t1,$t1,$t2,lsl#24
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
#endif
___
$code.=<<___;
- mov $t0,$e,ror#$Sigma1[0]
ldr $t2,[$Ktbl],#4 @ *K256++
- eor $t0,$t0,$e,ror#$Sigma1[1]
+ add $h,$h,$t1 @ h+=X[i]
+ str $t1,[sp,#`$i%16`*4]
eor $t1,$f,$g
-#if $i>=16
- add $T1,$T1,$t3 @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
- rev $T1,$T1
-#endif
-#if $i==15
- str $inp,[sp,#17*4] @ leave room for $t3
-#endif
- eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e)
+ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
and $t1,$t1,$e
- str $T1,[sp,#`$i%16`*4]
- add $T1,$T1,$t0
+ add $h,$h,$t2 @ h+=K256[i]
eor $t1,$t1,$g @ Ch(e,f,g)
- add $T1,$T1,$h
- mov $h,$a,ror#$Sigma0[0]
- add $T1,$T1,$t1
- eor $h,$h,$a,ror#$Sigma0[1]
- add $T1,$T1,$t2
- eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a)
-#if $i>=15
- ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx
+ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+ add $h,$h,$t1 @ h+=Ch(e,f,g)
+#if $i==31
+ and $t2,$t2,#0xff
+ cmp $t2,#0xf2 @ done?
#endif
- orr $t0,$a,$b
- and $t1,$a,$b
- and $t0,$t0,$c
- add $h,$h,$T1
- orr $t0,$t0,$t1 @ Maj(a,b,c)
- add $d,$d,$T1
- add $h,$h,$t0
+#if $i<15
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4 @ prefetch
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t2,$a,$b @ a^b, b^c in next round
+#else
+ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
+ eor $t2,$a,$b @ a^b, b^c in next round
+ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
+#endif
+ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
+ and $t3,$t3,$t2 @ (b^c)&=(a^b)
+ add $d,$d,$h @ d+=h
+ eor $t3,$t3,$b @ Maj(a,b,c)
+ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
+ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
___
+ ($t2,$t3)=($t3,$t2);
}
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
$code.=<<___;
- @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i
- ldr $t2,[sp,#`($i+14)%16`*4]
- mov $t0,$t3,ror#$sigma0[0]
- ldr $T1,[sp,#`($i+0)%16`*4]
- eor $t0,$t0,$t3,ror#$sigma0[1]
- ldr $t1,[sp,#`($i+9)%16`*4]
- eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1])
- mov $t3,$t2,ror#$sigma1[0]
- add $T1,$T1,$t0
- eor $t3,$t3,$t2,ror#$sigma1[1]
- add $T1,$T1,$t1
- eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14])
- @ add $T1,$T1,$t3
+ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
+ @ ldr $t4,[sp,#`($i+14)%16`*4]
+ mov $t0,$t1,ror#$sigma0[0]
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ mov $t2,$t4,ror#$sigma1[0]
+ eor $t0,$t0,$t1,ror#$sigma0[1]
+ eor $t2,$t2,$t4,ror#$sigma1[1]
+ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
+ ldr $t1,[sp,#`($i+0)%16`*4]
+ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
+ ldr $t4,[sp,#`($i+9)%16`*4]
+
+ add $t2,$t2,$t0
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
+ add $t1,$t1,$t2
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ add $t1,$t1,$t4 @ X[i]
___
&BODY_00_15(@_);
}
@@ -147,46 +176,64 @@
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
+.word 0 @ terminator
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+.align 5
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+#if __ARM_ARCH__>=7
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
- sub $Ktbl,r3,#256 @ K256
+ sub $Ktbl,r3,#256+32 @ K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t3,$B,$C @ magic
+ eor $t2,$t2,$t2
___
for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
- and $t2,$t2,#0xff
- cmp $t2,#0xf2
+ ldreq $t3,[sp,#16*4] @ pull ctx
bne .Lrounds_16_xx
- ldr $T1,[sp,#16*4] @ pull ctx
- ldr $t0,[$T1,#0]
- ldr $t1,[$T1,#4]
- ldr $t2,[$T1,#8]
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t0,[$t3,#0]
+ ldr $t1,[$t3,#4]
+ ldr $t2,[$t3,#8]
add $A,$A,$t0
- ldr $t0,[$T1,#12]
+ ldr $t0,[$t3,#12]
add $B,$B,$t1
- ldr $t1,[$T1,#16]
+ ldr $t1,[$t3,#16]
add $C,$C,$t2
- ldr $t2,[$T1,#20]
+ ldr $t2,[$t3,#20]
add $D,$D,$t0
- ldr $t0,[$T1,#24]
+ ldr $t0,[$t3,#24]
add $E,$E,$t1
- ldr $t1,[$T1,#28]
+ ldr $t1,[$t3,#28]
add $F,$F,$t2
ldr $inp,[sp,#17*4] @ pull inp
ldr $t2,[sp,#18*4] @ pull inp+len
add $G,$G,$t0
add $H,$H,$t1
- stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H}
+ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
cmp $inp,$t2
sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
bne .Loop
@@ -200,12 +247,410 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.size sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+ my $arg = pop;
+ $arg = "#$arg" if ($arg*1 eq $arg);
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T2,$T0,$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T1,$T0,$sigma0[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T2,$T0,32-$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T3,$T0,$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T3,$T0,32-$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ while($#insns>=2) { eval(shift(@insns)); }
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vrev32_8 (@X[0],@X[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ foreach (@insns) { eval; } # remaining instructions
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub body_00_15 () {
+ (
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
+ '&eor ($t1,$f,$g)',
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
+ '&and ($t1,$t1,$e)',
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
+ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
+ '&ldr ($t1,"[sp,#64]") if ($j==31)',
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
+ '&add ($d,$d,$h)', # d+=h
+ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+ )
+}
+
+$code.=<<___;
+#if __ARM_ARCH__>=7
+.fpu neon
+
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ mov $t2,sp
+ sub sp,sp,#16*4+16 @ alloca
+ sub $Ktbl,r3,#256+32 @ K256
+ bic sp,sp,#15 @ align for 128-bit stores
+
+ vld1.8 {@X[0]},[$inp]!
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ vld1.32 {$T0},[$Ktbl,:128]!
+ vld1.32 {$T1},[$Ktbl,:128]!
+ vld1.32 {$T2},[$Ktbl,:128]!
+ vld1.32 {$T3},[$Ktbl,:128]!
+ vrev32.8 @X[0],@X[0] @ yes, even on
+ str $ctx,[sp,#64]
+ vrev32.8 @X[1],@X[1] @ big-endian
+ str $inp,[sp,#68]
+ mov $Xfer,sp
+ vrev32.8 @X[2],@X[2]
+ str $len,[sp,#72]
+ vrev32.8 @X[3],@X[3]
+ str $t2,[sp,#76] @ save original sp
+ vadd.i32 $T0,$T0,@X[0]
+ vadd.i32 $T1,$T1,@X[1]
+ vst1.32 {$T0},[$Xfer,:128]!
+ vadd.i32 $T2,$T2,@X[2]
+ vst1.32 {$T1},[$Xfer,:128]!
+ vadd.i32 $T3,$T3,@X[3]
+ vst1.32 {$T2},[$Xfer,:128]!
+ vst1.32 {$T3},[$Xfer,:128]!
+
+ ldmia $ctx,{$A-$H}
+ sub $Xfer,$Xfer,#64
+ ldr $t1,[sp,#0]
+ eor $t2,$t2,$t2
+ eor $t3,$B,$C
+ b .L_00_48
+
+.align 4
+.L_00_48:
+___
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+$code.=<<___;
+ teq $t1,#0 @ check for K256 terminator
+ ldr $t1,[sp,#0]
+ sub $Xfer,$Xfer,#64
+ bne .L_00_48
+
+ ldr $inp,[sp,#68]
+ ldr $t0,[sp,#72]
+ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
+ teq $inp,$t0
+ subeq $inp,$inp,#64 @ avoid SEGV
+ vld1.8 {@X[0]},[$inp]! @ load next input block
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ strne $inp,[sp,#68]
+ mov $Xfer,sp
+___
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+$code.=<<___;
+ ldr $t0,[$t1,#0]
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t2,[$t1,#4]
+ ldr $t3,[$t1,#8]
+ ldr $t4,[$t1,#12]
+ add $A,$A,$t0 @ accumulate
+ ldr $t0,[$t1,#16]
+ add $B,$B,$t2
+ ldr $t2,[$t1,#20]
+ add $C,$C,$t3
+ ldr $t3,[$t1,#24]
+ add $D,$D,$t4
+ ldr $t4,[$t1,#28]
+ add $E,$E,$t0
+ str $A,[$t1],#4
+ add $F,$F,$t2
+ str $B,[$t1],#4
+ add $G,$G,$t3
+ str $C,[$t1],#4
+ add $H,$H,$t4
+ str $D,[$t1],#4
+ stmia $t1,{$E-$H}
+
+ movne $Xfer,sp
+ ldrne $t1,[sp,#0]
+ eorne $t2,$t2,$t2
+ ldreq sp,[sp,#76] @ restore original sp
+ eorne $t3,$B,$C
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_ARCH__>=7
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {$ABCD,$EFGH},[$ctx]
+ sub $Ktbl,r3,#sha256_block_data_order-K256
+
+.Loop_v8:
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
+ vld1.32 {$W0},[$Ktbl]!
+ vrev32.8 @MSG[0],@MSG[0]
+ vrev32.8 @MSG[1],@MSG[1]
+ vrev32.8 @MSG[2],@MSG[2]
+ vrev32.8 @MSG[3],@MSG[3]
+ vmov $ABCD_SAVE,$ABCD @ offload
+ vmov $EFGH_SAVE,$EFGH
+ teq $inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ sha256su0 @MSG[0],@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vld1.32 {$W0},[$Ktbl]!
+ vadd.i32 $W1,$W1,@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vld1.32 {$W1},[$Ktbl]
+ vadd.i32 $W0,$W0,@MSG[2]
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vadd.i32 $W1,$W1,@MSG[3]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
+ bne .Loop_v8
+
+ vst1.32 {$ABCD,$EFGH},[$ctx]
+
+ ret @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+.comm OPENSSL_armcap_P,4,4
___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-print $code;
+{ my %opcode = (
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
+
+ sub unsha256 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+}
+
+foreach (split($/,$code)) {
+
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+
close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha256-armv8.S b/crypto/sha/asm/sha256-armv8.S
new file mode 100644
index 0000000..bd43b1f
--- /dev/null
+++ b/crypto/sha/asm/sha256-armv8.S
@@ -0,0 +1,1141 @@
+#include "arm_arch.h"
+
+.text
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,%function
+.align 6
+sha256_block_data_order:
+ ldr x16,.LOPENSSL_armcap_P
+ adr x17,.LOPENSSL_armcap_P
+ add x16,x16,x17
+ ldr w16,[x16]
+ tst w16,#ARMV8_SHA256
+ b.ne .Lv8_entry
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ sub sp,sp,#4*4
+
+ ldp w20,w21,[x0] // load context
+ ldp w22,w23,[x0,#2*4]
+ ldp w24,w25,[x0,#4*4]
+ add x2,x1,x2,lsl#6 // end of input
+ ldp w26,w27,[x0,#6*4]
+ adr x30,K256
+ stp x0,x2,[x29,#96]
+
+.Loop:
+ ldp w3,w4,[x1],#2*4
+ ldr w19,[x30],#4 // *K++
+ eor w28,w21,w22 // magic seed
+ str x1,[x29,#112]
+#ifndef __ARMEB__
+ rev w3,w3 // 0
+#endif
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ eor w6,w24,w24,ror#14
+ and w17,w25,w24
+ bic w19,w26,w24
+ add w27,w27,w3 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w6,ror#11 // Sigma1(e)
+ ror w6,w20,#2
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ eor w17,w20,w20,ror#9
+ add w27,w27,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w23,w23,w27 // d+=h
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w6,w17,ror#13 // Sigma0(a)
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w27,w27,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w4,w4 // 1
+#endif
+ ldp w5,w6,[x1],#2*4
+ add w27,w27,w17 // h+=Sigma0(a)
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ eor w7,w23,w23,ror#14
+ and w17,w24,w23
+ bic w28,w25,w23
+ add w26,w26,w4 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w7,ror#11 // Sigma1(e)
+ ror w7,w27,#2
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ eor w17,w27,w27,ror#9
+ add w26,w26,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w22,w22,w26 // d+=h
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w7,w17,ror#13 // Sigma0(a)
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w26,w26,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w5,w5 // 2
+#endif
+ add w26,w26,w17 // h+=Sigma0(a)
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ eor w8,w22,w22,ror#14
+ and w17,w23,w22
+ bic w19,w24,w22
+ add w25,w25,w5 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w8,ror#11 // Sigma1(e)
+ ror w8,w26,#2
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ eor w17,w26,w26,ror#9
+ add w25,w25,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w21,w21,w25 // d+=h
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w8,w17,ror#13 // Sigma0(a)
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w25,w25,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w6,w6 // 3
+#endif
+ ldp w7,w8,[x1],#2*4
+ add w25,w25,w17 // h+=Sigma0(a)
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ eor w9,w21,w21,ror#14
+ and w17,w22,w21
+ bic w28,w23,w21
+ add w24,w24,w6 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w9,ror#11 // Sigma1(e)
+ ror w9,w25,#2
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ eor w17,w25,w25,ror#9
+ add w24,w24,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w20,w20,w24 // d+=h
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w9,w17,ror#13 // Sigma0(a)
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w24,w24,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w7,w7 // 4
+#endif
+ add w24,w24,w17 // h+=Sigma0(a)
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ eor w10,w20,w20,ror#14
+ and w17,w21,w20
+ bic w19,w22,w20
+ add w23,w23,w7 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w10,ror#11 // Sigma1(e)
+ ror w10,w24,#2
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ eor w17,w24,w24,ror#9
+ add w23,w23,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w27,w27,w23 // d+=h
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w10,w17,ror#13 // Sigma0(a)
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w23,w23,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w8,w8 // 5
+#endif
+ ldp w9,w10,[x1],#2*4
+ add w23,w23,w17 // h+=Sigma0(a)
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ eor w11,w27,w27,ror#14
+ and w17,w20,w27
+ bic w28,w21,w27
+ add w22,w22,w8 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w11,ror#11 // Sigma1(e)
+ ror w11,w23,#2
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ eor w17,w23,w23,ror#9
+ add w22,w22,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w26,w26,w22 // d+=h
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w11,w17,ror#13 // Sigma0(a)
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w22,w22,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w9,w9 // 6
+#endif
+ add w22,w22,w17 // h+=Sigma0(a)
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ eor w12,w26,w26,ror#14
+ and w17,w27,w26
+ bic w19,w20,w26
+ add w21,w21,w9 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w12,ror#11 // Sigma1(e)
+ ror w12,w22,#2
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ eor w17,w22,w22,ror#9
+ add w21,w21,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w25,w25,w21 // d+=h
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w12,w17,ror#13 // Sigma0(a)
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w21,w21,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w10,w10 // 7
+#endif
+ ldp w11,w12,[x1],#2*4
+ add w21,w21,w17 // h+=Sigma0(a)
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ eor w13,w25,w25,ror#14
+ and w17,w26,w25
+ bic w28,w27,w25
+ add w20,w20,w10 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w13,ror#11 // Sigma1(e)
+ ror w13,w21,#2
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ eor w17,w21,w21,ror#9
+ add w20,w20,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w24,w24,w20 // d+=h
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w13,w17,ror#13 // Sigma0(a)
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w20,w20,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w11,w11 // 8
+#endif
+ add w20,w20,w17 // h+=Sigma0(a)
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ eor w14,w24,w24,ror#14
+ and w17,w25,w24
+ bic w19,w26,w24
+ add w27,w27,w11 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w14,ror#11 // Sigma1(e)
+ ror w14,w20,#2
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ eor w17,w20,w20,ror#9
+ add w27,w27,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w23,w23,w27 // d+=h
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w14,w17,ror#13 // Sigma0(a)
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w27,w27,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w12,w12 // 9
+#endif
+ ldp w13,w14,[x1],#2*4
+ add w27,w27,w17 // h+=Sigma0(a)
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ eor w15,w23,w23,ror#14
+ and w17,w24,w23
+ bic w28,w25,w23
+ add w26,w26,w12 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w15,ror#11 // Sigma1(e)
+ ror w15,w27,#2
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ eor w17,w27,w27,ror#9
+ add w26,w26,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w22,w22,w26 // d+=h
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w15,w17,ror#13 // Sigma0(a)
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w26,w26,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w13,w13 // 10
+#endif
+ add w26,w26,w17 // h+=Sigma0(a)
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ eor w0,w22,w22,ror#14
+ and w17,w23,w22
+ bic w19,w24,w22
+ add w25,w25,w13 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w0,ror#11 // Sigma1(e)
+ ror w0,w26,#2
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ eor w17,w26,w26,ror#9
+ add w25,w25,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w21,w21,w25 // d+=h
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w0,w17,ror#13 // Sigma0(a)
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w25,w25,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w14,w14 // 11
+#endif
+ ldp w15,w0,[x1],#2*4
+ add w25,w25,w17 // h+=Sigma0(a)
+ str w6,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ eor w6,w21,w21,ror#14
+ and w17,w22,w21
+ bic w28,w23,w21
+ add w24,w24,w14 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w6,ror#11 // Sigma1(e)
+ ror w6,w25,#2
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ eor w17,w25,w25,ror#9
+ add w24,w24,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w20,w20,w24 // d+=h
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w6,w17,ror#13 // Sigma0(a)
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w24,w24,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w15,w15 // 12
+#endif
+ add w24,w24,w17 // h+=Sigma0(a)
+ str w7,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ eor w7,w20,w20,ror#14
+ and w17,w21,w20
+ bic w19,w22,w20
+ add w23,w23,w15 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w7,ror#11 // Sigma1(e)
+ ror w7,w24,#2
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ eor w17,w24,w24,ror#9
+ add w23,w23,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w27,w27,w23 // d+=h
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w7,w17,ror#13 // Sigma0(a)
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w23,w23,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w0,w0 // 13
+#endif
+ ldp w1,w2,[x1]
+ add w23,w23,w17 // h+=Sigma0(a)
+ str w8,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ eor w8,w27,w27,ror#14
+ and w17,w20,w27
+ bic w28,w21,w27
+ add w22,w22,w0 // h+=X[i]
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w8,ror#11 // Sigma1(e)
+ ror w8,w23,#2
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ eor w17,w23,w23,ror#9
+ add w22,w22,w16 // h+=Sigma1(e)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ add w26,w26,w22 // d+=h
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w8,w17,ror#13 // Sigma0(a)
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ //add w22,w22,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w1,w1 // 14
+#endif
+ ldr w6,[sp,#12]
+ add w22,w22,w17 // h+=Sigma0(a)
+ str w9,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ eor w9,w26,w26,ror#14
+ and w17,w27,w26
+ bic w19,w20,w26
+ add w21,w21,w1 // h+=X[i]
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w9,ror#11 // Sigma1(e)
+ ror w9,w22,#2
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ eor w17,w22,w22,ror#9
+ add w21,w21,w16 // h+=Sigma1(e)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ add w25,w25,w21 // d+=h
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w9,w17,ror#13 // Sigma0(a)
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ //add w21,w21,w17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev w2,w2 // 15
+#endif
+ ldr w7,[sp,#0]
+ add w21,w21,w17 // h+=Sigma0(a)
+ str w10,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w9,w4,#7
+ and w17,w26,w25
+ ror w8,w1,#17
+ bic w28,w27,w25
+ ror w10,w21,#2
+ add w20,w20,w2 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w9,w9,w4,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w10,w10,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w8,w8,w1,ror#19
+ eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w10,w21,ror#22 // Sigma0(a)
+ eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
+ add w3,w3,w12
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w3,w3,w9
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w3,w3,w8
+.Loop_16_xx:
+ ldr w8,[sp,#4]
+ str w11,[sp,#0]
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ ror w10,w5,#7
+ and w17,w25,w24
+ ror w9,w2,#17
+ bic w19,w26,w24
+ ror w11,w20,#2
+ add w27,w27,w3 // h+=X[i]
+ eor w16,w16,w24,ror#11
+ eor w10,w10,w5,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w24,ror#25 // Sigma1(e)
+ eor w11,w11,w20,ror#13
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w9,w9,w2,ror#19
+ eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
+ add w27,w27,w16 // h+=Sigma1(e)
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w11,w20,ror#22 // Sigma0(a)
+ eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
+ add w4,w4,w13
+ add w23,w23,w27 // d+=h
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w4,w4,w10
+ add w27,w27,w17 // h+=Sigma0(a)
+ add w4,w4,w9
+ ldr w9,[sp,#8]
+ str w12,[sp,#4]
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ ror w11,w6,#7
+ and w17,w24,w23
+ ror w10,w3,#17
+ bic w28,w25,w23
+ ror w12,w27,#2
+ add w26,w26,w4 // h+=X[i]
+ eor w16,w16,w23,ror#11
+ eor w11,w11,w6,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w23,ror#25 // Sigma1(e)
+ eor w12,w12,w27,ror#13
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w10,w10,w3,ror#19
+ eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
+ add w26,w26,w16 // h+=Sigma1(e)
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w12,w27,ror#22 // Sigma0(a)
+ eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
+ add w5,w5,w14
+ add w22,w22,w26 // d+=h
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w5,w5,w11
+ add w26,w26,w17 // h+=Sigma0(a)
+ add w5,w5,w10
+ ldr w10,[sp,#12]
+ str w13,[sp,#8]
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ ror w12,w7,#7
+ and w17,w23,w22
+ ror w11,w4,#17
+ bic w19,w24,w22
+ ror w13,w26,#2
+ add w25,w25,w5 // h+=X[i]
+ eor w16,w16,w22,ror#11
+ eor w12,w12,w7,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w22,ror#25 // Sigma1(e)
+ eor w13,w13,w26,ror#13
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w11,w11,w4,ror#19
+ eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
+ add w25,w25,w16 // h+=Sigma1(e)
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w13,w26,ror#22 // Sigma0(a)
+ eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
+ add w6,w6,w15
+ add w21,w21,w25 // d+=h
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w6,w6,w12
+ add w25,w25,w17 // h+=Sigma0(a)
+ add w6,w6,w11
+ ldr w11,[sp,#0]
+ str w14,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ ror w13,w8,#7
+ and w17,w22,w21
+ ror w12,w5,#17
+ bic w28,w23,w21
+ ror w14,w25,#2
+ add w24,w24,w6 // h+=X[i]
+ eor w16,w16,w21,ror#11
+ eor w13,w13,w8,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w21,ror#25 // Sigma1(e)
+ eor w14,w14,w25,ror#13
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w12,w12,w5,ror#19
+ eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
+ add w24,w24,w16 // h+=Sigma1(e)
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w14,w25,ror#22 // Sigma0(a)
+ eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
+ add w7,w7,w0
+ add w20,w20,w24 // d+=h
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w7,w7,w13
+ add w24,w24,w17 // h+=Sigma0(a)
+ add w7,w7,w12
+ ldr w12,[sp,#4]
+ str w15,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ ror w14,w9,#7
+ and w17,w21,w20
+ ror w13,w6,#17
+ bic w19,w22,w20
+ ror w15,w24,#2
+ add w23,w23,w7 // h+=X[i]
+ eor w16,w16,w20,ror#11
+ eor w14,w14,w9,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w20,ror#25 // Sigma1(e)
+ eor w15,w15,w24,ror#13
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w13,w13,w6,ror#19
+ eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
+ add w23,w23,w16 // h+=Sigma1(e)
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w15,w24,ror#22 // Sigma0(a)
+ eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
+ add w8,w8,w1
+ add w27,w27,w23 // d+=h
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w8,w8,w14
+ add w23,w23,w17 // h+=Sigma0(a)
+ add w8,w8,w13
+ ldr w13,[sp,#8]
+ str w0,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ ror w15,w10,#7
+ and w17,w20,w27
+ ror w14,w7,#17
+ bic w28,w21,w27
+ ror w0,w23,#2
+ add w22,w22,w8 // h+=X[i]
+ eor w16,w16,w27,ror#11
+ eor w15,w15,w10,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w27,ror#25 // Sigma1(e)
+ eor w0,w0,w23,ror#13
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w14,w14,w7,ror#19
+ eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
+ add w22,w22,w16 // h+=Sigma1(e)
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w0,w23,ror#22 // Sigma0(a)
+ eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
+ add w9,w9,w2
+ add w26,w26,w22 // d+=h
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w9,w9,w15
+ add w22,w22,w17 // h+=Sigma0(a)
+ add w9,w9,w14
+ ldr w14,[sp,#12]
+ str w1,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ ror w0,w11,#7
+ and w17,w27,w26
+ ror w15,w8,#17
+ bic w19,w20,w26
+ ror w1,w22,#2
+ add w21,w21,w9 // h+=X[i]
+ eor w16,w16,w26,ror#11
+ eor w0,w0,w11,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w26,ror#25 // Sigma1(e)
+ eor w1,w1,w22,ror#13
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w15,w15,w8,ror#19
+ eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
+ add w21,w21,w16 // h+=Sigma1(e)
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w1,w22,ror#22 // Sigma0(a)
+ eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
+ add w10,w10,w3
+ add w25,w25,w21 // d+=h
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w10,w10,w0
+ add w21,w21,w17 // h+=Sigma0(a)
+ add w10,w10,w15
+ ldr w15,[sp,#0]
+ str w2,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w1,w12,#7
+ and w17,w26,w25
+ ror w0,w9,#17
+ bic w28,w27,w25
+ ror w2,w21,#2
+ add w20,w20,w10 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w1,w1,w12,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w2,w2,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w0,w0,w9,ror#19
+ eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w2,w21,ror#22 // Sigma0(a)
+ eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
+ add w11,w11,w4
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w11,w11,w1
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w11,w11,w0
+ ldr w0,[sp,#4]
+ str w3,[sp,#0]
+ ror w16,w24,#6
+ add w27,w27,w19 // h+=K[i]
+ ror w2,w13,#7
+ and w17,w25,w24
+ ror w1,w10,#17
+ bic w19,w26,w24
+ ror w3,w20,#2
+ add w27,w27,w11 // h+=X[i]
+ eor w16,w16,w24,ror#11
+ eor w2,w2,w13,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w20,w21 // a^b, b^c in next round
+ eor w16,w16,w24,ror#25 // Sigma1(e)
+ eor w3,w3,w20,ror#13
+ add w27,w27,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w1,w1,w10,ror#19
+ eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
+ add w27,w27,w16 // h+=Sigma1(e)
+ eor w28,w28,w21 // Maj(a,b,c)
+ eor w17,w3,w20,ror#22 // Sigma0(a)
+ eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
+ add w12,w12,w5
+ add w23,w23,w27 // d+=h
+ add w27,w27,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w12,w12,w2
+ add w27,w27,w17 // h+=Sigma0(a)
+ add w12,w12,w1
+ ldr w1,[sp,#8]
+ str w4,[sp,#4]
+ ror w16,w23,#6
+ add w26,w26,w28 // h+=K[i]
+ ror w3,w14,#7
+ and w17,w24,w23
+ ror w2,w11,#17
+ bic w28,w25,w23
+ ror w4,w27,#2
+ add w26,w26,w12 // h+=X[i]
+ eor w16,w16,w23,ror#11
+ eor w3,w3,w14,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w27,w20 // a^b, b^c in next round
+ eor w16,w16,w23,ror#25 // Sigma1(e)
+ eor w4,w4,w27,ror#13
+ add w26,w26,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w2,w2,w11,ror#19
+ eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
+ add w26,w26,w16 // h+=Sigma1(e)
+ eor w19,w19,w20 // Maj(a,b,c)
+ eor w17,w4,w27,ror#22 // Sigma0(a)
+ eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
+ add w13,w13,w6
+ add w22,w22,w26 // d+=h
+ add w26,w26,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w13,w13,w3
+ add w26,w26,w17 // h+=Sigma0(a)
+ add w13,w13,w2
+ ldr w2,[sp,#12]
+ str w5,[sp,#8]
+ ror w16,w22,#6
+ add w25,w25,w19 // h+=K[i]
+ ror w4,w15,#7
+ and w17,w23,w22
+ ror w3,w12,#17
+ bic w19,w24,w22
+ ror w5,w26,#2
+ add w25,w25,w13 // h+=X[i]
+ eor w16,w16,w22,ror#11
+ eor w4,w4,w15,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w26,w27 // a^b, b^c in next round
+ eor w16,w16,w22,ror#25 // Sigma1(e)
+ eor w5,w5,w26,ror#13
+ add w25,w25,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w3,w3,w12,ror#19
+ eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
+ add w25,w25,w16 // h+=Sigma1(e)
+ eor w28,w28,w27 // Maj(a,b,c)
+ eor w17,w5,w26,ror#22 // Sigma0(a)
+ eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
+ add w14,w14,w7
+ add w21,w21,w25 // d+=h
+ add w25,w25,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w14,w14,w4
+ add w25,w25,w17 // h+=Sigma0(a)
+ add w14,w14,w3
+ ldr w3,[sp,#0]
+ str w6,[sp,#12]
+ ror w16,w21,#6
+ add w24,w24,w28 // h+=K[i]
+ ror w5,w0,#7
+ and w17,w22,w21
+ ror w4,w13,#17
+ bic w28,w23,w21
+ ror w6,w25,#2
+ add w24,w24,w14 // h+=X[i]
+ eor w16,w16,w21,ror#11
+ eor w5,w5,w0,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w25,w26 // a^b, b^c in next round
+ eor w16,w16,w21,ror#25 // Sigma1(e)
+ eor w6,w6,w25,ror#13
+ add w24,w24,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w4,w4,w13,ror#19
+ eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
+ add w24,w24,w16 // h+=Sigma1(e)
+ eor w19,w19,w26 // Maj(a,b,c)
+ eor w17,w6,w25,ror#22 // Sigma0(a)
+ eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
+ add w15,w15,w8
+ add w20,w20,w24 // d+=h
+ add w24,w24,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w15,w15,w5
+ add w24,w24,w17 // h+=Sigma0(a)
+ add w15,w15,w4
+ ldr w4,[sp,#4]
+ str w7,[sp,#0]
+ ror w16,w20,#6
+ add w23,w23,w19 // h+=K[i]
+ ror w6,w1,#7
+ and w17,w21,w20
+ ror w5,w14,#17
+ bic w19,w22,w20
+ ror w7,w24,#2
+ add w23,w23,w15 // h+=X[i]
+ eor w16,w16,w20,ror#11
+ eor w6,w6,w1,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w24,w25 // a^b, b^c in next round
+ eor w16,w16,w20,ror#25 // Sigma1(e)
+ eor w7,w7,w24,ror#13
+ add w23,w23,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w5,w5,w14,ror#19
+ eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
+ add w23,w23,w16 // h+=Sigma1(e)
+ eor w28,w28,w25 // Maj(a,b,c)
+ eor w17,w7,w24,ror#22 // Sigma0(a)
+ eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
+ add w0,w0,w9
+ add w27,w27,w23 // d+=h
+ add w23,w23,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w0,w0,w6
+ add w23,w23,w17 // h+=Sigma0(a)
+ add w0,w0,w5
+ ldr w5,[sp,#8]
+ str w8,[sp,#4]
+ ror w16,w27,#6
+ add w22,w22,w28 // h+=K[i]
+ ror w7,w2,#7
+ and w17,w20,w27
+ ror w6,w15,#17
+ bic w28,w21,w27
+ ror w8,w23,#2
+ add w22,w22,w0 // h+=X[i]
+ eor w16,w16,w27,ror#11
+ eor w7,w7,w2,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w23,w24 // a^b, b^c in next round
+ eor w16,w16,w27,ror#25 // Sigma1(e)
+ eor w8,w8,w23,ror#13
+ add w22,w22,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w6,w6,w15,ror#19
+ eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
+ add w22,w22,w16 // h+=Sigma1(e)
+ eor w19,w19,w24 // Maj(a,b,c)
+ eor w17,w8,w23,ror#22 // Sigma0(a)
+ eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
+ add w1,w1,w10
+ add w26,w26,w22 // d+=h
+ add w22,w22,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w1,w1,w7
+ add w22,w22,w17 // h+=Sigma0(a)
+ add w1,w1,w6
+ ldr w6,[sp,#12]
+ str w9,[sp,#8]
+ ror w16,w26,#6
+ add w21,w21,w19 // h+=K[i]
+ ror w8,w3,#7
+ and w17,w27,w26
+ ror w7,w0,#17
+ bic w19,w20,w26
+ ror w9,w22,#2
+ add w21,w21,w1 // h+=X[i]
+ eor w16,w16,w26,ror#11
+ eor w8,w8,w3,ror#18
+ orr w17,w17,w19 // Ch(e,f,g)
+ eor w19,w22,w23 // a^b, b^c in next round
+ eor w16,w16,w26,ror#25 // Sigma1(e)
+ eor w9,w9,w22,ror#13
+ add w21,w21,w17 // h+=Ch(e,f,g)
+ and w28,w28,w19 // (b^c)&=(a^b)
+ eor w7,w7,w0,ror#19
+ eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
+ add w21,w21,w16 // h+=Sigma1(e)
+ eor w28,w28,w23 // Maj(a,b,c)
+ eor w17,w9,w22,ror#22 // Sigma0(a)
+ eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
+ add w2,w2,w11
+ add w25,w25,w21 // d+=h
+ add w21,w21,w28 // h+=Maj(a,b,c)
+ ldr w28,[x30],#4 // *K++, w19 in next round
+ add w2,w2,w8
+ add w21,w21,w17 // h+=Sigma0(a)
+ add w2,w2,w7
+ ldr w7,[sp,#0]
+ str w10,[sp,#12]
+ ror w16,w25,#6
+ add w20,w20,w28 // h+=K[i]
+ ror w9,w4,#7
+ and w17,w26,w25
+ ror w8,w1,#17
+ bic w28,w27,w25
+ ror w10,w21,#2
+ add w20,w20,w2 // h+=X[i]
+ eor w16,w16,w25,ror#11
+ eor w9,w9,w4,ror#18
+ orr w17,w17,w28 // Ch(e,f,g)
+ eor w28,w21,w22 // a^b, b^c in next round
+ eor w16,w16,w25,ror#25 // Sigma1(e)
+ eor w10,w10,w21,ror#13
+ add w20,w20,w17 // h+=Ch(e,f,g)
+ and w19,w19,w28 // (b^c)&=(a^b)
+ eor w8,w8,w1,ror#19
+ eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
+ add w20,w20,w16 // h+=Sigma1(e)
+ eor w19,w19,w22 // Maj(a,b,c)
+ eor w17,w10,w21,ror#22 // Sigma0(a)
+ eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
+ add w3,w3,w12
+ add w24,w24,w20 // d+=h
+ add w20,w20,w19 // h+=Maj(a,b,c)
+ ldr w19,[x30],#4 // *K++, w28 in next round
+ add w3,w3,w9
+ add w20,w20,w17 // h+=Sigma0(a)
+ add w3,w3,w8
+ cbnz w19,.Loop_16_xx
+
+ ldp x0,x2,[x29,#96]
+ ldr x1,[x29,#112]
+ sub x30,x30,#260 // rewind
+
+ ldp w3,w4,[x0]
+ ldp w5,w6,[x0,#2*4]
+ add x1,x1,#14*4 // advance input pointer
+ ldp w7,w8,[x0,#4*4]
+ add w20,w20,w3
+ ldp w9,w10,[x0,#6*4]
+ add w21,w21,w4
+ add w22,w22,w5
+ add w23,w23,w6
+ stp w20,w21,[x0]
+ add w24,w24,w7
+ add w25,w25,w8
+ stp w22,w23,[x0,#2*4]
+ add w26,w26,w9
+ add w27,w27,w10
+ cmp x1,x2
+ stp w24,w25,[x0,#4*4]
+ stp w26,w27,[x0,#6*4]
+ b.ne .Loop
+
+ ldp x19,x20,[x29,#16]
+ add sp,sp,#4*4
+ ldp x21,x22,[x29,#32]
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#128
+ ret
+.size sha256_block_data_order,.-sha256_block_data_order
+
+.align 6
+.type K256,%object
+K256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ .long 0 //terminator
+.size K256,.-K256
+.align 3
+.LOPENSSL_armcap_P:
+ .quad OPENSSL_armcap_P-.
+.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 2
+.type sha256_block_armv8,%function
+.align 6
+sha256_block_armv8:
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ ld1 {v0.4s,v1.4s},[x0]
+ adr x3,K256
+
+.Loop_hw:
+ ld1 {v4.16b-v7.16b},[x1],#64
+ sub x2,x2,#1
+ ld1 {v16.4s},[x3],#16
+ rev32 v4.16b,v4.16b
+ rev32 v5.16b,v5.16b
+ rev32 v6.16b,v6.16b
+ rev32 v7.16b,v7.16b
+ orr v18.16b,v0.16b,v0.16b // offload
+ orr v19.16b,v1.16b,v1.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v6.4s
+ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v7.4s
+ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+ ld1 {v17.4s},[x3],#16
+ add v16.4s,v16.4s,v4.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+ ld1 {v16.4s},[x3],#16
+ add v17.4s,v17.4s,v5.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+ ld1 {v17.4s},[x3]
+ add v16.4s,v16.4s,v6.4s
+ sub x3,x3,#64*4-16 // rewind
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+ add v17.4s,v17.4s,v7.4s
+ orr v2.16b,v0.16b,v0.16b
+ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+ add v0.4s,v0.4s,v18.4s
+ add v1.4s,v1.4s,v19.4s
+
+ cbnz x2,.Loop_hw
+
+ st1 {v0.4s,v1.4s},[x0]
+
+ ldr x29,[sp],#16
+ ret
+.size sha256_block_armv8,.-sha256_block_armv8
+.comm OPENSSL_armcap_P,4,4
diff --git a/crypto/sha/asm/sha512-586.S b/crypto/sha/asm/sha512-586.S
index 4b806f3..82c76c4 100644
--- a/crypto/sha/asm/sha512-586.S
+++ b/crypto/sha/asm/sha512-586.S
@@ -25,6 +25,278 @@
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L001K512](%ebp),%edx
+ movl OPENSSL_ia32cap_P@GOT(%edx),%edx
+ btl $26,(%edx)
+ jnc .L002loop_x86
+ movq (%esi),%mm0
+ movq 8(%esi),%mm1
+ movq 16(%esi),%mm2
+ movq 24(%esi),%mm3
+ movq 32(%esi),%mm4
+ movq 40(%esi),%mm5
+ movq 48(%esi),%mm6
+ movq 56(%esi),%mm7
+ subl $80,%esp
+.align 16
+.L003loop_sse2:
+ movq %mm1,8(%esp)
+ movq %mm2,16(%esp)
+ movq %mm3,24(%esp)
+ movq %mm5,40(%esp)
+ movq %mm6,48(%esp)
+ movq %mm7,56(%esp)
+ movl (%edi),%ecx
+ movl 4(%edi),%edx
+ addl $8,%edi
+ bswap %ecx
+ bswap %edx
+ movl %ecx,76(%esp)
+ movl %edx,72(%esp)
+.align 16
+.L00400_14_sse2:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ addl $8,%edi
+ bswap %eax
+ bswap %ebx
+ movl %eax,68(%esp)
+ movl %ebx,64(%esp)
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+ cmpb $53,%dl
+ jne .L00400_14_sse2
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ movq 88(%esp),%mm6
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ movq 192(%esp),%mm2
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+.align 16
+.L00516_79_sse2:
+ movq %mm2,%mm1
+ psrlq $1,%mm2
+ movq %mm6,%mm7
+ psrlq $6,%mm6
+ movq %mm2,%mm3
+ psrlq $6,%mm2
+ movq %mm6,%mm5
+ psrlq $13,%mm6
+ pxor %mm2,%mm3
+ psrlq $1,%mm2
+ pxor %mm6,%mm5
+ psrlq $42,%mm6
+ pxor %mm2,%mm3
+ movq 200(%esp),%mm2
+ psllq $56,%mm1
+ pxor %mm6,%mm5
+ psllq $3,%mm7
+ pxor %mm1,%mm3
+ paddq 128(%esp),%mm2
+ psllq $7,%mm1
+ pxor %mm7,%mm5
+ psllq $42,%mm7
+ pxor %mm1,%mm3
+ pxor %mm7,%mm5
+ paddq %mm5,%mm3
+ paddq %mm2,%mm3
+ movq %mm3,72(%esp)
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ movq 88(%esp),%mm6
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ movq 192(%esp),%mm2
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+ cmpb $23,%dl
+ jne .L00516_79_sse2
+ movq 8(%esp),%mm1
+ movq 16(%esp),%mm2
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ addl $640,%esp
+ subl $640,%ebp
+ cmpl 88(%esp),%edi
+ jb .L003loop_sse2
+ emms
+ movl 92(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.align 16
.L002loop_x86:
movl (%edi),%eax
@@ -130,7 +402,7 @@
movl $16,%ecx
.long 2784229001
.align 16
-.L00300_15_x86:
+.L00600_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -237,9 +509,9 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00300_15_x86
+ jne .L00600_15_x86
.align 16
-.L00416_79_x86:
+.L00716_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -412,7 +684,7 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00416_79_x86
+ jne .L00716_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax
@@ -561,3 +833,4 @@
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
+.comm OPENSSL_ia32cap_P,8,4
diff --git a/crypto/sha/asm/sha512-armv4.S b/crypto/sha/asm/sha512-armv4.S
index 5730192..fd46277 100644
--- a/crypto/sha/asm/sha512-armv4.S
+++ b/crypto/sha/asm/sha512-armv4.S
@@ -1775,7 +1775,7 @@
bne .Loop_neon
vldmia sp!,{d8-d15} @ epilogue
- .word 0xe12fff1e
+ bx lr @ .word 0xe12fff1e
#endif
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <[email protected]>"
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index 7faf37b..71aa935 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -565,7 +565,7 @@
bne .Loop_neon
vldmia sp!,{d8-d15} @ epilogue
- bx lr
+ ret @ bx lr
#endif
___
}
@@ -578,5 +578,6 @@
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha512-armv8.S b/crypto/sha/asm/sha512-armv8.S
new file mode 100644
index 0000000..6b0d194
--- /dev/null
+++ b/crypto/sha/asm/sha512-armv8.S
@@ -0,0 +1,1021 @@
+#include "arm_arch.h"
+
+.text
+
+.globl sha512_block_data_order
+.type sha512_block_data_order,%function
+.align 6
+sha512_block_data_order:
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ sub sp,sp,#4*8
+
+ ldp x20,x21,[x0] // load context
+ ldp x22,x23,[x0,#2*8]
+ ldp x24,x25,[x0,#4*8]
+ add x2,x1,x2,lsl#7 // end of input
+ ldp x26,x27,[x0,#6*8]
+ adr x30,K512
+ stp x0,x2,[x29,#96]
+
+.Loop:
+ ldp x3,x4,[x1],#2*8
+ ldr x19,[x30],#8 // *K++
+ eor x28,x21,x22 // magic seed
+ str x1,[x29,#112]
+#ifndef __ARMEB__
+ rev x3,x3 // 0
+#endif
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ eor x6,x24,x24,ror#23
+ and x17,x25,x24
+ bic x19,x26,x24
+ add x27,x27,x3 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x6,ror#18 // Sigma1(e)
+ ror x6,x20,#28
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ eor x17,x20,x20,ror#5
+ add x27,x27,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x23,x23,x27 // d+=h
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x6,x17,ror#34 // Sigma0(a)
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x27,x27,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x4,x4 // 1
+#endif
+ ldp x5,x6,[x1],#2*8
+ add x27,x27,x17 // h+=Sigma0(a)
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ eor x7,x23,x23,ror#23
+ and x17,x24,x23
+ bic x28,x25,x23
+ add x26,x26,x4 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x7,ror#18 // Sigma1(e)
+ ror x7,x27,#28
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ eor x17,x27,x27,ror#5
+ add x26,x26,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x22,x22,x26 // d+=h
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x7,x17,ror#34 // Sigma0(a)
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x26,x26,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x5,x5 // 2
+#endif
+ add x26,x26,x17 // h+=Sigma0(a)
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ eor x8,x22,x22,ror#23
+ and x17,x23,x22
+ bic x19,x24,x22
+ add x25,x25,x5 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x8,ror#18 // Sigma1(e)
+ ror x8,x26,#28
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ eor x17,x26,x26,ror#5
+ add x25,x25,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x21,x21,x25 // d+=h
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x8,x17,ror#34 // Sigma0(a)
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x25,x25,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x6,x6 // 3
+#endif
+ ldp x7,x8,[x1],#2*8
+ add x25,x25,x17 // h+=Sigma0(a)
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ eor x9,x21,x21,ror#23
+ and x17,x22,x21
+ bic x28,x23,x21
+ add x24,x24,x6 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x9,ror#18 // Sigma1(e)
+ ror x9,x25,#28
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ eor x17,x25,x25,ror#5
+ add x24,x24,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x20,x20,x24 // d+=h
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x9,x17,ror#34 // Sigma0(a)
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x24,x24,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x7,x7 // 4
+#endif
+ add x24,x24,x17 // h+=Sigma0(a)
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ eor x10,x20,x20,ror#23
+ and x17,x21,x20
+ bic x19,x22,x20
+ add x23,x23,x7 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x10,ror#18 // Sigma1(e)
+ ror x10,x24,#28
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ eor x17,x24,x24,ror#5
+ add x23,x23,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x27,x27,x23 // d+=h
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x10,x17,ror#34 // Sigma0(a)
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x23,x23,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x8,x8 // 5
+#endif
+ ldp x9,x10,[x1],#2*8
+ add x23,x23,x17 // h+=Sigma0(a)
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ eor x11,x27,x27,ror#23
+ and x17,x20,x27
+ bic x28,x21,x27
+ add x22,x22,x8 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x11,ror#18 // Sigma1(e)
+ ror x11,x23,#28
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ eor x17,x23,x23,ror#5
+ add x22,x22,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x26,x26,x22 // d+=h
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x11,x17,ror#34 // Sigma0(a)
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x22,x22,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x9,x9 // 6
+#endif
+ add x22,x22,x17 // h+=Sigma0(a)
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ eor x12,x26,x26,ror#23
+ and x17,x27,x26
+ bic x19,x20,x26
+ add x21,x21,x9 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x12,ror#18 // Sigma1(e)
+ ror x12,x22,#28
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ eor x17,x22,x22,ror#5
+ add x21,x21,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x25,x25,x21 // d+=h
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x12,x17,ror#34 // Sigma0(a)
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x21,x21,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x10,x10 // 7
+#endif
+ ldp x11,x12,[x1],#2*8
+ add x21,x21,x17 // h+=Sigma0(a)
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ eor x13,x25,x25,ror#23
+ and x17,x26,x25
+ bic x28,x27,x25
+ add x20,x20,x10 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x13,ror#18 // Sigma1(e)
+ ror x13,x21,#28
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ eor x17,x21,x21,ror#5
+ add x20,x20,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x24,x24,x20 // d+=h
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x13,x17,ror#34 // Sigma0(a)
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x20,x20,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x11,x11 // 8
+#endif
+ add x20,x20,x17 // h+=Sigma0(a)
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ eor x14,x24,x24,ror#23
+ and x17,x25,x24
+ bic x19,x26,x24
+ add x27,x27,x11 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x14,ror#18 // Sigma1(e)
+ ror x14,x20,#28
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ eor x17,x20,x20,ror#5
+ add x27,x27,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x23,x23,x27 // d+=h
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x14,x17,ror#34 // Sigma0(a)
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x27,x27,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x12,x12 // 9
+#endif
+ ldp x13,x14,[x1],#2*8
+ add x27,x27,x17 // h+=Sigma0(a)
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ eor x15,x23,x23,ror#23
+ and x17,x24,x23
+ bic x28,x25,x23
+ add x26,x26,x12 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x15,ror#18 // Sigma1(e)
+ ror x15,x27,#28
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ eor x17,x27,x27,ror#5
+ add x26,x26,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x22,x22,x26 // d+=h
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x15,x17,ror#34 // Sigma0(a)
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x26,x26,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x13,x13 // 10
+#endif
+ add x26,x26,x17 // h+=Sigma0(a)
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ eor x0,x22,x22,ror#23
+ and x17,x23,x22
+ bic x19,x24,x22
+ add x25,x25,x13 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x0,ror#18 // Sigma1(e)
+ ror x0,x26,#28
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ eor x17,x26,x26,ror#5
+ add x25,x25,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x21,x21,x25 // d+=h
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x0,x17,ror#34 // Sigma0(a)
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x25,x25,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x14,x14 // 11
+#endif
+ ldp x15,x0,[x1],#2*8
+ add x25,x25,x17 // h+=Sigma0(a)
+ str x6,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ eor x6,x21,x21,ror#23
+ and x17,x22,x21
+ bic x28,x23,x21
+ add x24,x24,x14 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x6,ror#18 // Sigma1(e)
+ ror x6,x25,#28
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ eor x17,x25,x25,ror#5
+ add x24,x24,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x20,x20,x24 // d+=h
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x6,x17,ror#34 // Sigma0(a)
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x24,x24,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x15,x15 // 12
+#endif
+ add x24,x24,x17 // h+=Sigma0(a)
+ str x7,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ eor x7,x20,x20,ror#23
+ and x17,x21,x20
+ bic x19,x22,x20
+ add x23,x23,x15 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x7,ror#18 // Sigma1(e)
+ ror x7,x24,#28
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ eor x17,x24,x24,ror#5
+ add x23,x23,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x27,x27,x23 // d+=h
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x7,x17,ror#34 // Sigma0(a)
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x23,x23,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x0,x0 // 13
+#endif
+ ldp x1,x2,[x1]
+ add x23,x23,x17 // h+=Sigma0(a)
+ str x8,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ eor x8,x27,x27,ror#23
+ and x17,x20,x27
+ bic x28,x21,x27
+ add x22,x22,x0 // h+=X[i]
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x8,ror#18 // Sigma1(e)
+ ror x8,x23,#28
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ eor x17,x23,x23,ror#5
+ add x22,x22,x16 // h+=Sigma1(e)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ add x26,x26,x22 // d+=h
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x8,x17,ror#34 // Sigma0(a)
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ //add x22,x22,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x1,x1 // 14
+#endif
+ ldr x6,[sp,#24]
+ add x22,x22,x17 // h+=Sigma0(a)
+ str x9,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ eor x9,x26,x26,ror#23
+ and x17,x27,x26
+ bic x19,x20,x26
+ add x21,x21,x1 // h+=X[i]
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x9,ror#18 // Sigma1(e)
+ ror x9,x22,#28
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ eor x17,x22,x22,ror#5
+ add x21,x21,x16 // h+=Sigma1(e)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ add x25,x25,x21 // d+=h
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x9,x17,ror#34 // Sigma0(a)
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ //add x21,x21,x17 // h+=Sigma0(a)
+#ifndef __ARMEB__
+ rev x2,x2 // 15
+#endif
+ ldr x7,[sp,#0]
+ add x21,x21,x17 // h+=Sigma0(a)
+ str x10,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x9,x4,#1
+ and x17,x26,x25
+ ror x8,x1,#19
+ bic x28,x27,x25
+ ror x10,x21,#28
+ add x20,x20,x2 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x9,x9,x4,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x10,x10,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x8,x8,x1,ror#61
+ eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x10,x21,ror#39 // Sigma0(a)
+ eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
+ add x3,x3,x12
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x3,x3,x9
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x3,x3,x8
+.Loop_16_xx:
+ ldr x8,[sp,#8]
+ str x11,[sp,#0]
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ ror x10,x5,#1
+ and x17,x25,x24
+ ror x9,x2,#19
+ bic x19,x26,x24
+ ror x11,x20,#28
+ add x27,x27,x3 // h+=X[i]
+ eor x16,x16,x24,ror#18
+ eor x10,x10,x5,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x24,ror#41 // Sigma1(e)
+ eor x11,x11,x20,ror#34
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x9,x9,x2,ror#61
+ eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
+ add x27,x27,x16 // h+=Sigma1(e)
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x11,x20,ror#39 // Sigma0(a)
+ eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
+ add x4,x4,x13
+ add x23,x23,x27 // d+=h
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x4,x4,x10
+ add x27,x27,x17 // h+=Sigma0(a)
+ add x4,x4,x9
+ ldr x9,[sp,#16]
+ str x12,[sp,#8]
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ ror x11,x6,#1
+ and x17,x24,x23
+ ror x10,x3,#19
+ bic x28,x25,x23
+ ror x12,x27,#28
+ add x26,x26,x4 // h+=X[i]
+ eor x16,x16,x23,ror#18
+ eor x11,x11,x6,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x23,ror#41 // Sigma1(e)
+ eor x12,x12,x27,ror#34
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x10,x10,x3,ror#61
+ eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
+ add x26,x26,x16 // h+=Sigma1(e)
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x12,x27,ror#39 // Sigma0(a)
+ eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
+ add x5,x5,x14
+ add x22,x22,x26 // d+=h
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x5,x5,x11
+ add x26,x26,x17 // h+=Sigma0(a)
+ add x5,x5,x10
+ ldr x10,[sp,#24]
+ str x13,[sp,#16]
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ ror x12,x7,#1
+ and x17,x23,x22
+ ror x11,x4,#19
+ bic x19,x24,x22
+ ror x13,x26,#28
+ add x25,x25,x5 // h+=X[i]
+ eor x16,x16,x22,ror#18
+ eor x12,x12,x7,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x22,ror#41 // Sigma1(e)
+ eor x13,x13,x26,ror#34
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x11,x11,x4,ror#61
+ eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
+ add x25,x25,x16 // h+=Sigma1(e)
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x13,x26,ror#39 // Sigma0(a)
+ eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
+ add x6,x6,x15
+ add x21,x21,x25 // d+=h
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x6,x6,x12
+ add x25,x25,x17 // h+=Sigma0(a)
+ add x6,x6,x11
+ ldr x11,[sp,#0]
+ str x14,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ ror x13,x8,#1
+ and x17,x22,x21
+ ror x12,x5,#19
+ bic x28,x23,x21
+ ror x14,x25,#28
+ add x24,x24,x6 // h+=X[i]
+ eor x16,x16,x21,ror#18
+ eor x13,x13,x8,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x21,ror#41 // Sigma1(e)
+ eor x14,x14,x25,ror#34
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x12,x12,x5,ror#61
+ eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
+ add x24,x24,x16 // h+=Sigma1(e)
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x14,x25,ror#39 // Sigma0(a)
+ eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
+ add x7,x7,x0
+ add x20,x20,x24 // d+=h
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x7,x7,x13
+ add x24,x24,x17 // h+=Sigma0(a)
+ add x7,x7,x12
+ ldr x12,[sp,#8]
+ str x15,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ ror x14,x9,#1
+ and x17,x21,x20
+ ror x13,x6,#19
+ bic x19,x22,x20
+ ror x15,x24,#28
+ add x23,x23,x7 // h+=X[i]
+ eor x16,x16,x20,ror#18
+ eor x14,x14,x9,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x20,ror#41 // Sigma1(e)
+ eor x15,x15,x24,ror#34
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x13,x13,x6,ror#61
+ eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
+ add x23,x23,x16 // h+=Sigma1(e)
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x15,x24,ror#39 // Sigma0(a)
+ eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
+ add x8,x8,x1
+ add x27,x27,x23 // d+=h
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x8,x8,x14
+ add x23,x23,x17 // h+=Sigma0(a)
+ add x8,x8,x13
+ ldr x13,[sp,#16]
+ str x0,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ ror x15,x10,#1
+ and x17,x20,x27
+ ror x14,x7,#19
+ bic x28,x21,x27
+ ror x0,x23,#28
+ add x22,x22,x8 // h+=X[i]
+ eor x16,x16,x27,ror#18
+ eor x15,x15,x10,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x27,ror#41 // Sigma1(e)
+ eor x0,x0,x23,ror#34
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x14,x14,x7,ror#61
+ eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
+ add x22,x22,x16 // h+=Sigma1(e)
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x0,x23,ror#39 // Sigma0(a)
+ eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
+ add x9,x9,x2
+ add x26,x26,x22 // d+=h
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x9,x9,x15
+ add x22,x22,x17 // h+=Sigma0(a)
+ add x9,x9,x14
+ ldr x14,[sp,#24]
+ str x1,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ ror x0,x11,#1
+ and x17,x27,x26
+ ror x15,x8,#19
+ bic x19,x20,x26
+ ror x1,x22,#28
+ add x21,x21,x9 // h+=X[i]
+ eor x16,x16,x26,ror#18
+ eor x0,x0,x11,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x26,ror#41 // Sigma1(e)
+ eor x1,x1,x22,ror#34
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x15,x15,x8,ror#61
+ eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
+ add x21,x21,x16 // h+=Sigma1(e)
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x1,x22,ror#39 // Sigma0(a)
+ eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
+ add x10,x10,x3
+ add x25,x25,x21 // d+=h
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x10,x10,x0
+ add x21,x21,x17 // h+=Sigma0(a)
+ add x10,x10,x15
+ ldr x15,[sp,#0]
+ str x2,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x1,x12,#1
+ and x17,x26,x25
+ ror x0,x9,#19
+ bic x28,x27,x25
+ ror x2,x21,#28
+ add x20,x20,x10 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x1,x1,x12,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x2,x2,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x0,x0,x9,ror#61
+ eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x2,x21,ror#39 // Sigma0(a)
+ eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
+ add x11,x11,x4
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x11,x11,x1
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x11,x11,x0
+ ldr x0,[sp,#8]
+ str x3,[sp,#0]
+ ror x16,x24,#14
+ add x27,x27,x19 // h+=K[i]
+ ror x2,x13,#1
+ and x17,x25,x24
+ ror x1,x10,#19
+ bic x19,x26,x24
+ ror x3,x20,#28
+ add x27,x27,x11 // h+=X[i]
+ eor x16,x16,x24,ror#18
+ eor x2,x2,x13,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x20,x21 // a^b, b^c in next round
+ eor x16,x16,x24,ror#41 // Sigma1(e)
+ eor x3,x3,x20,ror#34
+ add x27,x27,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x1,x1,x10,ror#61
+ eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
+ add x27,x27,x16 // h+=Sigma1(e)
+ eor x28,x28,x21 // Maj(a,b,c)
+ eor x17,x3,x20,ror#39 // Sigma0(a)
+ eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
+ add x12,x12,x5
+ add x23,x23,x27 // d+=h
+ add x27,x27,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x12,x12,x2
+ add x27,x27,x17 // h+=Sigma0(a)
+ add x12,x12,x1
+ ldr x1,[sp,#16]
+ str x4,[sp,#8]
+ ror x16,x23,#14
+ add x26,x26,x28 // h+=K[i]
+ ror x3,x14,#1
+ and x17,x24,x23
+ ror x2,x11,#19
+ bic x28,x25,x23
+ ror x4,x27,#28
+ add x26,x26,x12 // h+=X[i]
+ eor x16,x16,x23,ror#18
+ eor x3,x3,x14,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x27,x20 // a^b, b^c in next round
+ eor x16,x16,x23,ror#41 // Sigma1(e)
+ eor x4,x4,x27,ror#34
+ add x26,x26,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x2,x2,x11,ror#61
+ eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
+ add x26,x26,x16 // h+=Sigma1(e)
+ eor x19,x19,x20 // Maj(a,b,c)
+ eor x17,x4,x27,ror#39 // Sigma0(a)
+ eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
+ add x13,x13,x6
+ add x22,x22,x26 // d+=h
+ add x26,x26,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x13,x13,x3
+ add x26,x26,x17 // h+=Sigma0(a)
+ add x13,x13,x2
+ ldr x2,[sp,#24]
+ str x5,[sp,#16]
+ ror x16,x22,#14
+ add x25,x25,x19 // h+=K[i]
+ ror x4,x15,#1
+ and x17,x23,x22
+ ror x3,x12,#19
+ bic x19,x24,x22
+ ror x5,x26,#28
+ add x25,x25,x13 // h+=X[i]
+ eor x16,x16,x22,ror#18
+ eor x4,x4,x15,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x26,x27 // a^b, b^c in next round
+ eor x16,x16,x22,ror#41 // Sigma1(e)
+ eor x5,x5,x26,ror#34
+ add x25,x25,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x3,x3,x12,ror#61
+ eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
+ add x25,x25,x16 // h+=Sigma1(e)
+ eor x28,x28,x27 // Maj(a,b,c)
+ eor x17,x5,x26,ror#39 // Sigma0(a)
+ eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
+ add x14,x14,x7
+ add x21,x21,x25 // d+=h
+ add x25,x25,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x14,x14,x4
+ add x25,x25,x17 // h+=Sigma0(a)
+ add x14,x14,x3
+ ldr x3,[sp,#0]
+ str x6,[sp,#24]
+ ror x16,x21,#14
+ add x24,x24,x28 // h+=K[i]
+ ror x5,x0,#1
+ and x17,x22,x21
+ ror x4,x13,#19
+ bic x28,x23,x21
+ ror x6,x25,#28
+ add x24,x24,x14 // h+=X[i]
+ eor x16,x16,x21,ror#18
+ eor x5,x5,x0,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x25,x26 // a^b, b^c in next round
+ eor x16,x16,x21,ror#41 // Sigma1(e)
+ eor x6,x6,x25,ror#34
+ add x24,x24,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x4,x4,x13,ror#61
+ eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
+ add x24,x24,x16 // h+=Sigma1(e)
+ eor x19,x19,x26 // Maj(a,b,c)
+ eor x17,x6,x25,ror#39 // Sigma0(a)
+ eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
+ add x15,x15,x8
+ add x20,x20,x24 // d+=h
+ add x24,x24,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x15,x15,x5
+ add x24,x24,x17 // h+=Sigma0(a)
+ add x15,x15,x4
+ ldr x4,[sp,#8]
+ str x7,[sp,#0]
+ ror x16,x20,#14
+ add x23,x23,x19 // h+=K[i]
+ ror x6,x1,#1
+ and x17,x21,x20
+ ror x5,x14,#19
+ bic x19,x22,x20
+ ror x7,x24,#28
+ add x23,x23,x15 // h+=X[i]
+ eor x16,x16,x20,ror#18
+ eor x6,x6,x1,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x24,x25 // a^b, b^c in next round
+ eor x16,x16,x20,ror#41 // Sigma1(e)
+ eor x7,x7,x24,ror#34
+ add x23,x23,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x5,x5,x14,ror#61
+ eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
+ add x23,x23,x16 // h+=Sigma1(e)
+ eor x28,x28,x25 // Maj(a,b,c)
+ eor x17,x7,x24,ror#39 // Sigma0(a)
+ eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
+ add x0,x0,x9
+ add x27,x27,x23 // d+=h
+ add x23,x23,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x0,x0,x6
+ add x23,x23,x17 // h+=Sigma0(a)
+ add x0,x0,x5
+ ldr x5,[sp,#16]
+ str x8,[sp,#8]
+ ror x16,x27,#14
+ add x22,x22,x28 // h+=K[i]
+ ror x7,x2,#1
+ and x17,x20,x27
+ ror x6,x15,#19
+ bic x28,x21,x27
+ ror x8,x23,#28
+ add x22,x22,x0 // h+=X[i]
+ eor x16,x16,x27,ror#18
+ eor x7,x7,x2,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x23,x24 // a^b, b^c in next round
+ eor x16,x16,x27,ror#41 // Sigma1(e)
+ eor x8,x8,x23,ror#34
+ add x22,x22,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x6,x6,x15,ror#61
+ eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
+ add x22,x22,x16 // h+=Sigma1(e)
+ eor x19,x19,x24 // Maj(a,b,c)
+ eor x17,x8,x23,ror#39 // Sigma0(a)
+ eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
+ add x1,x1,x10
+ add x26,x26,x22 // d+=h
+ add x22,x22,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x1,x1,x7
+ add x22,x22,x17 // h+=Sigma0(a)
+ add x1,x1,x6
+ ldr x6,[sp,#24]
+ str x9,[sp,#16]
+ ror x16,x26,#14
+ add x21,x21,x19 // h+=K[i]
+ ror x8,x3,#1
+ and x17,x27,x26
+ ror x7,x0,#19
+ bic x19,x20,x26
+ ror x9,x22,#28
+ add x21,x21,x1 // h+=X[i]
+ eor x16,x16,x26,ror#18
+ eor x8,x8,x3,ror#8
+ orr x17,x17,x19 // Ch(e,f,g)
+ eor x19,x22,x23 // a^b, b^c in next round
+ eor x16,x16,x26,ror#41 // Sigma1(e)
+ eor x9,x9,x22,ror#34
+ add x21,x21,x17 // h+=Ch(e,f,g)
+ and x28,x28,x19 // (b^c)&=(a^b)
+ eor x7,x7,x0,ror#61
+ eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
+ add x21,x21,x16 // h+=Sigma1(e)
+ eor x28,x28,x23 // Maj(a,b,c)
+ eor x17,x9,x22,ror#39 // Sigma0(a)
+ eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
+ add x2,x2,x11
+ add x25,x25,x21 // d+=h
+ add x21,x21,x28 // h+=Maj(a,b,c)
+ ldr x28,[x30],#8 // *K++, x19 in next round
+ add x2,x2,x8
+ add x21,x21,x17 // h+=Sigma0(a)
+ add x2,x2,x7
+ ldr x7,[sp,#0]
+ str x10,[sp,#24]
+ ror x16,x25,#14
+ add x20,x20,x28 // h+=K[i]
+ ror x9,x4,#1
+ and x17,x26,x25
+ ror x8,x1,#19
+ bic x28,x27,x25
+ ror x10,x21,#28
+ add x20,x20,x2 // h+=X[i]
+ eor x16,x16,x25,ror#18
+ eor x9,x9,x4,ror#8
+ orr x17,x17,x28 // Ch(e,f,g)
+ eor x28,x21,x22 // a^b, b^c in next round
+ eor x16,x16,x25,ror#41 // Sigma1(e)
+ eor x10,x10,x21,ror#34
+ add x20,x20,x17 // h+=Ch(e,f,g)
+ and x19,x19,x28 // (b^c)&=(a^b)
+ eor x8,x8,x1,ror#61
+ eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
+ add x20,x20,x16 // h+=Sigma1(e)
+ eor x19,x19,x22 // Maj(a,b,c)
+ eor x17,x10,x21,ror#39 // Sigma0(a)
+ eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
+ add x3,x3,x12
+ add x24,x24,x20 // d+=h
+ add x20,x20,x19 // h+=Maj(a,b,c)
+ ldr x19,[x30],#8 // *K++, x28 in next round
+ add x3,x3,x9
+ add x20,x20,x17 // h+=Sigma0(a)
+ add x3,x3,x8
+ cbnz x19,.Loop_16_xx
+
+ ldp x0,x2,[x29,#96]
+ ldr x1,[x29,#112]
+ sub x30,x30,#648 // rewind
+
+ ldp x3,x4,[x0]
+ ldp x5,x6,[x0,#2*8]
+ add x1,x1,#14*8 // advance input pointer
+ ldp x7,x8,[x0,#4*8]
+ add x20,x20,x3
+ ldp x9,x10,[x0,#6*8]
+ add x21,x21,x4
+ add x22,x22,x5
+ add x23,x23,x6
+ stp x20,x21,[x0]
+ add x24,x24,x7
+ add x25,x25,x8
+ stp x22,x23,[x0,#2*8]
+ add x26,x26,x9
+ add x27,x27,x10
+ cmp x1,x2
+ stp x24,x25,[x0,#4*8]
+ stp x26,x27,[x0,#6*8]
+ b.ne .Loop
+
+ ldp x19,x20,[x29,#16]
+ add sp,sp,#4*8
+ ldp x21,x22,[x29,#32]
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#128
+ ret
+.size sha512_block_data_order,.-sha512_block_data_order
+
+.align 6
+.type K512,%object
+K512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ .quad 0 // terminator
+.size K512,.-K512
+.align 3
+.LOPENSSL_armcap_P:
+ .quad OPENSSL_armcap_P-.
+.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <[email protected]>"
+.align 2
+.comm OPENSSL_armcap_P,4,4
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
new file mode 100644
index 0000000..6935ed6
--- /dev/null
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -0,0 +1,414 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <[email protected]> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+# SHA256-hw SHA256(*) SHA512
+# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
+# Cortex-A5x n/a n/a n/a
+#
+# (*) Software SHA256 results are of lesser relevance, presented
+# mostly for informational purposes.
+# (**) The result is a trade-off: it's possible to improve it by
+# 10%, but at the cost of 20% loss on Cortex-A5x.
+
+$flavour=shift;
+$output=shift;
+open STDOUT,">$output";
+
+if ($output =~ /512/) {
+ $BITS=512;
+ $SZ=8;
+ @Sigma0=(28,34,39);
+ @Sigma1=(14,18,41);
+ @sigma0=(1, 8, 7);
+ @sigma1=(19,61, 6);
+ $rounds=80;
+ $reg_t="x";
+} else {
+ $BITS=256;
+ $SZ=4;
+ @Sigma0=( 2,13,22);
+ @Sigma1=( 6,11,25);
+ @sigma0=( 7,18, 3);
+ @sigma1=(17,19,10);
+ $rounds=64;
+ $reg_t="w";
+}
+
+$func="sha${BITS}_block_data_order";
+
+($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
+
+@X=map("$reg_t$_",(3..15,0..2));
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
+($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
+
+sub BODY_00_xx {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)&15;
+my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
+ $T0=@X[$i+3] if ($i<11);
+
+$code.=<<___ if ($i<16);
+#ifndef __ARMEB__
+ rev @X[$i],@X[$i] // $i
+#endif
+___
+$code.=<<___ if ($i<13 && ($i&1));
+ ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ
+___
+$code.=<<___ if ($i==13);
+ ldp @X[14],@X[15],[$inp]
+___
+$code.=<<___ if ($i>=14);
+ ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
+___
+$code.=<<___ if ($i>0 && $i<16);
+ add $a,$a,$t1 // h+=Sigma0(a)
+___
+$code.=<<___ if ($i>=11);
+ str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
+___
+# While ARMv8 specifies merged rotate-n-logical operation such as
+# 'eor x,y,z,ror#n', it was found to negatively affect performance
+# on Apple A7. The reason seems to be that it requires even 'y' to
+# be available earlier. This means that such merged instruction is
+# not necessarily best choice on critical path... On the other hand
+# Cortex-A5x handles merged instructions much better than disjoint
+# rotate and logical... See (**) footnote above.
+$code.=<<___ if ($i<15);
+ ror $t0,$e,#$Sigma1[0]
+ add $h,$h,$t2 // h+=K[i]
+ eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
+ and $t1,$f,$e
+ bic $t2,$g,$e
+ add $h,$h,@X[$i&15] // h+=X[i]
+ orr $t1,$t1,$t2 // Ch(e,f,g)
+ eor $t2,$a,$b // a^b, b^c in next round
+ eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e)
+ ror $T0,$a,#$Sigma0[0]
+ add $h,$h,$t1 // h+=Ch(e,f,g)
+ eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
+ add $h,$h,$t0 // h+=Sigma1(e)
+ and $t3,$t3,$t2 // (b^c)&=(a^b)
+ add $d,$d,$h // d+=h
+ eor $t3,$t3,$b // Maj(a,b,c)
+ eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a)
+ add $h,$h,$t3 // h+=Maj(a,b,c)
+ ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
+ //add $h,$h,$t1 // h+=Sigma0(a)
+___
+$code.=<<___ if ($i>=15);
+ ror $t0,$e,#$Sigma1[0]
+ add $h,$h,$t2 // h+=K[i]
+ ror $T1,@X[($j+1)&15],#$sigma0[0]
+ and $t1,$f,$e
+ ror $T2,@X[($j+14)&15],#$sigma1[0]
+ bic $t2,$g,$e
+ ror $T0,$a,#$Sigma0[0]
+ add $h,$h,@X[$i&15] // h+=X[i]
+ eor $t0,$t0,$e,ror#$Sigma1[1]
+ eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
+ orr $t1,$t1,$t2 // Ch(e,f,g)
+ eor $t2,$a,$b // a^b, b^c in next round
+ eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e)
+ eor $T0,$T0,$a,ror#$Sigma0[1]
+ add $h,$h,$t1 // h+=Ch(e,f,g)
+ and $t3,$t3,$t2 // (b^c)&=(a^b)
+ eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
+ eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1])
+ add $h,$h,$t0 // h+=Sigma1(e)
+ eor $t3,$t3,$b // Maj(a,b,c)
+ eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a)
+ eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14])
+ add @X[$j],@X[$j],@X[($j+9)&15]
+ add $d,$d,$h // d+=h
+ add $h,$h,$t3 // h+=Maj(a,b,c)
+ ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
+ add @X[$j],@X[$j],$T1
+ add $h,$h,$t1 // h+=Sigma0(a)
+ add @X[$j],@X[$j],$T2
+___
+ ($t2,$t3)=($t3,$t2);
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.globl $func
+.type $func,%function
+.align 6
+$func:
+___
+$code.=<<___ if ($SZ==4);
+ ldr x16,.LOPENSSL_armcap_P
+ adr x17,.LOPENSSL_armcap_P
+ add x16,x16,x17
+ ldr w16,[x16]
+ tst w16,#ARMV8_SHA256
+ b.ne .Lv8_entry
+___
+$code.=<<___;
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ sub sp,sp,#4*$SZ
+
+ ldp $A,$B,[$ctx] // load context
+ ldp $C,$D,[$ctx,#2*$SZ]
+ ldp $E,$F,[$ctx,#4*$SZ]
+ add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
+ ldp $G,$H,[$ctx,#6*$SZ]
+ adr $Ktbl,K$BITS
+ stp $ctx,$num,[x29,#96]
+
+.Loop:
+ ldp @X[0],@X[1],[$inp],#2*$SZ
+ ldr $t2,[$Ktbl],#$SZ // *K++
+ eor $t3,$B,$C // magic seed
+ str $inp,[x29,#112]
+___
+for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=".Loop_16_xx:\n";
+for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ cbnz $t2,.Loop_16_xx
+
+ ldp $ctx,$num,[x29,#96]
+ ldr $inp,[x29,#112]
+ sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind
+
+ ldp @X[0],@X[1],[$ctx]
+ ldp @X[2],@X[3],[$ctx,#2*$SZ]
+ add $inp,$inp,#14*$SZ // advance input pointer
+ ldp @X[4],@X[5],[$ctx,#4*$SZ]
+ add $A,$A,@X[0]
+ ldp @X[6],@X[7],[$ctx,#6*$SZ]
+ add $B,$B,@X[1]
+ add $C,$C,@X[2]
+ add $D,$D,@X[3]
+ stp $A,$B,[$ctx]
+ add $E,$E,@X[4]
+ add $F,$F,@X[5]
+ stp $C,$D,[$ctx,#2*$SZ]
+ add $G,$G,@X[6]
+ add $H,$H,@X[7]
+ cmp $inp,$num
+ stp $E,$F,[$ctx,#4*$SZ]
+ stp $G,$H,[$ctx,#6*$SZ]
+ b.ne .Loop
+
+ ldp x19,x20,[x29,#16]
+ add sp,sp,#4*$SZ
+ ldp x21,x22,[x29,#32]
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#128
+ ret
+.size $func,.-$func
+
+.align 6
+.type K$BITS,%object
+K$BITS:
+___
+$code.=<<___ if ($SZ==8);
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ .quad 0 // terminator
+___
+$code.=<<___ if ($SZ==4);
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ .long 0 //terminator
+___
+$code.=<<___;
+.size K$BITS,.-K$BITS
+.align 3
+.LOPENSSL_armcap_P:
+ .quad OPENSSL_armcap_P-.
+.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+if ($SZ==4) {
+my $Ktbl="x3";
+
+my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
+my @MSG=map("v$_.16b",(4..7));
+my ($W0,$W1)=("v16.4s","v17.4s");
+my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
+
+$code.=<<___;
+.type sha256_block_armv8,%function
+.align 6
+sha256_block_armv8:
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ ld1.32 {$ABCD,$EFGH},[$ctx]
+ adr $Ktbl,K256
+
+.Loop_hw:
+ ld1 {@MSG[0]-@MSG[3]},[$inp],#64
+ sub $num,$num,#1
+ ld1.32 {$W0},[$Ktbl],#16
+ rev32 @MSG[0],@MSG[0]
+ rev32 @MSG[1],@MSG[1]
+ rev32 @MSG[2],@MSG[2]
+ rev32 @MSG[3],@MSG[3]
+ orr $ABCD_SAVE,$ABCD,$ABCD // offload
+ orr $EFGH_SAVE,$EFGH,$EFGH
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+ ld1.32 {$W1},[$Ktbl],#16
+ add.i32 $W0,$W0,@MSG[0]
+ sha256su0 @MSG[0],@MSG[1]
+ orr $abcd,$ABCD,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+ ld1.32 {$W1},[$Ktbl],#16
+ add.i32 $W0,$W0,@MSG[0]
+ orr $abcd,$ABCD,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ ld1.32 {$W0},[$Ktbl],#16
+ add.i32 $W1,$W1,@MSG[1]
+ orr $abcd,$ABCD,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ ld1.32 {$W1},[$Ktbl]
+ add.i32 $W0,$W0,@MSG[2]
+ sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind
+ orr $abcd,$ABCD,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ add.i32 $W1,$W1,@MSG[3]
+ orr $abcd,$ABCD,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ add.i32 $ABCD,$ABCD,$ABCD_SAVE
+ add.i32 $EFGH,$EFGH,$EFGH_SAVE
+
+ cbnz $num,.Loop_hw
+
+ st1.32 {$ABCD,$EFGH},[$ctx]
+
+ ldr x29,[sp],#16
+ ret
+.size sha256_block_armv8,.-sha256_block_armv8
+___
+}
+
+$code.=<<___;
+.comm OPENSSL_armcap_P,4,4
+___
+
+{ my %opcode = (
+ "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000,
+ "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 );
+
+ sub unsha256 {
+ my ($mnemonic,$arg)=@_;
+
+ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+ &&
+ sprintf ".inst\t0x%08x\t//%s %s",
+ $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+ $mnemonic,$arg;
+ }
+}
+
+foreach(split("\n",$code)) {
+
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
+
+ s/\.\w?32\b//o and s/\.16b/\.4s/go;
+ m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
+
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/crypto/srp/srp_vfy.c b/crypto/srp/srp_vfy.c
index 4a3d13e..fdca19f 100644
--- a/crypto/srp/srp_vfy.c
+++ b/crypto/srp/srp_vfy.c
@@ -93,6 +93,9 @@
else a[i] = loc - b64table;
++i;
}
+ /* if nothing valid to process we have a zero length response */
+ if (i == 0)
+ return 0;
size = i;
i = size - 1;
j = size;
diff --git a/crypto/symhacks.h b/crypto/symhacks.h
index 07a412f..bd2f000 100644
--- a/crypto/symhacks.h
+++ b/crypto/symhacks.h
@@ -204,6 +204,12 @@
#define SSL_CTX_set_next_protos_advertised_cb SSL_CTX_set_next_protos_adv_cb
#undef SSL_CTX_set_next_proto_select_cb
#define SSL_CTX_set_next_proto_select_cb SSL_CTX_set_next_proto_sel_cb
+#undef ssl3_cbc_record_digest_supported
+#define ssl3_cbc_record_digest_supported ssl3_cbc_record_digest_support
+#undef ssl_check_clienthello_tlsext_late
+#define ssl_check_clienthello_tlsext_late ssl_check_clihello_tlsext_late
+#undef ssl_check_clienthello_tlsext_early
+#define ssl_check_clienthello_tlsext_early ssl_check_clihello_tlsext_early
/* Hack some long ENGINE names */
#undef ENGINE_get_default_BN_mod_exp_crt
diff --git a/crypto/x509/by_dir.c b/crypto/x509/by_dir.c
index 27ca515..c6602da 100644
--- a/crypto/x509/by_dir.c
+++ b/crypto/x509/by_dir.c
@@ -218,7 +218,7 @@
s=dir;
p=s;
- for (;;p++)
+ do
{
if ((*p == LIST_SEPARATOR_CHAR) || (*p == '\0'))
{
@@ -264,9 +264,7 @@
return 0;
}
}
- if (*p == '\0')
- break;
- }
+ } while (*p++ != '\0');
return 1;
}
diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c
index 5195ffe..920066a 100644
--- a/crypto/x509/x509_vfy.c
+++ b/crypto/x509/x509_vfy.c
@@ -1462,10 +1462,9 @@
* a certificate was revoked. This has since been changed since
* critical extension can change the meaning of CRL entries.
*/
- if (crl->flags & EXFLAG_CRITICAL)
+ if (!(ctx->param->flags & X509_V_FLAG_IGNORE_CRITICAL)
+ && (crl->flags & EXFLAG_CRITICAL))
{
- if (ctx->param->flags & X509_V_FLAG_IGNORE_CRITICAL)
- return 1;
ctx->error = X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION;
ok = ctx->verify_cb(0, ctx);
if(!ok)
diff --git a/crypto/x509v3/v3_purp.c b/crypto/x509v3/v3_purp.c
index ad68865..f59bfc1 100644
--- a/crypto/x509v3/v3_purp.c
+++ b/crypto/x509v3/v3_purp.c
@@ -389,8 +389,8 @@
/* Handle proxy certificates */
if((pci=X509_get_ext_d2i(x, NID_proxyCertInfo, NULL, NULL))) {
if (x->ex_flags & EXFLAG_CA
- || X509_get_ext_by_NID(x, NID_subject_alt_name, 0) >= 0
- || X509_get_ext_by_NID(x, NID_issuer_alt_name, 0) >= 0) {
+ || X509_get_ext_by_NID(x, NID_subject_alt_name, -1) >= 0
+ || X509_get_ext_by_NID(x, NID_issuer_alt_name, -1) >= 0) {
x->ex_flags |= EXFLAG_INVALID;
}
if (pci->pcPathLengthConstraint) {
@@ -670,7 +670,7 @@
return 0;
/* Extended Key Usage MUST be critical */
- i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, 0);
+ i_ext = X509_get_ext_by_NID((X509 *) x, NID_ext_key_usage, -1);
if (i_ext >= 0)
{
X509_EXTENSION *ext = X509_get_ext((X509 *) x, i_ext);
diff --git a/crypto/x86cpuid.S b/crypto/x86cpuid.S
index 73b5d98..10be221 100644
--- a/crypto/x86cpuid.S
+++ b/crypto/x86cpuid.S
@@ -226,6 +226,18 @@
movl (%ecx),%ecx
btl $1,(%ecx)
jnc .L015no_x87
+ andl $83886080,%ecx
+ cmpl $83886080,%ecx
+ jne .L016no_sse2
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+.L016no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
.L015no_x87:
leal 4(%esp),%eax
@@ -241,11 +253,11 @@
pushl %ebx
nop
movl (%edx),%eax
-.L016spin:
+.L017spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
- jne .L016spin
+ jne .L017spin
movl %ebx,%eax
popl %ebx
ret
@@ -286,32 +298,32 @@
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
- jae .L017lot
+ jae .L018lot
cmpl $0,%ecx
- je .L018ret
-.L019little:
+ je .L019ret
+.L020little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
- jnz .L019little
-.L018ret:
+ jnz .L020little
+.L019ret:
ret
.align 16
-.L017lot:
+.L018lot:
testl $3,%edx
- jz .L020aligned
+ jz .L021aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
- jmp .L017lot
-.L020aligned:
+ jmp .L018lot
+.L021aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
- jnz .L020aligned
+ jnz .L021aligned
cmpl $0,%ecx
- jne .L019little
+ jne .L020little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl OPENSSL_ia32_rdrand
@@ -320,15 +332,17 @@
OPENSSL_ia32_rdrand:
.L_OPENSSL_ia32_rdrand_begin:
movl $8,%ecx
-.L021loop:
+.L022loop:
.byte 15,199,240
- jc .L022break
- loop .L021loop
-.L022break:
+ jc .L023break
+ loop .L022loop
+.L023break:
cmpl $0,%eax
cmovel %ecx,%eax
ret
.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin
+.hidden OPENSSL_cpuid_setup
+.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,8,4
.section .init
call OPENSSL_cpuid_setup
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index b270b44..0212a5b 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -355,4 +355,7 @@
&initseg("OPENSSL_cpuid_setup");
+&hidden("OPENSSL_cpuid_setup");
+&hidden("OPENSSL_ia32cap_P");
+
&asm_finish();
diff --git a/e_os.h b/e_os.h
index 79c1392..6a0aad1 100644
--- a/e_os.h
+++ b/e_os.h
@@ -368,6 +368,13 @@
# define DEFAULT_HOME "C:"
# endif
+/* Avoid Windows 8 SDK GetVersion deprecated problems */
+#if defined(_MSC_VER) && _MSC_VER>=1800
+# define check_winnt() (1)
+#else
+# define check_winnt() (GetVersion() < 0x80000000)
+#endif
+
#else /* The non-microsoft world */
# ifdef OPENSSL_SYS_VMS
diff --git a/import_openssl.sh b/import_openssl.sh
index 727dd05..f16596b 100755
--- a/import_openssl.sh
+++ b/import_openssl.sh
@@ -33,6 +33,7 @@
# Ensure consistent sorting order / tool output.
export LANG=C
export LC_ALL=C
+PERL_EXE="perl -C0"
function die() {
declare -r message=$1
@@ -127,7 +128,16 @@
function gen_asm_arm () {
local OUT
OUT=$(default_asm_file "$@")
- perl "$1" > "$OUT"
+ $PERL_EXE "$1" void "$OUT" > "$OUT"
+}
+
+# Generate an ARMv8 64-bit assembly file.
+# $1: generator (perl script)
+# $2: [optional] output file name
+function gen_asm_arm64 () {
+ local OUT
+ OUT=$(default_asm_file "$@")
+ $PERL_EXE "$1" linux64 "$OUT" > "$OUT"
}
function gen_asm_mips () {
@@ -136,19 +146,19 @@
# The perl scripts expect to run the target compiler as $CC to determine
# the endianess of the target. Setting CC to true is a hack that forces the scripts
# to generate little endian output
- CC=true perl "$1" o32 > "$OUT"
+ CC=true $PERL_EXE "$1" o32 > "$OUT"
}
function gen_asm_x86 () {
local OUT
OUT=$(default_asm_file "$@")
- perl "$1" elf -fPIC > "$OUT"
+ $PERL_EXE "$1" elf -fPIC $(print_values_with_prefix -D $OPENSSL_CRYPTO_DEFINES_x86) > "$OUT"
}
function gen_asm_x86_64 () {
local OUT
OUT=$(default_asm_file "$@")
- perl "$1" elf "$OUT" > "$OUT"
+ $PERL_EXE "$1" elf "$OUT" > "$OUT"
}
@@ -176,38 +186,110 @@
echo "#"
}
-# Run Configure and generate makefile
-# $1: makefile
-# $2: 32 for 32-bit arch, 64 for 64-bit arch
-function generate_build_config_mk() {
- chmod +x ./Configure
- if [ $2 -eq "32" ] ; then
- ./Configure $CONFIGURE_ARGS $CONFIGURE_ARGS_32
- elif [ $2 -eq "64" ] ; then
- ./Configure $CONFIGURE_ARGS $CONFIGURE_ARGS_64
- else
- die "Unsupported $2-bit arch"
+function run_verbose() {
+ echo Running: $@
+ $@
+}
+
+function scan_opensslconf_for_flags() {
+ for flag in "$@"; do
+ awk "/^#define ${flag}$/ { print \$2 }" crypto/opensslconf.h
+ done
+}
+
+CRYPTO_CONF_FLAGS=(
+OPENSSL_CPUID_OBJ
+DES_LONG
+DES_PTR
+DES_RISC1
+DES_RISC2
+DES_UNROLL
+RC4_INT
+RC4_CHUNK
+RC4_INDEX
+)
+
+function check_asm_flags() {
+ local arch="$1"
+ local target="$2"
+ local unsorted_flags
+ local expected_flags
+ local actual_flags
+ local defines="OPENSSL_CRYPTO_DEFINES_$arch"
+
+ PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS $target
+
+ unsorted_flags="$(awk '/^CFLAG=/ { sub(/^CFLAG= .*-Wall /, ""); gsub(/-D/, ""); print; }' Makefile)"
+ unsorted_flags="$unsorted_flags $(scan_opensslconf_for_flags "${CRYPTO_CONF_FLAGS[@]}")"
+
+ expected_flags="$(echo $unsorted_flags | tr ' ' '\n' | sort | tr '\n' ' ')"
+ actual_flags="$(echo ${!defines} | tr ' ' '\n' | sort | tr '\n' ' ')"
+
+ if [[ $actual_flags != $expected_flags ]]; then
+ echo ${defines} is wrong!
+ echo " $actual_flags"
+ echo Please update to:
+ echo " $expected_flags"
+ exit 1
fi
+}
+
+# Run Configure and generate headers
+# $1: 32 for 32-bit arch, 64 for 64-bit arch, trusty for Trusty
+# $2: 1 if building for static version
+# Out: returns the cflags and depflags in variable $flags
+function generate_build_config_headers() {
+ chmod +x ./Configure
+ local configure_args_bits=CONFIGURE_ARGS_$1
+ local configure_args_stat=''
+ local outname=$1
+ if [[ $2 == 1 ]] ; then
+ configure_args_stat=CONFIGURE_ARGS_STATIC
+ outname="static-$1"
+ fi
+
+ if [[ $1 == trusty ]] ; then
+ PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS_TRUSTY
+ else
+ PERL=/usr/bin/perl run_verbose ./Configure $CONFIGURE_ARGS ${!configure_args_bits} ${!configure_args_stat}
+ fi
+
rm -f apps/CA.pl.bak crypto/opensslconf.h.bak
- mv -f crypto/opensslconf.h crypto/opensslconf-$2.h
- cp -f crypto/opensslconf-$2.h include/openssl/opensslconf-$2.h
+ mv -f crypto/opensslconf.h crypto/opensslconf-$outname.h
+ cp -f crypto/opensslconf-$outname.h include/openssl/opensslconf-$outname.h
- declare -r tmpfile=$(mktemp)
- (grep -e -D Makefile | grep -v CONFIGURE_ARGS= | grep -v OPTIONS= | grep -v -e -DOPENSSL_NO_DEPRECATED) > $tmpfile
-
+ local tmpfile=$(mktemp tmp.XXXXXXXXXX)
+ (grep -e -D Makefile | grep -v CONFIGURE_ARGS= | grep -v OPTIONS= | \
+ grep -v -e -DOPENSSL_NO_DEPRECATED) > $tmpfile
declare -r cflags=$(filter_by_egrep "^-D" $(grep -e "^CFLAG=" $tmpfile))
declare -r depflags=$(filter_by_egrep "^-D" $(grep -e "^DEPFLAG=" $tmpfile))
rm -f $tmpfile
- echo "Generating $(basename $1)"
- (
- print_autogenerated_header
+ flags="$cflags $depflags"
+}
- echo "openssl_cflags := \\"
- for cflag in $cflags $depflags; do
- echo " $cflag \\"
- done
- ) > $1
+# Run Configure and generate makefiles
+function generate_build_config_mk() {
+ chmod +x ./Configure
+ for bits in 32 64 trusty; do
+ # Header flags are output in $flags, first static, then dynamic
+ generate_build_config_headers $bits 1
+ local flags_static=$flags
+ generate_build_config_headers $bits
+
+ echo "Generating build-config-$bits.mk"
+ (
+ print_autogenerated_header
+
+ echo "openssl_cflags_$bits := \\"
+ for flag in $flags ; do echo " $flag \\" ; done
+ echo ""
+
+ echo "openssl_cflags_static_$bits := \\"
+ for flag in $flags_static; do echo " $flag \\" ; done
+ echo ""
+ ) > ../build-config-$bits.mk
+ done
}
# Generate crypto/opensslconf.h file including arch-specific files
@@ -215,12 +297,28 @@
echo "Generating opensslconf.h"
(
echo "// Auto-generated - DO NOT EDIT!"
+ echo "#ifndef OPENSSL_SYS_TRUSTY"
echo "#if defined(__LP64__)"
echo "#include \"opensslconf-64.h\""
echo "#else"
echo "#include \"opensslconf-32.h\""
echo "#endif"
+ echo "#else"
+ echo "#include \"opensslconf-trusty.h\""
+ echo "#endif"
) > crypto/opensslconf.h
+ # Generate a compatible version for the static library builds
+ echo "Generating opensslconf-static.h"
+ (
+ echo "// Auto-generated - DO NOT EDIT!"
+ echo "#if defined(__LP64__)"
+ echo "#include \"opensslconf-static-64.h\""
+ echo "#else"
+ echo "#include \"opensslconf-static-32.h\""
+ echo "#endif"
+ ) > crypto/opensslconf-static.h
+ # move it to output include files as well
+ cp -f crypto/opensslconf-static.h include/openssl/opensslconf-static.h
}
# Return the value of a computed variable name.
@@ -243,26 +341,48 @@
uniq_sort $(var_value $1)
}
+# Print the values in a list with a prefix
+# $1: prefix to use
+# $2+: values of list
+print_values_with_prefix() {
+ declare -r prefix=$1
+ shift
+ for src; do
+ echo -n " $prefix$src "
+ done
+}
+
# Print the definition of a given variable in a GNU Make build file.
# $1: Variable name (e.g. common_src_files)
-# $2+: Variable value (e.g. list of sources)
-print_vardef_in_mk() {
+# $2: prefix for each variable contents
+# $3+: Variable value (e.g. list of sources)
+print_vardef_with_prefix_in_mk() {
declare -r varname=$1
+ declare -r prefix=$2
+ shift
shift
if [ -z "$1" ]; then
echo "$varname :="
else
echo "$varname := \\"
for src; do
- echo " $src \\"
+ echo " $prefix$src \\"
done
fi
echo ""
}
+# Print the definition of a given variable in a GNU Make build file.
+# $1: Variable name (e.g. common_src_files)
+# $2+: Variable value (e.g. list of sources)
+print_vardef_in_mk() {
+ declare -r varname=$1
+ shift
+ print_vardef_with_prefix_in_mk $varname "" $@
+}
# Same as print_vardef_in_mk, but print a CFLAGS definition from
# a list of compiler defines.
-# $1: Variable name (e.g. common_c_flags)
+# $1: Variable name (e.g. common_cflags)
# $2: List of defines (e.g. OPENSSL_NO_CAMELLIA ...)
print_defines_in_mk() {
declare -r varname=$1
@@ -285,6 +405,7 @@
#
# $1: Target file name. (e.g. Crypto-config.mk)
# $2: Variable prefix. (e.g. CRYPTO)
+# $3: "host" or "target"
function generate_config_mk() {
declare -r output="$1"
declare -r prefix="$2"
@@ -294,42 +415,32 @@
(
print_autogenerated_header
echo \
-"# Before including this file, the local Android.mk must define the following
-# variables:
+"# This script will append to the following variables:
#
-# local_c_flags
-# local_c_includes
-# local_additional_dependencies
-#
-# This script will define the following variables:
-#
-# target_c_flags
-# target_c_includes
-# target_src_files
-#
-# host_c_flags
-# host_c_includes
-# host_src_files
-#
+# LOCAL_CFLAGS
+# LOCAL_C_INCLUDES
+# LOCAL_SRC_FILES_\$(TARGET_ARCH)
+# LOCAL_SRC_FILES_\$(TARGET_2ND_ARCH)
+# LOCAL_CFLAGS_\$(TARGET_ARCH)
+# LOCAL_CFLAGS_\$(TARGET_2ND_ARCH)
+# LOCAL_ADDITIONAL_DEPENDENCIES
-# Ensure these are empty.
-unknown_arch_c_flags :=
-unknown_arch_src_files :=
-unknown_arch_exclude_files :=
+LOCAL_ADDITIONAL_DEPENDENCIES += \$(LOCAL_PATH)/$(basename $output)
"
+
common_defines=$(var_sorted_value OPENSSL_${prefix}_DEFINES)
- print_defines_in_mk common_c_flags $common_defines
+ print_defines_in_mk common_cflags $common_defines
common_sources=$(var_sorted_value OPENSSL_${prefix}_SOURCES)
print_vardef_in_mk common_src_files $common_sources
common_includes=$(var_sorted_value OPENSSL_${prefix}_INCLUDES)
- print_vardef_in_mk common_c_includes $common_includes
+ print_vardef_with_prefix_in_mk common_c_includes external/openssl/ $common_includes
for arch in $all_archs; do
arch_defines=$(var_sorted_value OPENSSL_${prefix}_DEFINES_${arch})
- print_defines_in_mk ${arch}_c_flags $arch_defines
+ print_defines_in_mk ${arch}_cflags $arch_defines
arch_sources=$(var_sorted_value OPENSSL_${prefix}_SOURCES_${arch})
print_vardef_in_mk ${arch}_src_files $arch_sources
@@ -339,44 +450,48 @@
done
- echo "\
-target_arch := \$(TARGET_ARCH)
-ifeq (\$(target_arch)-\$(TARGET_HAS_BIGENDIAN),mips-true)
-target_arch := unknown_arch
-endif
+ if [ $3 == "target" ]; then
+ echo "
+LOCAL_CFLAGS += \$(common_cflags)
+LOCAL_C_INCLUDES += \$(common_c_includes)"
+ for arch in $all_archs; do
+ echo "
+LOCAL_SRC_FILES_${arch} += \$(filter-out \$(${arch}_exclude_files),\$(common_src_files) \$(${arch}_src_files))
+LOCAL_CFLAGS_${arch} += \$(${arch}_cflags)"
+ done
+ else
+ echo "
+LOCAL_CFLAGS += \$(common_cflags)
+LOCAL_C_INCLUDES += \$(common_c_includes) \$(local_c_includes)
-target_c_flags := \$(common_c_flags) \$(\$(target_arch)_c_flags) \$(local_c_flags)
-target_c_includes := \$(addprefix external/openssl/,\$(common_c_includes)) \$(local_c_includes)
-target_src_files := \$(common_src_files) \$(\$(target_arch)_src_files)
-target_src_files := \$(filter-out \$(\$(target_arch)_exclude_files), \$(target_src_files))
-
-ifeq (\$(HOST_OS)-\$(HOST_ARCH),linux-x86)
-host_arch := x86
+ifeq (\$(HOST_OS),linux)
+LOCAL_CFLAGS_x86 += \$(x86_cflags)
+LOCAL_SRC_FILES_x86 += \$(filter-out \$(x86_exclude_files), \$(common_src_files) \$(x86_src_files))
+LOCAL_CFLAGS_x86_64 += \$(x86_64_cflags)
+LOCAL_SRC_FILES_x86_64 += \$(filter-out \$(x86_64_exclude_files), \$(common_src_files) \$(x86_64_src_files))
else
-host_arch := unknown_arch
-endif
-
-host_c_flags := \$(common_c_flags) \$(\$(host_arch)_c_flags) \$(local_c_flags)
-host_c_includes := \$(addprefix external/openssl/,\$(common_c_includes)) \$(local_c_includes)
-host_src_files := \$(common_src_files) \$(\$(host_arch)_src_files)
-host_src_files := \$(filter-out \$(\$(host_arch)_exclude_files), \$(host_src_files))
-
-local_additional_dependencies += \$(LOCAL_PATH)/$(basename $output)
-"
-
+\$(warning Unknown host OS \$(HOST_OS))
+LOCAL_SRC_FILES += \$(common_src_files)
+endif"
+ fi
) > "$output"
}
function import() {
declare -r OPENSSL_SOURCE=$1
-
untar $OPENSSL_SOURCE readonly
applypatches $OPENSSL_DIR
+ convert_iso8859_to_utf8 $OPENSSL_DIR
cd $OPENSSL_DIR
- generate_build_config_mk ../build-config-32.mk 32
- generate_build_config_mk ../build-config-64.mk 64
+ # Check the ASM flags for each arch
+ check_asm_flags arm linux-armv4
+ check_asm_flags arm64 linux-aarch64
+ check_asm_flags x86 linux-elf
+ check_asm_flags x86_64 linux-x86_64
+
+ generate_build_config_mk
generate_opensslconf_h
cp -f LICENSE ../NOTICE
@@ -393,13 +508,23 @@
# Generate arm asm
gen_asm_arm crypto/aes/asm/aes-armv4.pl
+ gen_asm_arm crypto/aes/asm/aesv8-armx.pl
+ gen_asm_arm crypto/aes/asm/bsaes-armv7.pl
gen_asm_arm crypto/bn/asm/armv4-gf2m.pl
gen_asm_arm crypto/bn/asm/armv4-mont.pl
gen_asm_arm crypto/modes/asm/ghash-armv4.pl
+ gen_asm_arm crypto/modes/asm/ghashv8-armx.pl
gen_asm_arm crypto/sha/asm/sha1-armv4-large.pl
gen_asm_arm crypto/sha/asm/sha256-armv4.pl
gen_asm_arm crypto/sha/asm/sha512-armv4.pl
+ # Generate armv8 asm
+ gen_asm_arm64 crypto/aes/asm/aesv8-armx.pl crypto/aes/asm/aesv8-armx-64.S
+ gen_asm_arm64 crypto/modes/asm/ghashv8-armx.pl crypto/modes/asm/ghashv8-armx-64.S
+ gen_asm_arm64 crypto/sha/asm/sha1-armv8.pl
+ gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl crypto/sha/asm/sha256-armv8.S
+ gen_asm_arm64 crypto/sha/asm/sha512-armv8.pl
+
# Generate mips asm
gen_asm_mips crypto/aes/asm/aes-mips.pl
gen_asm_mips crypto/bn/asm/mips.pl crypto/bn/asm/bn-mips.S
@@ -462,9 +587,13 @@
cd ..
- generate_config_mk Crypto-config.mk CRYPTO
- generate_config_mk Ssl-config.mk SSL
- generate_config_mk Apps-config.mk APPS
+ generate_config_mk Crypto-config-target.mk CRYPTO target
+ generate_config_mk Crypto-config-host.mk CRYPTO host
+ generate_config_mk Crypto-config-trusty.mk CRYPTO_TRUSTY target
+ generate_config_mk Ssl-config-target.mk SSL target
+ generate_config_mk Ssl-config-host.mk SSL host
+ generate_config_mk Apps-config-target.mk APPS target
+ generate_config_mk Apps-config-host.mk APPS host
# Prune unnecessary sources
prune
@@ -507,7 +636,7 @@
# $1: Directory.
# Out: list of files in $1 that are encoded as ISO-8859.
function find_iso8859_files() {
- find $1 -type f -print0 | xargs -0 file | fgrep "ISO-8859" | cut -d: -f1
+ find $1 -type f -print0 | xargs -0 file --mime-encoding | grep -i "iso-8859" | cut -d: -f1
}
# Convert all ISO-8859 files in a given subdirectory to UTF-8
@@ -530,8 +659,7 @@
# Process new source
tar -zxf $OPENSSL_SOURCE
- convert_iso8859_to_utf8 $OPENSSL_DIR
- cp -rfP $OPENSSL_DIR $OPENSSL_DIR_ORIG
+ cp -RfP $OPENSSL_DIR $OPENSSL_DIR_ORIG
if [ ! -z $readonly ]; then
find $OPENSSL_DIR_ORIG -type f -print0 | xargs -0 chmod a-w
fi
@@ -555,12 +683,13 @@
cd $dir
# Apply appropriate patches
- for i in $OPENSSL_PATCHES; do
- if [ ! "$skip_patch" = "patches/$i" ]; then
+ patches=(../patches/[0-9][0-9][0-9][0-9]-*.patch)
+ for i in "${patches[@]}"; do
+ if [[ $skip_patch != ${i##*/} ]]; then
echo "Applying patch $i"
- patch -p1 --merge < ../patches/$i || die "Could not apply patches/$i. Fix source and run: $0 regenerate patches/$i"
+ patch -p1 < $i || die "Could not apply $i. Fix source and run: $0 regenerate patches/${i##*/}"
else
- echo "Skiping patch $i"
+ echo "Skiping patch ${i##*/}"
fi
done
diff --git a/include/openssl/bio.h b/include/openssl/bio.h
index 05699ab..d05fa22 100644
--- a/include/openssl/bio.h
+++ b/include/openssl/bio.h
@@ -266,6 +266,9 @@
#define BIO_RR_CONNECT 0x02
/* Returned from the accept BIO when an accept would have blocked */
#define BIO_RR_ACCEPT 0x03
+/* Returned from the SSL bio when the channel id retrieval code cannot find the
+ * private key. */
+#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04
/* These are passed by the BIO callback */
#define BIO_CB_FREE 0x01
diff --git a/include/openssl/bn.h b/include/openssl/bn.h
index f34248e..e776c07 100644
--- a/include/openssl/bn.h
+++ b/include/openssl/bn.h
@@ -538,6 +538,8 @@
BIGNUM *BN_mod_sqrt(BIGNUM *ret,
const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
+void BN_consttime_swap(BN_ULONG swap, BIGNUM *a, BIGNUM *b, int nwords);
+
/* Deprecated versions */
#ifndef OPENSSL_NO_DEPRECATED
BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
@@ -692,6 +694,10 @@
const BIGNUM *BN_get0_nist_prime_384(void);
const BIGNUM *BN_get0_nist_prime_521(void);
+int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv,
+ const unsigned char *message, size_t message_len,
+ BN_CTX *ctx);
+
/* library internal functions */
#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
@@ -774,11 +780,20 @@
#define bn_fix_top(a) bn_check_top(a)
+#define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2)
+#define bn_wcheck_size(bn, words) \
+ do { \
+ const BIGNUM *_bnum2 = (bn); \
+ assert(words <= (_bnum2)->dmax && words >= (_bnum2)->top); \
+ } while(0)
+
#else /* !BN_DEBUG */
#define bn_pollute(a)
#define bn_check_top(a)
#define bn_fix_top(a) bn_correct_top(a)
+#define bn_check_size(bn, bits)
+#define bn_wcheck_size(bn, words)
#endif
@@ -842,6 +857,7 @@
#define BN_F_BN_EXP 123
#define BN_F_BN_EXPAND2 108
#define BN_F_BN_EXPAND_INTERNAL 120
+#define BN_F_BN_GENERATE_DSA_NONCE 140
#define BN_F_BN_GF2M_MOD 131
#define BN_F_BN_GF2M_MOD_EXP 132
#define BN_F_BN_GF2M_MOD_MUL 133
@@ -881,6 +897,7 @@
#define BN_R_NOT_INITIALIZED 107
#define BN_R_NO_INVERSE 108
#define BN_R_NO_SOLUTION 116
+#define BN_R_PRIVATE_KEY_TOO_LARGE 117
#define BN_R_P_IS_NOT_PRIME 112
#define BN_R_TOO_MANY_ITERATIONS 113
#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
diff --git a/include/openssl/dsa.h b/include/openssl/dsa.h
index a6f6d0b..7531c65 100644
--- a/include/openssl/dsa.h
+++ b/include/openssl/dsa.h
@@ -96,6 +96,10 @@
* faster variable sliding window method to
* be used for all exponents.
*/
+#define DSA_FLAG_NONCE_FROM_HASH 0x04 /* Causes the DSA nonce to be calculated
+ from SHA512(private_key + H(message) +
+ random). This strengthens DSA against a
+ weak PRNG. */
/* If this flag is set the DSA method is FIPS compliant and can be used
* in FIPS mode. This is set in the validated module method. If an
@@ -130,8 +134,9 @@
{
const char *name;
DSA_SIG * (*dsa_do_sign)(const unsigned char *dgst, int dlen, DSA *dsa);
- int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp,
- BIGNUM **rp);
+ int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in,
+ BIGNUM **kinvp, BIGNUM **rp,
+ const unsigned char *dgst, int dlen);
int (*dsa_do_verify)(const unsigned char *dgst, int dgst_len,
DSA_SIG *sig, DSA *dsa);
int (*dsa_mod_exp)(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1,
@@ -317,6 +322,7 @@
#define DSA_R_MISSING_PARAMETERS 101
#define DSA_R_MODULUS_TOO_LARGE 103
#define DSA_R_NEED_NEW_SETUP_VALUES 110
+#define DSA_R_NONCE_CANNOT_BE_PRECOMPUTED 112
#define DSA_R_NON_FIPS_DSA_METHOD 111
#define DSA_R_NO_PARAMETERS_SET 107
#define DSA_R_PARAMETER_ENCODING_ERROR 105
diff --git a/include/openssl/ec.h b/include/openssl/ec.h
index dfe8710..d008a0d 100644
--- a/include/openssl/ec.h
+++ b/include/openssl/ec.h
@@ -819,6 +819,17 @@
/* wrapper functions for the underlying EC_GROUP object */
void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+/** Sets whether ECDSA operations with the given key will calculate their k
+ * value from SHA512(private_key + message + random) in order to protect
+ * against a weak PRNG.
+ * \param on Whether to calculate k from a hash or not
+ */
+void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on);
+
+/** Returns the value of nonce_from_hash
+ */
+int EC_KEY_get_nonce_from_hash(const EC_KEY *key);
+
/** Creates a table of pre-computed multiples of the generator to
* accelerate further EC_KEY operations.
* \param key EC_KEY object
diff --git a/include/openssl/ecdsa.h b/include/openssl/ecdsa.h
index 7fb5254..dc6a36b 100644
--- a/include/openssl/ecdsa.h
+++ b/include/openssl/ecdsa.h
@@ -250,6 +250,7 @@
#define ECDSA_R_ERR_EC_LIB 102
#define ECDSA_R_MISSING_PARAMETERS 103
#define ECDSA_R_NEED_NEW_SETUP_VALUES 106
+#define ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED 108
#define ECDSA_R_NON_FIPS_METHOD 107
#define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104
#define ECDSA_R_SIGNATURE_MALLOC_FAILED 105
diff --git a/include/openssl/kssl.h b/include/openssl/kssl.h
index 8242fd5..e4df843 100644
--- a/include/openssl/kssl.h
+++ b/include/openssl/kssl.h
@@ -70,6 +70,15 @@
#include <stdio.h>
#include <ctype.h>
#include <krb5.h>
+#ifdef OPENSSL_SYS_WIN32
+/* These can sometimes get redefined indirectly by krb5 header files
+ * after they get undefed in ossl_typ.h
+ */
+#undef X509_NAME
+#undef X509_EXTENSIONS
+#undef OCSP_REQUEST
+#undef OCSP_RESPONSE
+#endif
#ifdef __cplusplus
extern "C" {
diff --git a/include/openssl/ocsp.h b/include/openssl/ocsp.h
index 31e4574..f14e9f7 100644
--- a/include/openssl/ocsp.h
+++ b/include/openssl/ocsp.h
@@ -90,6 +90,13 @@
#define OCSP_RESPID_KEY 0x400
#define OCSP_NOTIME 0x800
+#ifdef OPENSSL_SYS_WIN32
+ /* Under Win32 these are defined in wincrypt.h */
+#undef OCSP_REQUEST
+#undef X509_NAME
+#undef OCSP_RESPONSE
+#endif
+
/* CertID ::= SEQUENCE {
* hashAlgorithm AlgorithmIdentifier,
* issuerNameHash OCTET STRING, -- Hash of Issuer's DN
diff --git a/include/openssl/opensslconf-32.h b/include/openssl/opensslconf-32.h
index d662548..caf6f1b 100644
--- a/include/openssl/opensslconf-32.h
+++ b/include/openssl/opensslconf-32.h
@@ -53,6 +53,9 @@
#ifndef OPENSSL_NO_RFC3779
# define OPENSSL_NO_RFC3779
#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
#ifndef OPENSSL_NO_RSAX
# define OPENSSL_NO_RSAX
#endif
@@ -137,6 +140,9 @@
# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
# define NO_RFC3779
# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
# define NO_RSAX
# endif
diff --git a/include/openssl/opensslconf-64.h b/include/openssl/opensslconf-64.h
index 70c5a2c..88fb041 100644
--- a/include/openssl/opensslconf-64.h
+++ b/include/openssl/opensslconf-64.h
@@ -53,6 +53,9 @@
#ifndef OPENSSL_NO_RFC3779
# define OPENSSL_NO_RFC3779
#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
#ifndef OPENSSL_NO_RSAX
# define OPENSSL_NO_RSAX
#endif
@@ -137,6 +140,9 @@
# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
# define NO_RFC3779
# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
# define NO_RSAX
# endif
diff --git a/include/openssl/opensslconf-static-32.h b/include/openssl/opensslconf-static-32.h
new file mode 100644
index 0000000..caf6f1b
--- /dev/null
+++ b/include/openssl/opensslconf-static-32.h
@@ -0,0 +1,322 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_THREADS
+# define OPENSSL_THREADS
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <unistd.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned char
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#define RC4_CHUNK unsigned long
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned int
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#define BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#define BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#define DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/include/openssl/opensslconf-static-64.h b/include/openssl/opensslconf-static-64.h
new file mode 100644
index 0000000..88fb041
--- /dev/null
+++ b/include/openssl/opensslconf-static-64.h
@@ -0,0 +1,322 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_THREADS
+# define OPENSSL_THREADS
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <unistd.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned char
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#define RC4_CHUNK unsigned long
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned int
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#define SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#undef THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#define BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#define DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/include/openssl/opensslconf-static-trusty.h b/include/openssl/opensslconf-static-trusty.h
new file mode 100644
index 0000000..06f9f98
--- /dev/null
+++ b/include/openssl/opensslconf-static-trusty.h
@@ -0,0 +1,448 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_CMS
+# define OPENSSL_NO_CMS
+#endif
+#ifndef OPENSSL_NO_COMP
+# define OPENSSL_NO_COMP
+#endif
+#ifndef OPENSSL_NO_CONF
+# define OPENSSL_NO_CONF
+#endif
+#ifndef OPENSSL_NO_DES
+# define OPENSSL_NO_DES
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_LOCKING
+# define OPENSSL_NO_LOCKING
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MD4
+# define OPENSSL_NO_MD4
+#endif
+#ifndef OPENSSL_NO_MD5
+# define OPENSSL_NO_MD5
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_OCSP
+# define OPENSSL_NO_OCSP
+#endif
+#ifndef OPENSSL_NO_PEM
+# define OPENSSL_NO_PEM
+#endif
+#ifndef OPENSSL_NO_PKCS12
+# define OPENSSL_NO_PKCS12
+#endif
+#ifndef OPENSSL_NO_PQUEUE
+# define OPENSSL_NO_PQUEUE
+#endif
+#ifndef OPENSSL_NO_RC2
+# define OPENSSL_NO_RC2
+#endif
+#ifndef OPENSSL_NO_RC4
+# define OPENSSL_NO_RC4
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_SRP
+# define OPENSSL_NO_SRP
+#endif
+#ifndef OPENSSL_NO_SSL2
+# define OPENSSL_NO_SSL2
+#endif
+#ifndef OPENSSL_NO_SSL3
+# define OPENSSL_NO_SSL3
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_TLS1
+# define OPENSSL_NO_TLS1
+#endif
+#ifndef OPENSSL_NO_TLSEXT
+# define OPENSSL_NO_TLSEXT
+#endif
+#ifndef OPENSSL_NO_TS
+# define OPENSSL_NO_TS
+#endif
+#ifndef OPENSSL_NO_TXT_DB
+# define OPENSSL_NO_TXT_DB
+#endif
+#ifndef OPENSSL_NO_UI
+# define OPENSSL_NO_UI
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_NO_ERR
+# define OPENSSL_NO_ERR
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS)
+# define NO_CMS
+# endif
+# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP)
+# define NO_COMP
+# endif
+# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF)
+# define NO_CONF
+# endif
+# if defined(OPENSSL_NO_DES) && !defined(NO_DES)
+# define NO_DES
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING)
+# define NO_LOCKING
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4)
+# define NO_MD4
+# endif
+# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5)
+# define NO_MD5
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP)
+# define NO_OCSP
+# endif
+# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM)
+# define NO_PEM
+# endif
+# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12)
+# define NO_PKCS12
+# endif
+# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE)
+# define NO_PQUEUE
+# endif
+# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2)
+# define NO_RC2
+# endif
+# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4)
+# define NO_RC4
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP)
+# define NO_SRP
+# endif
+# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2)
+# define NO_SSL2
+# endif
+# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3)
+# define NO_SSL3
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1)
+# define NO_TLS1
+# endif
+# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT)
+# define NO_TLSEXT
+# endif
+# if defined(OPENSSL_NO_TS) && !defined(NO_TS)
+# define NO_TS
+# endif
+# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB)
+# define NO_TXT_DB
+# endif
+# if defined(OPENSSL_NO_UI) && !defined(NO_UI)
+# define NO_UI
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <trusty_std.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned int
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#undef RC4_CHUNK
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned long
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#undef BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#undef DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/include/openssl/opensslconf-static.h b/include/openssl/opensslconf-static.h
new file mode 100644
index 0000000..f63a6e0
--- /dev/null
+++ b/include/openssl/opensslconf-static.h
@@ -0,0 +1,6 @@
+// Auto-generated - DO NOT EDIT!
+#if defined(__LP64__)
+#include "opensslconf-static-64.h"
+#else
+#include "opensslconf-static-32.h"
+#endif
diff --git a/include/openssl/opensslconf-trusty.h b/include/openssl/opensslconf-trusty.h
new file mode 100644
index 0000000..06f9f98
--- /dev/null
+++ b/include/openssl/opensslconf-trusty.h
@@ -0,0 +1,448 @@
+/* opensslconf.h */
+/* WARNING: Generated automatically from opensslconf.h.in by Configure. */
+
+/* OpenSSL was configured with the following options: */
+#ifndef OPENSSL_DOING_MAKEDEPEND
+
+
+#ifndef OPENSSL_NO_CAMELLIA
+# define OPENSSL_NO_CAMELLIA
+#endif
+#ifndef OPENSSL_NO_CAPIENG
+# define OPENSSL_NO_CAPIENG
+#endif
+#ifndef OPENSSL_NO_CAST
+# define OPENSSL_NO_CAST
+#endif
+#ifndef OPENSSL_NO_CMS
+# define OPENSSL_NO_CMS
+#endif
+#ifndef OPENSSL_NO_COMP
+# define OPENSSL_NO_COMP
+#endif
+#ifndef OPENSSL_NO_CONF
+# define OPENSSL_NO_CONF
+#endif
+#ifndef OPENSSL_NO_DES
+# define OPENSSL_NO_DES
+#endif
+#ifndef OPENSSL_NO_DTLS1
+# define OPENSSL_NO_DTLS1
+#endif
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+# define OPENSSL_NO_EC_NISTP_64_GCC_128
+#endif
+#ifndef OPENSSL_NO_GMP
+# define OPENSSL_NO_GMP
+#endif
+#ifndef OPENSSL_NO_GOST
+# define OPENSSL_NO_GOST
+#endif
+#ifndef OPENSSL_NO_HEARTBEATS
+# define OPENSSL_NO_HEARTBEATS
+#endif
+#ifndef OPENSSL_NO_IDEA
+# define OPENSSL_NO_IDEA
+#endif
+#ifndef OPENSSL_NO_JPAKE
+# define OPENSSL_NO_JPAKE
+#endif
+#ifndef OPENSSL_NO_KRB5
+# define OPENSSL_NO_KRB5
+#endif
+#ifndef OPENSSL_NO_LOCKING
+# define OPENSSL_NO_LOCKING
+#endif
+#ifndef OPENSSL_NO_MD2
+# define OPENSSL_NO_MD2
+#endif
+#ifndef OPENSSL_NO_MD4
+# define OPENSSL_NO_MD4
+#endif
+#ifndef OPENSSL_NO_MD5
+# define OPENSSL_NO_MD5
+#endif
+#ifndef OPENSSL_NO_MDC2
+# define OPENSSL_NO_MDC2
+#endif
+#ifndef OPENSSL_NO_OCSP
+# define OPENSSL_NO_OCSP
+#endif
+#ifndef OPENSSL_NO_PEM
+# define OPENSSL_NO_PEM
+#endif
+#ifndef OPENSSL_NO_PKCS12
+# define OPENSSL_NO_PKCS12
+#endif
+#ifndef OPENSSL_NO_PQUEUE
+# define OPENSSL_NO_PQUEUE
+#endif
+#ifndef OPENSSL_NO_RC2
+# define OPENSSL_NO_RC2
+#endif
+#ifndef OPENSSL_NO_RC4
+# define OPENSSL_NO_RC4
+#endif
+#ifndef OPENSSL_NO_RC5
+# define OPENSSL_NO_RC5
+#endif
+#ifndef OPENSSL_NO_RDRAND
+# define OPENSSL_NO_RDRAND
+#endif
+#ifndef OPENSSL_NO_RFC3779
+# define OPENSSL_NO_RFC3779
+#endif
+#ifndef OPENSSL_NO_RIPEMD
+# define OPENSSL_NO_RIPEMD
+#endif
+#ifndef OPENSSL_NO_RSAX
+# define OPENSSL_NO_RSAX
+#endif
+#ifndef OPENSSL_NO_SCTP
+# define OPENSSL_NO_SCTP
+#endif
+#ifndef OPENSSL_NO_SEED
+# define OPENSSL_NO_SEED
+#endif
+#ifndef OPENSSL_NO_SHA0
+# define OPENSSL_NO_SHA0
+#endif
+#ifndef OPENSSL_NO_SRP
+# define OPENSSL_NO_SRP
+#endif
+#ifndef OPENSSL_NO_SSL2
+# define OPENSSL_NO_SSL2
+#endif
+#ifndef OPENSSL_NO_SSL3
+# define OPENSSL_NO_SSL3
+#endif
+#ifndef OPENSSL_NO_STATIC_ENGINE
+# define OPENSSL_NO_STATIC_ENGINE
+#endif
+#ifndef OPENSSL_NO_STORE
+# define OPENSSL_NO_STORE
+#endif
+#ifndef OPENSSL_NO_TLS1
+# define OPENSSL_NO_TLS1
+#endif
+#ifndef OPENSSL_NO_TLSEXT
+# define OPENSSL_NO_TLSEXT
+#endif
+#ifndef OPENSSL_NO_TS
+# define OPENSSL_NO_TS
+#endif
+#ifndef OPENSSL_NO_TXT_DB
+# define OPENSSL_NO_TXT_DB
+#endif
+#ifndef OPENSSL_NO_UI
+# define OPENSSL_NO_UI
+#endif
+#ifndef OPENSSL_NO_WHIRLPOOL
+# define OPENSSL_NO_WHIRLPOOL
+#endif
+
+#endif /* OPENSSL_DOING_MAKEDEPEND */
+
+#ifndef OPENSSL_NO_ERR
+# define OPENSSL_NO_ERR
+#endif
+#ifndef OPENSSL_NO_DYNAMIC_ENGINE
+# define OPENSSL_NO_DYNAMIC_ENGINE
+#endif
+
+/* The OPENSSL_NO_* macros are also defined as NO_* if the application
+ asks for it. This is a transient feature that is provided for those
+ who haven't had the time to do the appropriate changes in their
+ applications. */
+#ifdef OPENSSL_ALGORITHM_DEFINES
+# if defined(OPENSSL_NO_CAMELLIA) && !defined(NO_CAMELLIA)
+# define NO_CAMELLIA
+# endif
+# if defined(OPENSSL_NO_CAPIENG) && !defined(NO_CAPIENG)
+# define NO_CAPIENG
+# endif
+# if defined(OPENSSL_NO_CAST) && !defined(NO_CAST)
+# define NO_CAST
+# endif
+# if defined(OPENSSL_NO_CMS) && !defined(NO_CMS)
+# define NO_CMS
+# endif
+# if defined(OPENSSL_NO_COMP) && !defined(NO_COMP)
+# define NO_COMP
+# endif
+# if defined(OPENSSL_NO_CONF) && !defined(NO_CONF)
+# define NO_CONF
+# endif
+# if defined(OPENSSL_NO_DES) && !defined(NO_DES)
+# define NO_DES
+# endif
+# if defined(OPENSSL_NO_DTLS1) && !defined(NO_DTLS1)
+# define NO_DTLS1
+# endif
+# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128)
+# define NO_EC_NISTP_64_GCC_128
+# endif
+# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP)
+# define NO_GMP
+# endif
+# if defined(OPENSSL_NO_GOST) && !defined(NO_GOST)
+# define NO_GOST
+# endif
+# if defined(OPENSSL_NO_HEARTBEATS) && !defined(NO_HEARTBEATS)
+# define NO_HEARTBEATS
+# endif
+# if defined(OPENSSL_NO_IDEA) && !defined(NO_IDEA)
+# define NO_IDEA
+# endif
+# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE)
+# define NO_JPAKE
+# endif
+# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
+# define NO_KRB5
+# endif
+# if defined(OPENSSL_NO_LOCKING) && !defined(NO_LOCKING)
+# define NO_LOCKING
+# endif
+# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
+# define NO_MD2
+# endif
+# if defined(OPENSSL_NO_MD4) && !defined(NO_MD4)
+# define NO_MD4
+# endif
+# if defined(OPENSSL_NO_MD5) && !defined(NO_MD5)
+# define NO_MD5
+# endif
+# if defined(OPENSSL_NO_MDC2) && !defined(NO_MDC2)
+# define NO_MDC2
+# endif
+# if defined(OPENSSL_NO_OCSP) && !defined(NO_OCSP)
+# define NO_OCSP
+# endif
+# if defined(OPENSSL_NO_PEM) && !defined(NO_PEM)
+# define NO_PEM
+# endif
+# if defined(OPENSSL_NO_PKCS12) && !defined(NO_PKCS12)
+# define NO_PKCS12
+# endif
+# if defined(OPENSSL_NO_PQUEUE) && !defined(NO_PQUEUE)
+# define NO_PQUEUE
+# endif
+# if defined(OPENSSL_NO_RC2) && !defined(NO_RC2)
+# define NO_RC2
+# endif
+# if defined(OPENSSL_NO_RC4) && !defined(NO_RC4)
+# define NO_RC4
+# endif
+# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5)
+# define NO_RC5
+# endif
+# if defined(OPENSSL_NO_RDRAND) && !defined(NO_RDRAND)
+# define NO_RDRAND
+# endif
+# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779)
+# define NO_RFC3779
+# endif
+# if defined(OPENSSL_NO_RIPEMD) && !defined(NO_RIPEMD)
+# define NO_RIPEMD
+# endif
+# if defined(OPENSSL_NO_RSAX) && !defined(NO_RSAX)
+# define NO_RSAX
+# endif
+# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
+# define NO_SCTP
+# endif
+# if defined(OPENSSL_NO_SEED) && !defined(NO_SEED)
+# define NO_SEED
+# endif
+# if defined(OPENSSL_NO_SHA0) && !defined(NO_SHA0)
+# define NO_SHA0
+# endif
+# if defined(OPENSSL_NO_SRP) && !defined(NO_SRP)
+# define NO_SRP
+# endif
+# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2)
+# define NO_SSL2
+# endif
+# if defined(OPENSSL_NO_SSL3) && !defined(NO_SSL3)
+# define NO_SSL3
+# endif
+# if defined(OPENSSL_NO_STATIC_ENGINE) && !defined(NO_STATIC_ENGINE)
+# define NO_STATIC_ENGINE
+# endif
+# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
+# define NO_STORE
+# endif
+# if defined(OPENSSL_NO_TLS1) && !defined(NO_TLS1)
+# define NO_TLS1
+# endif
+# if defined(OPENSSL_NO_TLSEXT) && !defined(NO_TLSEXT)
+# define NO_TLSEXT
+# endif
+# if defined(OPENSSL_NO_TS) && !defined(NO_TS)
+# define NO_TS
+# endif
+# if defined(OPENSSL_NO_TXT_DB) && !defined(NO_TXT_DB)
+# define NO_TXT_DB
+# endif
+# if defined(OPENSSL_NO_UI) && !defined(NO_UI)
+# define NO_UI
+# endif
+# if defined(OPENSSL_NO_WHIRLPOOL) && !defined(NO_WHIRLPOOL)
+# define NO_WHIRLPOOL
+# endif
+#endif
+
+/* crypto/opensslconf.h.in */
+
+/* Generate 80386 code? */
+#undef I386_ONLY
+
+#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define ENGINESDIR "/usr/local/ssl/lib/engines"
+#define OPENSSLDIR "/usr/local/ssl"
+#endif
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <trusty_std.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types make the structure larger but make the code faster
+ * on most boxes I have tested - up to %20 faster. */
+/*
+ * I don't know what does "most" mean, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - elder Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned int
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#undef RC4_CHUNK
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * %20 speed up (longs are 8 bytes, int's are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned long
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one for the following should be defined */
+#undef SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#define THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* if this is defined data[i] is used instead of *data, this is a %20
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#undef BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* the following is tweaked from a config script, that is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps C compiler generate the correct code for multiple functional
+ * units. It reduces register dependancies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
+ * Very mucy CPU dependant */
+#ifndef DES_UNROLL
+#undef DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <[email protected]>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/include/openssl/opensslconf.h b/include/openssl/opensslconf.h
index d00590b..94212a0 100644
--- a/include/openssl/opensslconf.h
+++ b/include/openssl/opensslconf.h
@@ -1,6 +1,10 @@
// Auto-generated - DO NOT EDIT!
+#ifndef OPENSSL_SYS_TRUSTY
#if defined(__LP64__)
#include "opensslconf-64.h"
#else
#include "opensslconf-32.h"
#endif
+#else
+#include "opensslconf-trusty.h"
+#endif
diff --git a/include/openssl/opensslv.h b/include/openssl/opensslv.h
index b27a5bb..c3b6ace 100644
--- a/include/openssl/opensslv.h
+++ b/include/openssl/opensslv.h
@@ -25,11 +25,11 @@
* (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
* major minor fix final patch/beta)
*/
-#define OPENSSL_VERSION_NUMBER 0x1000106fL
+#define OPENSSL_VERSION_NUMBER 0x1000108fL
#ifdef OPENSSL_FIPS
-#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1f-fips 6 Jan 2014"
+#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h-fips 5 Jun 2014"
#else
-#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1f 6 Jan 2014"
+#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1h 5 Jun 2014"
#endif
#define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT
diff --git a/include/openssl/pkcs7.h b/include/openssl/pkcs7.h
index e4d4431..04f6037 100644
--- a/include/openssl/pkcs7.h
+++ b/include/openssl/pkcs7.h
@@ -453,6 +453,7 @@
#define PKCS7_R_ERROR_SETTING_CIPHER 121
#define PKCS7_R_INVALID_MIME_TYPE 131
#define PKCS7_R_INVALID_NULL_POINTER 143
+#define PKCS7_R_INVALID_SIGNED_DATA_TYPE 155
#define PKCS7_R_MIME_NO_CONTENT_TYPE 132
#define PKCS7_R_MIME_PARSE_ERROR 133
#define PKCS7_R_MIME_SIG_PARSE_ERROR 134
diff --git a/include/openssl/ssl.h b/include/openssl/ssl.h
index e2e97f1..a89ab23 100644
--- a/include/openssl/ssl.h
+++ b/include/openssl/ssl.h
@@ -544,6 +544,13 @@
#ifndef OPENSSL_NO_SRP
char *srp_username;
#endif
+
+ /* original_handshake_hash contains the handshake hash (either
+ * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full
+ * handshake that created a session. This is used by Channel IDs during
+ * resumption. */
+ unsigned char original_handshake_hash[EVP_MAX_MD_SIZE];
+ unsigned int original_handshake_hash_len;
};
#endif
@@ -553,7 +560,7 @@
/* Allow initial connection to servers that don't support RI */
#define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L
#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L
-#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L
+#define SSL_OP_TLSEXT_PADDING 0x00000010L
#define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L
#define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L
#define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L
@@ -562,6 +569,8 @@
/* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */
#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0
+/* Refers to ancient SSLREF and SSLv2, retained for compatibility */
+#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0
/* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the
* insertion of empty records in CBC mode, but the empty records were commonly
@@ -648,16 +657,19 @@
* TLS only.) "Released" buffers are put onto a free-list in the context
* or just freed (depending on the context's setting for freelist_max_len). */
#define SSL_MODE_RELEASE_BUFFERS 0x00000010L
+
/* Send the current time in the Random fields of the ClientHello and
* ServerHello records for compatibility with hypothetical implementations
* that require it.
*/
#define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L
#define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L
+
/* When set, clients may send application data before receipt of CCS
* and Finished. This mode enables full-handshakes to 'complete' in
* one RTT. */
#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L
+
/* When set, TLS 1.0 and SSLv3, multi-byte, CBC records will be split in two:
* the first record will contain a single byte and the second will contain the
* rest of the bytes. This effectively randomises the IV and prevents BEAST
@@ -871,6 +883,9 @@
/* get client cert callback */
int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
+ /* get channel id callback */
+ void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey);
+
/* cookie generate callback */
int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie,
unsigned int *cookie_len);
@@ -933,7 +948,7 @@
*/
unsigned int max_send_fragment;
-#ifndef OPENSSL_ENGINE
+#ifndef OPENSSL_NO_ENGINE
/* Engine to pass requests for client certs to
*/
ENGINE *client_cert_engine;
@@ -1033,6 +1048,10 @@
/* If true, a client will advertise the Channel ID extension and a
* server will echo it. */
char tlsext_channel_id_enabled;
+ /* tlsext_channel_id_enabled_new is a hack to support both old and new
+ * ChannelID signatures. It indicates that a client should advertise the
+ * new ChannelID extension number. */
+ char tlsext_channel_id_enabled_new;
/* The client's Channel ID private key. */
EVP_PKEY *tlsext_channel_id_private;
#endif
@@ -1091,6 +1110,8 @@
void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val);
void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey));
int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
+void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey));
+void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey);
#ifndef OPENSSL_NO_ENGINE
int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e);
#endif
@@ -1167,12 +1188,14 @@
#define SSL_WRITING 2
#define SSL_READING 3
#define SSL_X509_LOOKUP 4
+#define SSL_CHANNEL_ID_LOOKUP 5
/* These will only be used when doing non-blocking IO */
#define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING)
#define SSL_want_read(s) (SSL_want(s) == SSL_READING)
#define SSL_want_write(s) (SSL_want(s) == SSL_WRITING)
#define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP)
+#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP)
#define SSL_MAC_FLAG_READ_MAC_STREAM 1
#define SSL_MAC_FLAG_WRITE_MAC_STREAM 2
@@ -1320,6 +1343,10 @@
#endif /* OPENSSL_NO_KRB5 */
#ifndef OPENSSL_NO_PSK
+ /* PSK identity hint is stored here only to enable setting a hint on an SSL object before an
+ * SSL_SESSION is associated with it. Once an SSL_SESSION is associated with this SSL object,
+ * the psk_identity_hint from the session takes precedence over this one. */
+ char *psk_identity_hint;
unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity,
unsigned int max_identity_len, unsigned char *psk,
unsigned int max_psk_len);
@@ -1604,6 +1631,7 @@
#define SSL_ERROR_ZERO_RETURN 6
#define SSL_ERROR_WANT_CONNECT 7
#define SSL_ERROR_WANT_ACCEPT 8
+#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9
#define SSL_CTRL_NEED_TMP_RSA 1
#define SSL_CTRL_SET_TMP_RSA 2
@@ -1743,10 +1771,11 @@
#define SSL_set_tmp_ecdh(ssl,ecdh) \
SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh)
-/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client
- * IDs from clients. Returns 1 on success. */
-#define SSL_enable_tls_channel_id(ctx) \
- SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)
+/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client
+ * IDs from clients, or configure a client to send TLS client IDs to server.
+ * Returns 1 on success. */
+#define SSL_enable_tls_channel_id(s) \
+ SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL)
/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to
* compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on
* success. */
@@ -1796,7 +1825,7 @@
char * SSL_CIPHER_get_version(const SSL_CIPHER *c);
const char * SSL_CIPHER_get_name(const SSL_CIPHER *c);
unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c);
-const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
+const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
int SSL_get_fd(const SSL *s);
int SSL_get_rfd(const SSL *s);
@@ -2713,7 +2742,6 @@
#define SSL_R_WRONG_VERSION_NUMBER 267
#define SSL_R_X509_LIB 268
#define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269
-#define SSL_R_UNEXPECTED_CCS 388
#ifdef __cplusplus
}
diff --git a/include/openssl/ssl3.h b/include/openssl/ssl3.h
index 1aa4023..cba9434 100644
--- a/include/openssl/ssl3.h
+++ b/include/openssl/ssl3.h
@@ -393,9 +393,6 @@
#define TLS1_FLAGS_TLS_PADDING_BUG 0x0008
#define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010
#define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020
-/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at
- * this point in the handshake. If this flag is not set then received CCS
- * records will cause a fatal error for the connection. */
#define SSL3_FLAGS_CCS_OK 0x0080
/* SSL3_FLAGS_SGC_RESTART_DONE is set when we
@@ -563,6 +560,11 @@
* for Channel IDs and that tlsext_channel_id will be valid after the
* handshake. */
char tlsext_channel_id_valid;
+ /* tlsext_channel_id_new means that the updated Channel ID extension
+ * was negotiated. This is a temporary hack in the code to support both
+ * forms of Channel ID extension while we transition to the new format,
+ * which fixed a security issue. */
+ char tlsext_channel_id_new;
/* For a server:
* If |tlsext_channel_id_valid| is true, then this contains the
* verified Channel ID from the client: a P256 point, (x,y), where
@@ -683,11 +685,11 @@
#define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT)
-#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
#ifndef OPENSSL_NO_NEXTPROTONEG
#define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
#define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
#endif
+#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT)
#define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
diff --git a/include/openssl/symhacks.h b/include/openssl/symhacks.h
index 07a412f..bd2f000 100644
--- a/include/openssl/symhacks.h
+++ b/include/openssl/symhacks.h
@@ -204,6 +204,12 @@
#define SSL_CTX_set_next_protos_advertised_cb SSL_CTX_set_next_protos_adv_cb
#undef SSL_CTX_set_next_proto_select_cb
#define SSL_CTX_set_next_proto_select_cb SSL_CTX_set_next_proto_sel_cb
+#undef ssl3_cbc_record_digest_supported
+#define ssl3_cbc_record_digest_supported ssl3_cbc_record_digest_support
+#undef ssl_check_clienthello_tlsext_late
+#define ssl_check_clienthello_tlsext_late ssl_check_clihello_tlsext_late
+#undef ssl_check_clienthello_tlsext_early
+#define ssl_check_clienthello_tlsext_early ssl_check_clihello_tlsext_early
/* Hack some long ENGINE names */
#undef ENGINE_get_default_BN_mod_exp_crt
diff --git a/include/openssl/tls1.h b/include/openssl/tls1.h
index 86507d7..dc36f79 100644
--- a/include/openssl/tls1.h
+++ b/include/openssl/tls1.h
@@ -233,6 +233,12 @@
/* ExtensionType value from RFC5620 */
#define TLSEXT_TYPE_heartbeat 15
+/* ExtensionType value for TLS padding extension.
+ * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml
+ * http://tools.ietf.org/html/draft-agl-tls-padding-03
+ */
+#define TLSEXT_TYPE_padding 21
+
/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */
#define TLSEXT_TYPE_application_layer_protocol_negotiation 16
@@ -256,10 +262,7 @@
/* This is not an IANA defined extension number */
#define TLSEXT_TYPE_channel_id 30031
-
-/* See https://tools.ietf.org/html/draft-agl-tls-padding-02
- * Number not yet IANA assigned. */
-#define TLSEXT_TYPE_padding 35655
+#define TLSEXT_TYPE_channel_id_new 30032
/* NameType value from RFC 3546 */
#define TLSEXT_NAMETYPE_host_name 0
@@ -532,6 +535,12 @@
#define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031
#define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032
+/* ECDHE PSK ciphersuites from RFC5489
+ * SHA-2 cipher suites are omitted because they cannot be used safely with
+ * SSLv3. */
+#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035
+#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036
+
/* XXX
* Inconsistency alert:
* The OpenSSL names of ciphers with ephemeral DH here include the string
@@ -683,6 +692,10 @@
#define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256"
#define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384"
+/* ECDHE PSK ciphersuites from RFC5489 */
+#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA"
+#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA"
+
#define TLS_CT_RSA_SIGN 1
#define TLS_CT_DSS_SIGN 2
#define TLS_CT_RSA_FIXED_DH 3
diff --git a/openssl.config b/openssl.config
index e3c690d..867711f 100644
--- a/openssl.config
+++ b/openssl.config
@@ -13,6 +13,7 @@
no-mdc2 \
no-rc5 \
no-rdrand \
+no-ripemd \
no-rfc3779 \
no-rsax \
no-sctp \
@@ -20,6 +21,7 @@
no-sha0 \
no-static_engine \
no-whirlpool \
+no-zlib \
"
# configure arguments specific for 32-bit arch
@@ -32,6 +34,11 @@
linux-generic64 \
"
+# configure arguments specific for static build
+CONFIGURE_ARGS_STATIC="\
+no-dso \
+"
+
# unneeded directories
UNNEEDED_SOURCES="\
MacOS \
@@ -47,6 +54,7 @@
crypto/idea \
crypto/md2 \
crypto/rc5 \
+crypto/ripemd \
crypto/seed \
crypto/whrlpool \
demos \
@@ -88,7 +96,6 @@
README.ENGINE \
apps/CA.com \
apps/Makefile \
-apps/Makefile.save \
apps/install-apps.com \
apps/makeapps.com \
apps/openssl-vms.cnf \
@@ -98,15 +105,10 @@
config \
crypto/LPdir_vms.c \
crypto/Makefile \
-crypto/Makefile.save \
crypto/aes/Makefile \
-crypto/aes/Makefile.save \
-crypto/armcap.c \
crypto/asn1/Makefile \
-crypto/asn1/Makefile.save \
crypto/bf/INSTALL \
crypto/bf/Makefile \
-crypto/bf/Makefile.save \
crypto/bf/README \
crypto/bf/VERSION \
crypto/bf/asm/readme \
@@ -116,117 +118,77 @@
crypto/bf/bfspeed.c \
crypto/bf/bftest.c \
crypto/bio/Makefile \
-crypto/bio/Makefile.save \
crypto/bio/bss_rtcp.c \
crypto/bn/Makefile \
-crypto/bn/Makefile.save \
crypto/bn/asm/vms.mar \
crypto/bn/bn_x931p.c \
crypto/bn/vms-helper.c \
crypto/buffer/Makefile \
-crypto/buffer/Makefile.save \
crypto/cmac/Makefile \
-crypto/cmac/Makefile.save \
crypto/cms/Makefile \
-crypto/cms/Makefile.save \
crypto/comp/Makefile \
-crypto/comp/Makefile.save \
crypto/conf/Makefile \
-crypto/conf/Makefile.save \
crypto/crypto-lib.com \
crypto/des/Makefile \
-crypto/des/Makefile.save \
crypto/des/des-lib.com \
crypto/dh/Makefile \
-crypto/dh/Makefile.save \
crypto/dh/dh_prn.c \
crypto/dsa/Makefile \
-crypto/dsa/Makefile.save \
crypto/dso/Makefile \
-crypto/dso/Makefile.save \
crypto/dso/dso_beos.c \
crypto/dso/dso_vms.c \
crypto/dso/dso_win32.c \
crypto/ec/Makefile \
-crypto/ec/Makefile.save \
crypto/ec/ecp_nistp224.c \
crypto/ec/ecp_nistp256.c \
crypto/ec/ecp_nistp521.c \
crypto/ec/ecp_nistputil.c \
crypto/ecdh/Makefile \
-crypto/ecdh/Makefile.save \
crypto/ecdsa/Makefile \
-crypto/ecdsa/Makefile.save \
crypto/engine/Makefile \
-crypto/engine/Makefile.save \
crypto/engine/eng_rdrand.c \
crypto/engine/eng_rsax.c \
crypto/err/Makefile \
-crypto/err/Makefile.save \
crypto/evp/Makefile \
-crypto/evp/Makefile.save \
crypto/evp/evp_fips.c \
crypto/evp/m_md2.c \
crypto/evp/m_sha.c \
crypto/fips_err.h \
crypto/fips_ers.c \
crypto/hmac/Makefile \
-crypto/hmac/Makefile.save \
crypto/install-crypto.com \
crypto/jpake/Makefile \
crypto/krb5/Makefile \
-crypto/krb5/Makefile.save \
crypto/lhash/Makefile \
-crypto/lhash/Makefile.save \
crypto/md4/Makefile \
-crypto/md4/Makefile.save \
crypto/md5/Makefile \
-crypto/md5/Makefile.save \
crypto/mdc2/Makefile \
-crypto/mdc2/Makefile.save \
crypto/modes/Makefile \
-crypto/modes/Makefile.save \
crypto/modes/cts128.c \
crypto/modes/modes.h \
crypto/o_fips.c \
crypto/objects/Makefile \
-crypto/objects/Makefile.save \
crypto/ocsp/Makefile \
-crypto/ocsp/Makefile.save \
crypto/pem/Makefile \
-crypto/pem/Makefile.save \
crypto/pkcs12/Makefile \
-crypto/pkcs12/Makefile.save \
crypto/pkcs7/Makefile \
-crypto/pkcs7/Makefile.save \
crypto/pkcs7/bio_pk7.c \
crypto/ppccap.c \
crypto/pqueue/Makefile \
-crypto/pqueue/Makefile.save \
crypto/rand/Makefile \
-crypto/rand/Makefile.save \
crypto/rand/rand_vms.c \
crypto/rc2/Makefile \
-crypto/rc2/Makefile.save \
crypto/rc4/Makefile \
-crypto/rc4/Makefile.save \
-crypto/ripemd/Makefile \
-crypto/ripemd/Makefile.save \
crypto/rsa/Makefile \
-crypto/rsa/Makefile.save \
crypto/sha/Makefile \
-crypto/sha/Makefile.save \
crypto/sha/sha_one.c \
crypto/srp/Makefile \
-crypto/srp/Makefile.save \
crypto/srp/srptest.c \
crypto/stack/Makefile \
-crypto/stack/Makefile.save \
crypto/store/Makefile \
crypto/threads/pthreads-vms.com \
crypto/threads/win32.bat \
crypto/ts/Makefile \
-crypto/ts/Makefile.save \
crypto/ts/ts.h \
crypto/ts/ts_asn1.c \
crypto/ts/ts_conf.c \
@@ -239,14 +201,10 @@
crypto/ts/ts_rsp_verify.c \
crypto/ts/ts_verify_ctx.c \
crypto/txt_db/Makefile \
-crypto/txt_db/Makefile.save \
crypto/ui/Makefile \
-crypto/ui/Makefile.save \
crypto/vms_rms.h
crypto/x509/Makefile \
-crypto/x509/Makefile.save \
crypto/x509v3/Makefile \
-crypto/x509v3/Makefile.save \
include/openssl/camellia.h \
include/openssl/cast.h \
include/openssl/idea.h \
@@ -258,11 +216,11 @@
openssl.doxy \
openssl.spec \
ssl/Makefile \
-ssl/Makefile.save \
+ssl/heartbeat_test.c \
ssl/install-ssl.com \
ssl/ssl-lib.com \
ssl/ssl_task.c \
-"
+"
NEEDED_SOURCES="\
apps \
@@ -280,17 +238,24 @@
"
OPENSSL_CRYPTO_DEFINES_arm="\
+AES_ASM \
+BSAES_ASM \
+DES_UNROLL \
+GHASH_ASM \
OPENSSL_BN_ASM_GF2m \
OPENSSL_BN_ASM_MONT \
-GHASH_ASM \
-AES_ASM \
+OPENSSL_CPUID_OBJ \
SHA1_ASM \
SHA256_ASM \
SHA512_ASM \
"
OPENSSL_CRYPTO_DEFINES_arm64="\
-OPENSSL_NO_ASM \
+DES_UNROLL \
+OPENSSL_CPUID_OBJ \
+SHA1_ASM \
+SHA256_ASM \
+SHA512_ASM \
"
OPENSSL_CRYPTO_DEFINES_mips="\
@@ -301,34 +266,40 @@
"
OPENSSL_CRYPTO_DEFINES_x86="\
+AES_ASM \
+DES_PTR \
+DES_RISC1 \
+DES_UNROLL \
+GHASH_ASM \
+MD5_ASM \
OPENSSL_BN_ASM_GF2m \
OPENSSL_BN_ASM_MONT \
OPENSSL_BN_ASM_PART_WORDS \
-AES_ASM \
-GHASH_ASM \
+OPENSSL_CPUID_OBJ \
+OPENSSL_IA32_SSE2 \
+RC4_INDEX \
+RMD160_ASM \
SHA1_ASM \
SHA256_ASM \
SHA512_ASM \
-MD5_ASM \
-DES_PTR \
-DES_RISC1 \
-DES_UNROLL \
-OPENSSL_CPUID_OBJ \
+VPAES_ASM \
"
OPENSSL_CRYPTO_DEFINES_x86_64="\
+AES_ASM \
+BSAES_ASM \
+DES_UNROLL \
+GHASH_ASM \
+MD5_ASM \
OPENSSL_BN_ASM_GF2m \
OPENSSL_BN_ASM_MONT \
-AES_ASM \
-GHASH_ASM \
+OPENSSL_BN_ASM_MONT5 \
+OPENSSL_CPUID_OBJ \
+OPENSSL_IA32_SSE2 \
SHA1_ASM \
SHA256_ASM \
SHA512_ASM \
-MD5_ASM \
-DES_PTR \
-DES_RISC1 \
-DES_UNROLL \
-OPENSSL_CPUID_OBJ \
+VPAES_ASM \
"
OPENSSL_CRYPTO_INCLUDES="\
@@ -664,7 +635,6 @@
crypto/evp/m_md5.c \
crypto/evp/m_mdc2.c \
crypto/evp/m_null.c \
-crypto/evp/m_ripemd.c \
crypto/evp/m_sha1.c \
crypto/evp/m_sigver.c \
crypto/evp/m_wp.c \
@@ -763,8 +733,6 @@
crypto/rc4/rc4_enc.c \
crypto/rc4/rc4_skey.c \
crypto/rc4/rc4_utl.c \
-crypto/ripemd/rmd_dgst.c \
-crypto/ripemd/rmd_one.c \
crypto/rsa/rsa_ameth.c \
crypto/rsa/rsa_asn1.c \
crypto/rsa/rsa_chk.c \
@@ -861,9 +829,14 @@
OPENSSL_CRYPTO_SOURCES_arm="\
crypto/aes/asm/aes-armv4.S \
+crypto/aes/asm/aesv8-armx.S \
+crypto/aes/asm/bsaes-armv7.S \
+crypto/armcap.c \
+crypto/armv4cpuid.S \
crypto/bn/asm/armv4-gf2m.S \
crypto/bn/asm/armv4-mont.S \
crypto/modes/asm/ghash-armv4.S \
+crypto/modes/asm/ghashv8-armx.S \
crypto/sha/asm/sha1-armv4-large.S \
crypto/sha/asm/sha256-armv4.S \
crypto/sha/asm/sha512-armv4.S \
@@ -871,9 +844,17 @@
OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm="\
crypto/aes/aes_core.c \
+crypto/mem_clr.c \
"
OPENSSL_CRYPTO_SOURCES_arm64="\
+crypto/armcap.c \
+crypto/arm64cpuid.S \
+crypto/aes/asm/aesv8-armx-64.S \
+crypto/modes/asm/ghashv8-armx-64.S \
+crypto/sha/asm/sha1-armv8.S \
+crypto/sha/asm/sha256-armv8.S \
+crypto/sha/asm/sha512-armv8.S \
"
OPENSSL_CRYPTO_SOURCES_EXCLUDES_arm64="\
@@ -1066,17 +1047,4 @@
apps/x509.c \
"
-OPENSSL_PATCHES="\
-progs.patch \
-handshake_cutthrough.patch \
-jsse.patch \
-channelid.patch \
-eng_dyn_dirs.patch \
-fix_clang_build.patch \
-tls12_digests.patch \
-alpn.patch \
-cbc_record_splitting.patch \
-paddingext.patch \
-early_ccs.patch \
-0018-tls_fallback_scsv.patch \
-"
+source ./openssl.trusty.config
diff --git a/openssl.trusty.config b/openssl.trusty.config
new file mode 100644
index 0000000..9710ad0
--- /dev/null
+++ b/openssl.trusty.config
@@ -0,0 +1,278 @@
+CONFIGURE_ARGS_TRUSTY="\
+-DL_ENDIAN \
+linux-generic32:::<trusty_std.h> \
+no-camellia \
+no-capieng \
+no-cast \
+no-cms \
+no-comp \
+no-conf \
+no-des \
+no-dso \
+no-dtls1 \
+no-err \
+no-gost \
+no-gmp \
+no-heartbeats \
+no-idea \
+no-jpake \
+no-krb5 \
+no-locking \
+no-md2 \
+no-md4 \
+no-md5 \
+no-mdc2 \
+no-ocsp \
+no-pem \
+no-pkcs12 \
+no-pqueue \
+no-rc2 \
+no-rc4 \
+no-rc5 \
+no-rc5 \
+no-rdrand \
+no-rfc3779 \
+no-ripemd \
+no-rsax \
+no-sctp \
+no-seed \
+no-sha0 \
+no-srp \
+no-ssl \
+no-static_engine \
+no-store \
+no-threads \
+no-ts \
+no-txt_db \
+no-ui \
+no-whirlpool \
+"
+
+# Trusty-specific compiler defines for crypto/ library.
+#
+OPENSSL_CRYPTO_TRUSTY_DEFINES="\
+GETPID_IS_MEANINGLESS \
+NO_WINDOWS_BRAINDEATH \
+"
+
+OPENSSL_CRYPTO_TRUSTY_DEFINES_arm="\
+OPENSSL_BN_ASM_GF2m \
+OPENSSL_BN_ASM_MONT \
+GHASH_ASM \
+AES_ASM \
+SHA1_ASM \
+SHA256_ASM \
+SHA512_ASM \
+"
+
+OPENSSL_CRYPTO_TRUSTY_DEFINES_mips=""
+
+OPENSSL_CRYPTO_TRUSTY_DEFINES_x86=""
+
+OPENSSL_CRYPTO_TRUSTY_DEFINES_x86_64=""
+
+OPENSSL_CRYPTO_TRUSTY_INCLUDES="\
+. \
+include \
+crypto \
+crypto/asn1 \
+crypto/evp \
+crypto/modes \
+include \
+include/openssl \
+"
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES="\
+crypto/aes/aes_cbc.c \
+crypto/aes/aes_misc.c \
+crypto/asn1/a_bitstr.c \
+crypto/asn1/a_d2i_fp.c \
+crypto/asn1/a_int.c \
+crypto/asn1/ameth_lib.c \
+crypto/asn1/a_object.c \
+crypto/asn1/a_octet.c \
+crypto/asn1/asn1_lib.c \
+crypto/asn1/asn_pack.c \
+crypto/asn1/a_type.c \
+crypto/asn1/d2i_pr.c \
+crypto/asn1/f_int.c \
+crypto/asn1/i2d_pr.c \
+crypto/asn1/p8_pkey.c \
+crypto/asn1/tasn_dec.c \
+crypto/asn1/tasn_enc.c \
+crypto/asn1/tasn_fre.c \
+crypto/asn1/tasn_new.c \
+crypto/asn1/tasn_typ.c \
+crypto/asn1/tasn_utl.c \
+crypto/asn1/t_pkey.c \
+crypto/asn1/t_x509.c \
+crypto/asn1/x_algor.c \
+crypto/asn1/x_attrib.c \
+crypto/asn1/x_bignum.c \
+crypto/asn1/x_long.c \
+crypto/asn1/x_pubkey.c \
+crypto/asn1/x_sig.c \
+crypto/bio/bio_lib.c \
+crypto/bio/b_print.c \
+crypto/bio/bss_mem.c \
+crypto/bn/bn_add.c \
+crypto/bn/bn_asm.c \
+crypto/bn/bn_blind.c \
+crypto/bn/bn_ctx.c \
+crypto/bn/bn_div.c \
+crypto/bn/bn_exp2.c \
+crypto/bn/bn_exp.c \
+crypto/bn/bn_gcd.c \
+crypto/bn/bn_gf2m.c \
+crypto/bn/bn_kron.c \
+crypto/bn/bn_lib.c \
+crypto/bn/bn_mod.c \
+crypto/bn/bn_mont.c \
+crypto/bn/bn_mul.c \
+crypto/bn/bn_nist.c \
+crypto/bn/bn_prime.c \
+crypto/bn/bn_print.c \
+crypto/bn/bn_rand.c \
+crypto/bn/bn_recp.c \
+crypto/bn/bn_shift.c \
+crypto/bn/bn_sqr.c \
+crypto/bn/bn_sqrt.c \
+crypto/bn/bn_word.c \
+crypto/buffer/buffer.c \
+crypto/buffer/buf_str.c \
+crypto/cmac/cmac.c \
+crypto/cmac/cm_ameth.c \
+crypto/cmac/cm_pmeth.c \
+Crypto-config.mk \
+crypto/cryptlib.c \
+crypto/dh/dh_ameth.c \
+crypto/dh/dh_asn1.c \
+crypto/dh/dh_check.c \
+crypto/dh/dh_gen.c \
+crypto/dh/dh_key.c \
+crypto/dh/dh_lib.c \
+crypto/dh/dh_pmeth.c \
+crypto/dsa/dsa_ameth.c \
+crypto/dsa/dsa_asn1.c \
+crypto/dsa/dsa_gen.c \
+crypto/dsa/dsa_key.c \
+crypto/dsa/dsa_lib.c \
+crypto/dsa/dsa_ossl.c \
+crypto/dsa/dsa_pmeth.c \
+crypto/dsa/dsa_sign.c \
+crypto/dsa/dsa_vrf.c \
+crypto/ecdh/ech_key.c \
+crypto/ecdh/ech_lib.c \
+crypto/ecdh/ech_ossl.c \
+crypto/ecdsa/ecs_asn1.c \
+crypto/ecdsa/ecs_lib.c \
+crypto/ecdsa/ecs_ossl.c \
+crypto/ecdsa/ecs_sign.c \
+crypto/ecdsa/ecs_vrf.c \
+crypto/ec/ec2_mult.c \
+crypto/ec/ec2_oct.c \
+crypto/ec/ec2_smpl.c \
+crypto/ec/ec_ameth.c \
+crypto/ec/ec_asn1.c \
+crypto/ec/ec_curve.c \
+crypto/ec/ec_cvt.c \
+crypto/ec/ec_key.c \
+crypto/ec/eck_prn.c \
+crypto/ec/ec_lib.c \
+crypto/ec/ec_mult.c \
+crypto/ec/ec_oct.c \
+crypto/ec/ec_pmeth.c \
+crypto/ec/ecp_mont.c \
+crypto/ec/ecp_nist.c \
+crypto/ec/ecp_oct.c \
+crypto/ec/ec_print.c \
+crypto/ec/ecp_smpl.c \
+crypto/engine/eng_init.c \
+crypto/engine/eng_lib.c \
+crypto/engine/eng_table.c \
+crypto/engine/tb_asnmth.c \
+crypto/engine/tb_cipher.c \
+crypto/engine/tb_dh.c \
+crypto/engine/tb_digest.c \
+crypto/engine/tb_dsa.c \
+crypto/engine/tb_ecdh.c \
+crypto/engine/tb_ecdsa.c \
+crypto/engine/tb_pkmeth.c \
+crypto/engine/tb_rand.c \
+crypto/engine/tb_rsa.c \
+crypto/err/err.c \
+crypto/evp/digest.c \
+crypto/evp/e_aes.c \
+crypto/evp/evp_enc.c \
+crypto/evp/evp_lib.c \
+crypto/evp/evp_pkey.c \
+crypto/evp/m_sha1.c \
+crypto/evp/m_sigver.c \
+crypto/evp/names.c \
+crypto/evp/p_lib.c \
+crypto/evp/pmeth_fn.c \
+crypto/evp/pmeth_gn.c \
+crypto/evp/pmeth_lib.c \
+crypto/ex_data.c \
+crypto/hmac/hmac.c \
+crypto/hmac/hm_ameth.c \
+crypto/hmac/hm_pmeth.c \
+crypto/lhash/lhash.c \
+crypto/mem.c \
+crypto/mem_clr.c \
+crypto/mem_dbg.c \
+crypto/modes/cbc128.c \
+crypto/modes/ctr128.c \
+crypto/objects/obj_dat.c \
+crypto/objects/obj_xref.c \
+crypto/objects/o_names.c \
+crypto/pkcs7/pk7_lib.c \
+crypto/rand/md_rand.c \
+crypto/rand/rand_lib.c \
+crypto/rsa/rsa_ameth.c \
+crypto/rsa/rsa_asn1.c \
+crypto/rsa/rsa_chk.c \
+crypto/rsa/rsa_crpt.c \
+crypto/rsa/rsa_eay.c \
+crypto/rsa/rsa_gen.c \
+crypto/rsa/rsa_lib.c \
+crypto/rsa/rsa_none.c \
+crypto/rsa/rsa_oaep.c \
+crypto/rsa/rsa_pk1.c \
+crypto/rsa/rsa_pmeth.c \
+crypto/rsa/rsa_pss.c \
+crypto/rsa/rsa_saos.c \
+crypto/rsa/rsa_sign.c \
+crypto/rsa/rsa_ssl.c \
+crypto/rsa/rsa_x931.c \
+crypto/sha/sha1_one.c \
+crypto/sha/sha1dgst.c \
+crypto/sha/sha256.c \
+crypto/sha/sha512.c \
+crypto/stack/stack.c \
+crypto/x509v3/v3_utl.c \
+crypto/x509/x_all.c \
+"
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_arm="\
+crypto/aes/asm/aes-armv4.S \
+crypto/sha/asm/sha1-armv4-large.S \
+crypto/sha/asm/sha256-armv4.S \
+crypto/sha/asm/sha512-armv4.S \
+crypto/bn/asm/armv4-gf2m.S \
+crypto/bn/asm/armv4-mont.S \
+"
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_arm=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_mips=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_mips=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_x86=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_x86=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_x86_64=""
+
+OPENSSL_CRYPTO_TRUSTY_SOURCES_EXCLUDES_x86_64=""
diff --git a/openssl.version b/openssl.version
index 87f49ad..ab2e62b 100644
--- a/openssl.version
+++ b/openssl.version
@@ -1 +1 @@
-OPENSSL_VERSION=1.0.1f
+OPENSSL_VERSION=1.0.1h
diff --git a/patches/progs.patch b/patches/0001-progs.patch
similarity index 100%
rename from patches/progs.patch
rename to patches/0001-progs.patch
diff --git a/patches/handshake_cutthrough.patch b/patches/0002-handshake_cutthrough.patch
similarity index 78%
rename from patches/handshake_cutthrough.patch
rename to patches/0002-handshake_cutthrough.patch
index f05a10f..f68fd6f 100644
--- a/patches/handshake_cutthrough.patch
+++ b/patches/0002-handshake_cutthrough.patch
@@ -1,4 +1,4 @@
-From d0e735d01271055f09bc4a1be034253e6e3c2dee Mon Sep 17 00:00:00 2001
+From 4c654523c703645f8b517389b6da537c5a9e5168 Mon Sep 17 00:00:00 2001
From: Adam Langley <[email protected]>
Date: Thu, 24 Jan 2013 16:22:07 -0500
Subject: [PATCH] handshake_cutthrough
@@ -9,14 +9,15 @@
performing full-handshakes.
---
apps/s_client.c | 13 +++++++++++++
- ssl/s3_clnt.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
+ ssl/s3_clnt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
ssl/s3_lib.c | 15 ++++++++++++++-
- ssl/ssl.h | 8 +++++++-
+ ssl/ssl.h | 10 +++++++++-
ssl/ssl3.h | 1 +
- ssl/ssl_lib.c | 13 +++++++++++++
+ ssl/ssl_lib.c | 42 ++++++++++++++++++++++++++++++++++++++++++
+ ssl/ssl_locl.h | 2 ++
ssl/ssltest.c | 12 ++++++++++++
test/testssl | 3 +++
- 8 files changed, 110 insertions(+), 8 deletions(-)
+ 9 files changed, 144 insertions(+), 8 deletions(-)
diff --git a/apps/s_client.c b/apps/s_client.c
index 3ba6605..791e277 100644
@@ -64,35 +65,23 @@
if (cipher != NULL)
if(!SSL_CTX_set_cipher_list(ctx,cipher)) {
diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
-index 344e2eb..c3bf18a 100644
+index a6b3c01..3d3fd64 100644
--- a/ssl/s3_clnt.c
+++ b/ssl/s3_clnt.c
-@@ -215,6 +215,24 @@ int ssl3_connect(SSL *s)
+@@ -215,6 +215,12 @@ int ssl3_connect(SSL *s)
}
#endif
-+// BEGIN android-added
-+#if 0
-+/* Send app data in separate packet, otherwise, some particular site
-+ * (only one site so far) closes the socket. http://b/2511073
-+ * Note: there is a very small chance that two TCP packets
-+ * could be arriving at server combined into a single TCP packet,
-+ * then trigger that site to break. We haven't encounter that though.
-+ */
-+// END android-added
+ if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)
+ {
+ /* Send app data along with CCS/Finished */
+ s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED;
+ }
+
-+// BEGIN android-added
-+#endif
-+// END android-added
for (;;)
{
state=s->state;
-@@ -526,14 +532,31 @@ int ssl3_connect(SSL *s)
+@@ -526,14 +532,32 @@ int ssl3_connect(SSL *s)
}
else
{
@@ -100,7 +89,8 @@
- /* Allow NewSessionTicket if ticket expected */
- if (s->tlsext_ticket_expected)
- s->s3->tmp.next_state=SSL3_ST_CR_SESSION_TICKET_A;
-+ if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && SSL_get_cipher_bits(s, NULL) >= 128
++ if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)
++ && ssl3_can_cutthrough(s)
+ && s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */
+ )
+ {
@@ -130,7 +120,7 @@
}
s->init_num=0;
break;
-@@ -581,6 +604,24 @@ int ssl3_connect(SSL *s)
+@@ -581,6 +605,24 @@ int ssl3_connect(SSL *s)
s->state=s->s3->tmp.next_state;
break;
@@ -156,10 +146,10 @@
/* clean a few things up */
ssl3_cleanup_key_block(s);
diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
-index e7c5dcb..0d77c40 100644
+index c4ef273..1865c70 100644
--- a/ssl/s3_lib.c
+++ b/ssl/s3_lib.c
-@@ -4199,9 +4199,22 @@ int ssl3_write(SSL *s, const void *buf, int len)
+@@ -4211,9 +4211,22 @@ int ssl3_write(SSL *s, const void *buf, int len)
static int ssl3_read_internal(SSL *s, void *buf, int len, int peek)
{
@@ -184,21 +174,30 @@
s->s3->in_read_app_data=1;
ret=s->method->ssl_read_bytes(s,SSL3_RT_APPLICATION_DATA,buf,len,peek);
diff --git a/ssl/ssl.h b/ssl/ssl.h
-index f9c9049..f2af98c 100644
+index 1f255c3..3e31fb5 100644
--- a/ssl/ssl.h
+++ b/ssl/ssl.h
-@@ -649,6 +649,10 @@ struct ssl_session_st
- */
+@@ -644,6 +644,7 @@ struct ssl_session_st
+ * TLS only.) "Released" buffers are put onto a free-list in the context
+ * or just freed (depending on the context's setting for freelist_max_len). */
+ #define SSL_MODE_RELEASE_BUFFERS 0x00000010L
++
+ /* Send the current time in the Random fields of the ClientHello and
+ * ServerHello records for compatibility with hypothetical implementations
+ * that require it.
+@@ -651,6 +652,11 @@ struct ssl_session_st
#define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L
#define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L
+
+/* When set, clients may send application data before receipt of CCS
+ * and Finished. This mode enables full-handshakes to 'complete' in
+ * one RTT. */
+#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L
-
++
/* Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value,
* they cannot be used to clear bits. */
-@@ -1415,10 +1419,12 @@ extern "C" {
+
+@@ -1424,10 +1430,12 @@ extern "C" {
/* Is the SSL_connection established? */
#define SSL_get_state(a) SSL_state(a)
#define SSL_is_init_finished(a) (SSL_state(a) == SSL_ST_OK)
@@ -213,10 +212,10 @@
/* The following 2 states are kept in ssl->rstate when reads fail,
* you should not need these */
diff --git a/ssl/ssl3.h b/ssl/ssl3.h
-index 247e88c..bd0d764 100644
+index cb8b249..9a61b71 100644
--- a/ssl/ssl3.h
+++ b/ssl/ssl3.h
-@@ -547,6 +547,7 @@ typedef struct ssl3_state_st
+@@ -556,6 +556,7 @@ typedef struct ssl3_state_st
/*client */
/* extra state */
#define SSL3_ST_CW_FLUSH (0x100|SSL_ST_CONNECT)
@@ -225,10 +224,10 @@
#define DTLS1_SCTP_ST_CW_WRITE_SOCK (0x310|SSL_ST_CONNECT)
#define DTLS1_SCTP_ST_CR_READ_SOCK (0x320|SSL_ST_CONNECT)
diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
-index 14d143d..a56e6ef 100644
+index 6dbc3c1..7892928 100644
--- a/ssl/ssl_lib.c
+++ b/ssl/ssl_lib.c
-@@ -3225,6 +3225,19 @@ void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int con
+@@ -3225,6 +3225,48 @@ void SSL_set_msg_callback(SSL *ssl, void (*cb)(int write_p, int version, int con
SSL_callback_ctrl(ssl, SSL_CTRL_SET_MSG_CALLBACK, (void (*)(void))cb);
}
@@ -239,17 +238,59 @@
+ s->version >= SSL3_VERSION &&
+ s->s3->in_read_app_data == 0 && /* cutthrough only applies to write() */
+ (SSL_get_mode((SSL*)s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && /* cutthrough enabled */
-+ SSL_get_cipher_bits(s, NULL) >= 128 && /* strong cipher choosen */
++ ssl3_can_cutthrough(s) && /* cutthrough allowed */
+ s->s3->previous_server_finished_len == 0 && /* not a renegotiation handshake */
+ (s->state == SSL3_ST_CR_SESSION_TICKET_A || /* ready to write app-data*/
+ s->state == SSL3_ST_CR_FINISHED_A));
+ }
+
++int ssl3_can_cutthrough(const SSL *s)
++ {
++ const SSL_CIPHER *c;
++
++ /* require a strong enough cipher */
++ if (SSL_get_cipher_bits(s, NULL) < 128)
++ return 0;
++
++ /* require ALPN or NPN extension */
++ if (!s->s3->alpn_selected
++#ifndef OPENSSL_NO_NEXTPROTONEG
++ && !s->s3->next_proto_neg_seen
++#endif
++ )
++ {
++ return 0;
++ }
++
++ /* require a forward-secret cipher */
++ c = SSL_get_current_cipher(s);
++ if (!c || (c->algorithm_mkey != SSL_kEDH &&
++ c->algorithm_mkey != SSL_kEECDH))
++ {
++ return 0;
++ }
++
++ return 1;
++ }
++
/* Allocates new EVP_MD_CTX and sets pointer to it into given pointer
* vairable, freeing EVP_MD_CTX previously stored in that variable, if
* any. If EVP_MD pointer is passed, initializes ctx with this md
+diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
+index e485907..3b1d644 100644
+--- a/ssl/ssl_locl.h
++++ b/ssl/ssl_locl.h
+@@ -1126,6 +1126,8 @@ int tls12_get_sigid(const EVP_PKEY *pk);
+ const EVP_MD *tls12_get_hash(unsigned char hash_alg);
+
+ #endif
++
++int ssl3_can_cutthrough(const SSL *s);
+ EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ;
+ void ssl_clear_hash_ctx(EVP_MD_CTX **hash);
+ int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len,
diff --git a/ssl/ssltest.c b/ssl/ssltest.c
-index 316bbb0..91169bb 100644
+index 4f80be8..28fa223 100644
--- a/ssl/ssltest.c
+++ b/ssl/ssltest.c
@@ -369,6 +369,7 @@ static void sv_usage(void)
@@ -279,7 +320,7 @@
else
{
fprintf(stderr,"unknown option %s\n",*argv);
-@@ -900,6 +906,12 @@ bad:
+@@ -906,6 +912,12 @@ bad:
SSL_CTX_set_cipher_list(c_ctx,cipher);
SSL_CTX_set_cipher_list(s_ctx,cipher);
}
@@ -307,5 +348,5 @@
$ssltest -bio_pair -ssl2 $extra || exit 1
--
-1.8.2.1
+1.9.1.423.g4596e3a
diff --git a/patches/jsse.patch b/patches/0003-jsse.patch
similarity index 97%
rename from patches/jsse.patch
rename to patches/0003-jsse.patch
index 795a2bb..a24c922 100644
--- a/patches/jsse.patch
+++ b/patches/0003-jsse.patch
@@ -10,6 +10,14 @@
/* Default generate session ID callback. */
GEN_SESSION_CB generate_session_id;
+@@ -1545,6 +1548,7 @@
+ char * SSL_CIPHER_get_version(const SSL_CIPHER *c);
+ const char * SSL_CIPHER_get_name(const SSL_CIPHER *c);
+ unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c);
++const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
+
+ int SSL_get_fd(const SSL *s);
+ int SSL_get_rfd(const SSL *s);
@@ -1554,6 +1558,7 @@ const char * SSL_get_cipher_list(const
char * SSL_get_shared_ciphers(const SSL *s, char *buf, int len);
int SSL_get_read_ahead(const SSL * s);
diff --git a/patches/0004-channelid.patch b/patches/0004-channelid.patch
new file mode 100644
index 0000000..3b9ec8a
--- /dev/null
+++ b/patches/0004-channelid.patch
@@ -0,0 +1,1462 @@
+diff --git a/crypto/bio/bio.h b/crypto/bio/bio.h
+index 05699ab..d05fa22 100644
+--- a/crypto/bio/bio.h
++++ b/crypto/bio/bio.h
+@@ -266,6 +266,9 @@ void BIO_clear_flags(BIO *b, int flags);
+ #define BIO_RR_CONNECT 0x02
+ /* Returned from the accept BIO when an accept would have blocked */
+ #define BIO_RR_ACCEPT 0x03
++/* Returned from the SSL bio when the channel id retrieval code cannot find the
++ * private key. */
++#define BIO_RR_SSL_CHANNEL_ID_LOOKUP 0x04
+
+ /* These are passed by the BIO callback */
+ #define BIO_CB_FREE 0x01
+diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h
+index ea4bed9..5f18d4b 100644
+--- a/crypto/evp/evp.h
++++ b/crypto/evp/evp.h
+@@ -921,6 +921,7 @@ struct ec_key_st *EVP_PKEY_get1_EC_KEY(EVP_PKEY *pkey);
+ #endif
+
+ EVP_PKEY * EVP_PKEY_new(void);
++EVP_PKEY * EVP_PKEY_dup(EVP_PKEY *pkey);
+ void EVP_PKEY_free(EVP_PKEY *pkey);
+
+ EVP_PKEY * d2i_PublicKey(int type,EVP_PKEY **a, const unsigned char **pp,
+diff --git a/crypto/evp/p_lib.c b/crypto/evp/p_lib.c
+index a0e14a3..65a4440 100644
+--- a/crypto/evp/p_lib.c
++++ b/crypto/evp/p_lib.c
+@@ -200,6 +200,12 @@ EVP_PKEY *EVP_PKEY_new(void)
+ return(ret);
+ }
+
++EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey)
++ {
++ CRYPTO_add(&pkey->references,1,CRYPTO_LOCK_EVP_PKEY);
++ return pkey;
++ }
++
+ /* Setup a public key ASN1 method and ENGINE from a NID or a string.
+ * If pkey is NULL just return 1 or 0 if the algorithm exists.
+ */
+diff --git a/ssl/bio_ssl.c b/ssl/bio_ssl.c
+index e9552ca..06a13de 100644
+--- a/ssl/bio_ssl.c
++++ b/ssl/bio_ssl.c
+@@ -206,6 +206,10 @@ static int ssl_read(BIO *b, char *out, int outl)
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_SSL_X509_LOOKUP;
+ break;
++ case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP:
++ BIO_set_retry_special(b);
++ retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP;
++ break;
+ case SSL_ERROR_WANT_ACCEPT:
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_ACCEPT;
+@@ -280,6 +284,10 @@ static int ssl_write(BIO *b, const char *out, int outl)
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_SSL_X509_LOOKUP;
+ break;
++ case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP:
++ BIO_set_retry_special(b);
++ retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP;
++ break;
+ case SSL_ERROR_WANT_CONNECT:
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_CONNECT;
+diff --git a/ssl/s3_both.c b/ssl/s3_both.c
+index 53b9390..c0dac70 100644
+--- a/ssl/s3_both.c
++++ b/ssl/s3_both.c
+@@ -554,7 +554,8 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok)
+ #endif
+
+ /* Feed this message into MAC computation. */
+- ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
++ if (*((unsigned char*) s->init_buf->data) != SSL3_MT_ENCRYPTED_EXTENSIONS)
++ ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
+ if (s->msg_callback)
+ s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg);
+ *ok=1;
+diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
+index 3d3fd64..7e0c4d5 100644
+--- a/ssl/s3_clnt.c
++++ b/ssl/s3_clnt.c
+@@ -465,13 +465,14 @@ int ssl3_connect(SSL *s)
+ SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B);
+ if (ret <= 0) goto end;
+
+-#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
+ s->state=SSL3_ST_CW_FINISHED_A;
+-#else
++#if !defined(OPENSSL_NO_TLSEXT)
++ if (s->s3->tlsext_channel_id_valid)
++ s->state=SSL3_ST_CW_CHANNEL_ID_A;
++# if !defined(OPENSSL_NO_NEXTPROTONEG)
+ if (s->s3->next_proto_neg_seen)
+ s->state=SSL3_ST_CW_NEXT_PROTO_A;
+- else
+- s->state=SSL3_ST_CW_FINISHED_A;
++# endif
+ #endif
+ s->init_num=0;
+
+@@ -505,6 +506,18 @@ int ssl3_connect(SSL *s)
+ case SSL3_ST_CW_NEXT_PROTO_B:
+ ret=ssl3_send_next_proto(s);
+ if (ret <= 0) goto end;
++ if (s->s3->tlsext_channel_id_valid)
++ s->state=SSL3_ST_CW_CHANNEL_ID_A;
++ else
++ s->state=SSL3_ST_CW_FINISHED_A;
++ break;
++#endif
++
++#if !defined(OPENSSL_NO_TLSEXT)
++ case SSL3_ST_CW_CHANNEL_ID_A:
++ case SSL3_ST_CW_CHANNEL_ID_B:
++ ret=ssl3_send_channel_id(s);
++ if (ret <= 0) goto end;
+ s->state=SSL3_ST_CW_FINISHED_A;
+ break;
+ #endif
+@@ -532,6 +545,18 @@ int ssl3_connect(SSL *s)
+ }
+ else
+ {
++ /* This is a non-resumption handshake. If it
++ * involves ChannelID, then record the
++ * handshake hashes at this point in the
++ * session so that any resumption of this
++ * session with ChannelID can sign those
++ * hashes. */
++ if (s->s3->tlsext_channel_id_new)
++ {
++ ret = tls1_record_handshake_hashes_for_channel_id(s);
++ if (ret <= 0)
++ goto end;
++ }
+ if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)
+ && ssl3_can_cutthrough(s)
+ && s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */
+@@ -3338,7 +3363,8 @@ err:
+ return(0);
+ }
+
+-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
++#if !defined(OPENSSL_NO_TLSEXT)
++# if !defined(OPENSSL_NO_NEXTPROTONEG)
+ int ssl3_send_next_proto(SSL *s)
+ {
+ unsigned int len, padding_len;
+@@ -3362,7 +3388,135 @@ int ssl3_send_next_proto(SSL *s)
+
+ return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
+ }
+-#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */
++# endif /* !OPENSSL_NO_NEXTPROTONEG */
++
++int ssl3_send_channel_id(SSL *s)
++ {
++ unsigned char *d;
++ int ret = -1, public_key_len;
++ EVP_MD_CTX md_ctx;
++ size_t sig_len;
++ ECDSA_SIG *sig = NULL;
++ unsigned char *public_key = NULL, *derp, *der_sig = NULL;
++
++ if (s->state != SSL3_ST_CW_CHANNEL_ID_A)
++ return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
++
++ if (!s->tlsext_channel_id_private && s->ctx->channel_id_cb)
++ {
++ EVP_PKEY *key = NULL;
++ s->ctx->channel_id_cb(s, &key);
++ if (key != NULL)
++ {
++ s->tlsext_channel_id_private = key;
++ }
++ }
++ if (!s->tlsext_channel_id_private)
++ {
++ s->rwstate=SSL_CHANNEL_ID_LOOKUP;
++ return (-1);
++ }
++ s->rwstate=SSL_NOTHING;
++
++ d = (unsigned char *)s->init_buf->data;
++ *(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS;
++ l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d);
++ if (s->s3->tlsext_channel_id_new)
++ s2n(TLSEXT_TYPE_channel_id_new, d);
++ else
++ s2n(TLSEXT_TYPE_channel_id, d);
++ s2n(TLSEXT_CHANNEL_ID_SIZE, d);
++
++ EVP_MD_CTX_init(&md_ctx);
++
++ public_key_len = i2d_PublicKey(s->tlsext_channel_id_private, NULL);
++ if (public_key_len <= 0)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY);
++ goto err;
++ }
++ /* i2d_PublicKey will produce an ANSI X9.62 public key which, for a
++ * P-256 key, is 0x04 (meaning uncompressed) followed by the x and y
++ * field elements as 32-byte, big-endian numbers. */
++ if (public_key_len != 65)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256);
++ goto err;
++ }
++ public_key = OPENSSL_malloc(public_key_len);
++ if (!public_key)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE);
++ goto err;
++ }
++
++ derp = public_key;
++ i2d_PublicKey(s->tlsext_channel_id_private, &derp);
++
++ if (EVP_DigestSignInit(&md_ctx, NULL, EVP_sha256(), NULL,
++ s->tlsext_channel_id_private) != 1)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNINIT_FAILED);
++ goto err;
++ }
++
++ if (!tls1_channel_id_hash(&md_ctx, s))
++ goto err;
++
++ if (!EVP_DigestSignFinal(&md_ctx, NULL, &sig_len))
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED);
++ goto err;
++ }
++
++ der_sig = OPENSSL_malloc(sig_len);
++ if (!der_sig)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE);
++ goto err;
++ }
++
++ if (!EVP_DigestSignFinal(&md_ctx, der_sig, &sig_len))
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED);
++ goto err;
++ }
++
++ derp = der_sig;
++ sig = d2i_ECDSA_SIG(NULL, (const unsigned char**) &derp, sig_len);
++ if (sig == NULL)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG);
++ goto err;
++ }
++
++ /* The first byte of public_key will be 0x4, denoting an uncompressed key. */
++ memcpy(d, public_key + 1, 64);
++ d += 64;
++ memset(d, 0, 2 * 32);
++ BN_bn2bin(sig->r, d + 32 - BN_num_bytes(sig->r));
++ d += 32;
++ BN_bn2bin(sig->s, d + 32 - BN_num_bytes(sig->s));
++ d += 32;
++
++ s->state = SSL3_ST_CW_CHANNEL_ID_B;
++ s->init_num = 4 + 2 + 2 + TLSEXT_CHANNEL_ID_SIZE;
++ s->init_off = 0;
++
++ ret = ssl3_do_write(s, SSL3_RT_HANDSHAKE);
++
++err:
++ EVP_MD_CTX_cleanup(&md_ctx);
++ if (public_key)
++ OPENSSL_free(public_key);
++ if (der_sig)
++ OPENSSL_free(der_sig);
++ if (sig)
++ ECDSA_SIG_free(sig);
++
++ return ret;
++ }
++#endif /* !OPENSSL_NO_TLSEXT */
+
+ /* Check to see if handshake is full or resumed. Usually this is just a
+ * case of checking to see if a cache hit has occurred. In the case of
+diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
+index 1865c70..f801923 100644
+--- a/ssl/s3_lib.c
++++ b/ssl/s3_lib.c
+@@ -2951,6 +2951,11 @@ int ssl3_new(SSL *s)
+ #ifndef OPENSSL_NO_SRP
+ SSL_SRP_CTX_init(s);
+ #endif
++#if !defined(OPENSSL_NO_TLSEXT)
++ s->tlsext_channel_id_enabled = s->ctx->tlsext_channel_id_enabled;
++ if (s->ctx->tlsext_channel_id_private)
++ s->tlsext_channel_id_private = EVP_PKEY_dup(s->ctx->tlsext_channel_id_private);
++#endif
+ s->method->ssl_clear(s);
+ return(1);
+ err:
+@@ -3079,6 +3084,10 @@ void ssl3_clear(SSL *s)
+ s->next_proto_negotiated_len = 0;
+ }
+ #endif
++
++#if !defined(OPENSSL_NO_TLSEXT)
++ s->s3->tlsext_channel_id_valid = 0;
++#endif
+ }
+
+ #ifndef OPENSSL_NO_SRP
+@@ -3353,6 +3362,33 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
+ ret = 1;
+ break;
+ #endif
++ case SSL_CTRL_CHANNEL_ID:
++ s->tlsext_channel_id_enabled = 1;
++ ret = 1;
++ break;
++
++ case SSL_CTRL_SET_CHANNEL_ID:
++ if (s->server)
++ break;
++ s->tlsext_channel_id_enabled = 1;
++ if (EVP_PKEY_bits(parg) != 256)
++ {
++ SSLerr(SSL_F_SSL3_CTRL,SSL_R_CHANNEL_ID_NOT_P256);
++ break;
++ }
++ if (s->tlsext_channel_id_private)
++ EVP_PKEY_free(s->tlsext_channel_id_private);
++ s->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg);
++ ret = 1;
++ break;
++
++ case SSL_CTRL_GET_CHANNEL_ID:
++ if (!s->server)
++ break;
++ if (!s->s3->tlsext_channel_id_valid)
++ break;
++ memcpy(parg, s->s3->tlsext_channel_id, larg < 64 ? larg : 64);
++ return 64;
+
+ #endif /* !OPENSSL_NO_TLSEXT */
+ default:
+@@ -3574,6 +3610,12 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
+ }
+ return 1;
+ }
++ case SSL_CTRL_CHANNEL_ID:
++ /* must be called on a server */
++ if (ctx->method->ssl_accept == ssl_undefined_function)
++ return 0;
++ ctx->tlsext_channel_id_enabled=1;
++ return 1;
+
+ #ifdef TLSEXT_TYPE_opaque_prf_input
+ case SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG:
+@@ -3642,6 +3684,18 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
+ }
+ break;
+
++ case SSL_CTRL_SET_CHANNEL_ID:
++ ctx->tlsext_channel_id_enabled = 1;
++ if (EVP_PKEY_bits(parg) != 256)
++ {
++ SSLerr(SSL_F_SSL3_CTX_CTRL,SSL_R_CHANNEL_ID_NOT_P256);
++ break;
++ }
++ if (ctx->tlsext_channel_id_private)
++ EVP_PKEY_free(ctx->tlsext_channel_id_private);
++ ctx->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg);
++ break;
++
+ default:
+ return(0);
+ }
+diff --git a/ssl/s3_srvr.c b/ssl/s3_srvr.c
+index 323b260..6824ef6 100644
+--- a/ssl/s3_srvr.c
++++ b/ssl/s3_srvr.c
+@@ -157,8 +157,11 @@
+ #include <openssl/buffer.h>
+ #include <openssl/rand.h>
+ #include <openssl/objects.h>
++#include <openssl/ec.h>
++#include <openssl/ecdsa.h>
+ #include <openssl/evp.h>
+ #include <openssl/hmac.h>
++#include <openssl/sha.h>
+ #include <openssl/x509.h>
+ #ifndef OPENSSL_NO_DH
+ #include <openssl/dh.h>
+@@ -615,15 +618,8 @@ int ssl3_accept(SSL *s)
+ * the client uses its key from the certificate
+ * for key exchange.
+ */
+-#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
+- s->state=SSL3_ST_SR_FINISHED_A;
+-#else
+- if (s->s3->next_proto_neg_seen)
+- s->state=SSL3_ST_SR_NEXT_PROTO_A;
+- else
+- s->state=SSL3_ST_SR_FINISHED_A;
+-#endif
+ s->init_num = 0;
++ s->state=SSL3_ST_SR_POST_CLIENT_CERT;
+ }
+ else if (TLS1_get_version(s) >= TLS1_2_VERSION)
+ {
+@@ -683,16 +679,28 @@ int ssl3_accept(SSL *s)
+ ret=ssl3_get_cert_verify(s);
+ if (ret <= 0) goto end;
+
+-#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
+- s->state=SSL3_ST_SR_FINISHED_A;
+-#else
+- if (s->s3->next_proto_neg_seen)
++ s->state=SSL3_ST_SR_POST_CLIENT_CERT;
++ s->init_num=0;
++ break;
++
++ case SSL3_ST_SR_POST_CLIENT_CERT: {
++ char next_proto_neg = 0;
++ char channel_id = 0;
++#if !defined(OPENSSL_NO_TLSEXT)
++# if !defined(OPENSSL_NO_NEXTPROTONEG)
++ next_proto_neg = s->s3->next_proto_neg_seen;
++# endif
++ channel_id = s->s3->tlsext_channel_id_valid;
++#endif
++
++ if (next_proto_neg)
+ s->state=SSL3_ST_SR_NEXT_PROTO_A;
++ else if (channel_id)
++ s->state=SSL3_ST_SR_CHANNEL_ID_A;
+ else
+ s->state=SSL3_ST_SR_FINISHED_A;
+-#endif
+- s->init_num=0;
+ break;
++ }
+
+ #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ case SSL3_ST_SR_NEXT_PROTO_A:
+@@ -700,6 +708,19 @@ int ssl3_accept(SSL *s)
+ ret=ssl3_get_next_proto(s);
+ if (ret <= 0) goto end;
+ s->init_num = 0;
++ if (s->s3->tlsext_channel_id_valid)
++ s->state=SSL3_ST_SR_CHANNEL_ID_A;
++ else
++ s->state=SSL3_ST_SR_FINISHED_A;
++ break;
++#endif
++
++#if !defined(OPENSSL_NO_TLSEXT)
++ case SSL3_ST_SR_CHANNEL_ID_A:
++ case SSL3_ST_SR_CHANNEL_ID_B:
++ ret=ssl3_get_channel_id(s);
++ if (ret <= 0) goto end;
++ s->init_num = 0;
+ s->state=SSL3_ST_SR_FINISHED_A;
+ break;
+ #endif
+@@ -717,6 +738,15 @@ int ssl3_accept(SSL *s)
+ #endif
+ else
+ s->state=SSL3_ST_SW_CHANGE_A;
++ /* If this is a full handshake with ChannelID then
++ * record the hashshake hashes in |s->session| in case
++ * we need them to verify a ChannelID signature on a
++ * resumption of this session in the future. */
++ if (!s->hit && s->s3->tlsext_channel_id_new)
++ {
++ ret = tls1_record_handshake_hashes_for_channel_id(s);
++ if (ret <= 0) goto end;
++ }
+ s->init_num=0;
+ break;
+
+@@ -771,19 +801,7 @@ int ssl3_accept(SSL *s)
+ if (ret <= 0) goto end;
+ s->state=SSL3_ST_SW_FLUSH;
+ if (s->hit)
+- {
+-#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
+- s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
+-#else
+- if (s->s3->next_proto_neg_seen)
+- {
+- s->s3->flags |= SSL3_FLAGS_CCS_OK;
+- s->s3->tmp.next_state=SSL3_ST_SR_NEXT_PROTO_A;
+- }
+- else
+- s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
+-#endif
+- }
++ s->s3->tmp.next_state=SSL3_ST_SR_POST_CLIENT_CERT;
+ else
+ s->s3->tmp.next_state=SSL_ST_OK;
+ s->init_num=0;
+@@ -1466,6 +1487,22 @@ int ssl3_send_server_hello(SSL *s)
+
+ if (s->state == SSL3_ST_SW_SRVR_HELLO_A)
+ {
++ /* We only accept ChannelIDs on connections with ECDHE in order
++ * to avoid a known attack while we fix ChannelID itself. */
++ if (s->s3 &&
++ s->s3->tlsext_channel_id_valid &&
++ (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kEECDH) == 0)
++ s->s3->tlsext_channel_id_valid = 0;
++
++ /* If this is a resumption and the original handshake didn't
++ * support ChannelID then we didn't record the original
++ * handshake hashes in the session and so cannot resume with
++ * ChannelIDs. */
++ if (s->hit &&
++ s->s3->tlsext_channel_id_new &&
++ s->session->original_handshake_hash_len == 0)
++ s->s3->tlsext_channel_id_valid = 0;
++
+ buf=(unsigned char *)s->init_buf->data;
+ #ifdef OPENSSL_NO_TLSEXT
+ p=s->s3->server_random;
+@@ -3632,4 +3669,145 @@ int ssl3_get_next_proto(SSL *s)
+ return 1;
+ }
+ # endif
++
++/* ssl3_get_channel_id reads and verifies a ClientID handshake message. */
++int ssl3_get_channel_id(SSL *s)
++ {
++ int ret = -1, ok;
++ long n;
++ const unsigned char *p;
++ unsigned short extension_type, extension_len;
++ EC_GROUP* p256 = NULL;
++ EC_KEY* key = NULL;
++ EC_POINT* point = NULL;
++ ECDSA_SIG sig;
++ BIGNUM x, y;
++ unsigned short expected_extension_type;
++
++ if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0)
++ {
++ /* The first time that we're called we take the current
++ * handshake hash and store it. */
++ EVP_MD_CTX md_ctx;
++ unsigned int len;
++
++ EVP_MD_CTX_init(&md_ctx);
++ EVP_DigestInit_ex(&md_ctx, EVP_sha256(), NULL);
++ if (!tls1_channel_id_hash(&md_ctx, s))
++ return -1;
++ len = sizeof(s->s3->tlsext_channel_id);
++ EVP_DigestFinal(&md_ctx, s->s3->tlsext_channel_id, &len);
++ EVP_MD_CTX_cleanup(&md_ctx);
++ }
++
++ n = s->method->ssl_get_message(s,
++ SSL3_ST_SR_CHANNEL_ID_A,
++ SSL3_ST_SR_CHANNEL_ID_B,
++ SSL3_MT_ENCRYPTED_EXTENSIONS,
++ 2 + 2 + TLSEXT_CHANNEL_ID_SIZE,
++ &ok);
++
++ if (!ok)
++ return((int)n);
++
++ ssl3_finish_mac(s, (unsigned char*)s->init_buf->data, s->init_num + 4);
++
++ /* s->state doesn't reflect whether ChangeCipherSpec has been received
++ * in this handshake, but s->s3->change_cipher_spec does (will be reset
++ * by ssl3_get_finished). */
++ if (!s->s3->change_cipher_spec)
++ {
++ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS);
++ return -1;
++ }
++
++ if (n != 2 + 2 + TLSEXT_CHANNEL_ID_SIZE)
++ {
++ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE);
++ return -1;
++ }
++
++ p = (unsigned char *)s->init_msg;
++
++ /* The payload looks like:
++ * uint16 extension_type
++ * uint16 extension_len;
++ * uint8 x[32];
++ * uint8 y[32];
++ * uint8 r[32];
++ * uint8 s[32];
++ */
++ n2s(p, extension_type);
++ n2s(p, extension_len);
++
++ expected_extension_type = TLSEXT_TYPE_channel_id;
++ if (s->s3->tlsext_channel_id_new)
++ expected_extension_type = TLSEXT_TYPE_channel_id_new;
++
++ if (extension_type != expected_extension_type ||
++ extension_len != TLSEXT_CHANNEL_ID_SIZE)
++ {
++ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE);
++ return -1;
++ }
++
++ p256 = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
++ if (!p256)
++ {
++ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_NO_P256_SUPPORT);
++ return -1;
++ }
++
++ BN_init(&x);
++ BN_init(&y);
++ sig.r = BN_new();
++ sig.s = BN_new();
++
++ if (BN_bin2bn(p + 0, 32, &x) == NULL ||
++ BN_bin2bn(p + 32, 32, &y) == NULL ||
++ BN_bin2bn(p + 64, 32, sig.r) == NULL ||
++ BN_bin2bn(p + 96, 32, sig.s) == NULL)
++ goto err;
++
++ point = EC_POINT_new(p256);
++ if (!point ||
++ !EC_POINT_set_affine_coordinates_GFp(p256, point, &x, &y, NULL))
++ goto err;
++
++ key = EC_KEY_new();
++ if (!key ||
++ !EC_KEY_set_group(key, p256) ||
++ !EC_KEY_set_public_key(key, point))
++ goto err;
++
++ /* We stored the handshake hash in |tlsext_channel_id| the first time
++ * that we were called. */
++ switch (ECDSA_do_verify(s->s3->tlsext_channel_id, SHA256_DIGEST_LENGTH, &sig, key)) {
++ case 1:
++ break;
++ case 0:
++ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_CHANNEL_ID_SIGNATURE_INVALID);
++ s->s3->tlsext_channel_id_valid = 0;
++ goto err;
++ default:
++ s->s3->tlsext_channel_id_valid = 0;
++ goto err;
++ }
++
++ memcpy(s->s3->tlsext_channel_id, p, 64);
++ ret = 1;
++
++err:
++ BN_free(&x);
++ BN_free(&y);
++ BN_free(sig.r);
++ BN_free(sig.s);
++ if (key)
++ EC_KEY_free(key);
++ if (point)
++ EC_POINT_free(point);
++ if (p256)
++ EC_GROUP_free(p256);
++ return ret;
++ }
+ #endif
+diff --git a/ssl/ssl.h b/ssl/ssl.h
+index 944aea6..e50b8f0 100644
+--- a/ssl/ssl.h
++++ b/ssl/ssl.h
+@@ -547,6 +547,13 @@ struct ssl_session_st
+ #ifndef OPENSSL_NO_SRP
+ char *srp_username;
+ #endif
++
++ /* original_handshake_hash contains the handshake hash (either
++ * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full
++ * handshake that created a session. This is used by Channel IDs during
++ * resumption. */
++ unsigned char original_handshake_hash[EVP_MAX_MD_SIZE];
++ unsigned int original_handshake_hash_len;
+ };
+
+ #endif
+@@ -862,6 +869,9 @@ struct ssl_ctx_st
+ /* get client cert callback */
+ int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
+
++ /* get channel id callback */
++ void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey);
++
+ /* cookie generate callback */
+ int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie,
+ unsigned int *cookie_len);
+@@ -999,6 +1009,16 @@ struct ssl_ctx_st
+ # endif
+ /* SRTP profiles we are willing to do from RFC 5764 */
+ STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles;
++
++ /* If true, a client will advertise the Channel ID extension and a
++ * server will echo it. */
++ char tlsext_channel_id_enabled;
++ /* tlsext_channel_id_enabled_new is a hack to support both old and new
++ * ChannelID signatures. It indicates that a client should advertise the
++ * new ChannelID extension number. */
++ char tlsext_channel_id_enabled_new;
++ /* The client's Channel ID private key. */
++ EVP_PKEY *tlsext_channel_id_private;
+ #endif
+ };
+
+@@ -1040,6 +1060,10 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx);
+ SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL)
+ #define SSL_CTX_sess_cache_full(ctx) \
+ SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL)
++/* SSL_CTX_enable_tls_channel_id configures a TLS server to accept TLS client
++ * IDs from clients. Returns 1 on success. */
++#define SSL_CTX_enable_tls_channel_id(ctx) \
++ SSL_CTX_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)
+
+ void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, int (*new_session_cb)(struct ssl_st *ssl,SSL_SESSION *sess));
+ int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx))(struct ssl_st *ssl, SSL_SESSION *sess);
+@@ -1056,6 +1080,8 @@ void SSL_CTX_set_info_callback(SSL_CTX *ctx, void (*cb)(const SSL *ssl,int type,
+ void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val);
+ void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey));
+ int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
++void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey));
++void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey);
+ #ifndef OPENSSL_NO_ENGINE
+ int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e);
+ #endif
+@@ -1117,5 +1143,6 @@ const char *SSL_get_psk_identity(const SSL *s);
+ #define SSL_WRITING 2
+ #define SSL_READING 3
+ #define SSL_X509_LOOKUP 4
++#define SSL_CHANNEL_ID_LOOKUP 5
+
+ /* These will only be used when doing non-blocking IO */
+@@ -1124,5 +1151,6 @@ const char *SSL_get_psk_identity(const SSL *s);
+ #define SSL_want_read(s) (SSL_want(s) == SSL_READING)
+ #define SSL_want_write(s) (SSL_want(s) == SSL_WRITING)
+ #define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP)
++#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP)
+
+ #define SSL_MAC_FLAG_READ_MAC_STREAM 1
+@@ -1373,6 +1401,13 @@ struct ssl_st
+ */
+ unsigned int tlsext_hb_pending; /* Indicates if a HeartbeatRequest is in flight */
+ unsigned int tlsext_hb_seq; /* HeartbeatRequest sequence number */
++
++ /* Copied from the SSL_CTX. For a server, means that we'll accept
++ * Channel IDs from clients. For a client, means that we'll advertise
++ * support. */
++ char tlsext_channel_id_enabled;
++ /* The client's Channel ID private key. */
++ EVP_PKEY *tlsext_channel_id_private;
+ #else
+ #define session_ctx ctx
+ #endif /* OPENSSL_NO_TLSEXT */
+@@ -1543,5 +1578,6 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_ERROR_ZERO_RETURN 6
+ #define SSL_ERROR_WANT_CONNECT 7
+ #define SSL_ERROR_WANT_ACCEPT 8
++#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9
+
+ #define SSL_CTRL_NEED_TMP_RSA 1
+@@ -1631,6 +1667,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING 86
+ #define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS 87
+ #endif
++#define SSL_CTRL_CHANNEL_ID 88
++#define SSL_CTRL_GET_CHANNEL_ID 89
++#define SSL_CTRL_SET_CHANNEL_ID 90
+ #endif
+
+ #define DTLS_CTRL_GET_TIMEOUT 73
+@@ -1678,6 +1717,26 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_set_tmp_ecdh(ssl,ecdh) \
+ SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh)
+
++/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client
++ * IDs from clients, or configure a client to send TLS client IDs to server.
++ * Returns 1 on success. */
++#define SSL_enable_tls_channel_id(s) \
++ SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL)
++/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to
++ * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on
++ * success. */
++#define SSL_set1_tls_channel_id(s, private_key) \
++ SSL_ctrl(s,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key)
++#define SSL_CTX_set1_tls_channel_id(ctx, private_key) \
++ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key)
++/* SSL_get_tls_channel_id gets the client's TLS Channel ID from a server SSL*
++ * and copies up to the first |channel_id_len| bytes into |channel_id|. The
++ * Channel ID consists of the client's P-256 public key as an (x,y) pair where
++ * each is a 32-byte, big-endian field element. Returns 0 if the client didn't
++ * offer a Channel ID and the length of the complete Channel ID otherwise. */
++#define SSL_get_tls_channel_id(ctx, channel_id, channel_id_len) \
++ SSL_ctrl(ctx,SSL_CTRL_GET_CHANNEL_ID,channel_id_len,(void*)channel_id)
++
+ #define SSL_CTX_add_extra_chain_cert(ctx,x509) \
+ SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509)
+ #define SSL_CTX_get_extra_chain_certs(ctx,px509) \
+@@ -2176,6 +2235,7 @@ void ERR_load_SSL_strings(void);
+ #define SSL_F_SSL3_GET_CERTIFICATE_REQUEST 135
+ #define SSL_F_SSL3_GET_CERT_STATUS 289
+ #define SSL_F_SSL3_GET_CERT_VERIFY 136
++#define SSL_F_SSL3_GET_CHANNEL_ID 317
+ #define SSL_F_SSL3_GET_CLIENT_CERTIFICATE 137
+ #define SSL_F_SSL3_GET_CLIENT_HELLO 138
+ #define SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE 139
+@@ -2195,6 +2255,7 @@ void ERR_load_SSL_strings(void);
+ #define SSL_F_SSL3_READ_BYTES 148
+ #define SSL_F_SSL3_READ_N 149
+ #define SSL_F_SSL3_SEND_CERTIFICATE_REQUEST 150
++#define SSL_F_SSL3_SEND_CHANNEL_ID 318
+ #define SSL_F_SSL3_SEND_CLIENT_CERTIFICATE 151
+ #define SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE 152
+ #define SSL_F_SSL3_SEND_CLIENT_VERIFY 153
+@@ -2361,12 +2422,15 @@ void ERR_load_SSL_strings(void);
+ #define SSL_R_BIO_NOT_SET 128
+ #define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG 129
+ #define SSL_R_BN_LIB 130
++#define SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY 376
+ #define SSL_R_CA_DN_LENGTH_MISMATCH 131
+ #define SSL_R_CA_DN_TOO_LONG 132
+ #define SSL_R_CCS_RECEIVED_EARLY 133
+ #define SSL_R_CERTIFICATE_VERIFY_FAILED 134
+ #define SSL_R_CERT_LENGTH_MISMATCH 135
+ #define SSL_R_CHALLENGE_IS_DIFFERENT 136
++#define SSL_R_CHANNEL_ID_NOT_P256 375
++#define SSL_R_CHANNEL_ID_SIGNATURE_INVALID 371
+ #define SSL_R_CIPHER_CODE_WRONG_LENGTH 137
+ #define SSL_R_CIPHER_OR_HASH_UNAVAILABLE 138
+ #define SSL_R_CIPHER_TABLE_SRC_ERROR 139
+@@ -2379,6 +2443,7 @@ void ERR_load_SSL_strings(void);
+ #define SSL_R_CONNECTION_ID_IS_DIFFERENT 143
+ #define SSL_R_CONNECTION_TYPE_NOT_SET 144
+ #define SSL_R_COOKIE_MISMATCH 308
++#define SSL_R_D2I_ECDSA_SIG 379
+ #define SSL_R_DATA_BETWEEN_CCS_AND_FINISHED 145
+ #define SSL_R_DATA_LENGTH_TOO_LONG 146
+ #define SSL_R_DECRYPTION_FAILED 147
+@@ -2396,9 +2461,12 @@ void ERR_load_SSL_strings(void);
+ #define SSL_R_ENCRYPTED_LENGTH_TOO_LONG 150
+ #define SSL_R_ERROR_GENERATING_TMP_RSA_KEY 282
+ #define SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST 151
++#define SSL_R_EVP_DIGESTSIGNFINAL_FAILED 377
++#define SSL_R_EVP_DIGESTSIGNINIT_FAILED 378
+ #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152
+ #define SSL_R_EXTRA_DATA_IN_MESSAGE 153
+ #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154
++#define SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS 372
+ #define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 355
+ #define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 356
+ #define SSL_R_HTTPS_PROXY_REQUEST 155
+@@ -2408,6 +2476,7 @@ void ERR_load_SSL_strings(void);
+ #define SSL_R_INVALID_CHALLENGE_LENGTH 158
+ #define SSL_R_INVALID_COMMAND 280
+ #define SSL_R_INVALID_COMPRESSION_ALGORITHM 341
++#define SSL_R_INVALID_MESSAGE 374
+ #define SSL_R_INVALID_PURPOSE 278
+ #define SSL_R_INVALID_SRP_USERNAME 357
+ #define SSL_R_INVALID_STATUS_RESPONSE 328
+@@ -2462,6 +2531,7 @@ void ERR_load_SSL_strings(void);
+ #define SSL_R_NO_COMPRESSION_SPECIFIED 187
+ #define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER 330
+ #define SSL_R_NO_METHOD_SPECIFIED 188
++#define SSL_R_NO_P256_SUPPORT 373
+ #define SSL_R_NO_PRIVATEKEY 189
+ #define SSL_R_NO_PRIVATE_KEY_ASSIGNED 190
+ #define SSL_R_NO_PROTOCOLS_AVAILABLE 191
+diff --git a/ssl/ssl3.h b/ssl/ssl3.h
+index cf81de0..8502628 100644
+--- a/ssl/ssl3.h
++++ b/ssl/ssl3.h
+@@ -548,6 +548,22 @@ typedef struct ssl3_state_st
+ char is_probably_safari;
+ #endif /* !OPENSSL_NO_EC */
+ #endif /* !OPENSSL_NO_TLSEXT */
++
++ /* In a client, this means that the server supported Channel ID and that
++ * a Channel ID was sent. In a server it means that we echoed support
++ * for Channel IDs and that tlsext_channel_id will be valid after the
++ * handshake. */
++ char tlsext_channel_id_valid;
++ /* tlsext_channel_id_new means that the updated Channel ID extension
++ * was negotiated. This is a temporary hack in the code to support both
++ * forms of Channel ID extension while we transition to the new format,
++ * which fixed a security issue. */
++ char tlsext_channel_id_new;
++ /* For a server:
++ * If |tlsext_channel_id_valid| is true, then this contains the
++ * verified Channel ID from the client: a P256 point, (x,y), where
++ * each are big-endian values. */
++ unsigned char tlsext_channel_id[64];
+ } SSL3_STATE;
+
+ #endif
+@@ -592,6 +608,8 @@ typedef struct ssl3_state_st
+ #define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT)
+ #define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT)
+ #endif
++#define SSL3_ST_CW_CHANNEL_ID_A (0x210|SSL_ST_CONNECT)
++#define SSL3_ST_CW_CHANNEL_ID_B (0x211|SSL_ST_CONNECT)
+ #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT)
+ #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT)
+ /* read from server */
+@@ -646,6 +664,9 @@ typedef struct ssl3_state_st
+ #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
+ #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
+ #endif
++#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
++#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT)
++#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT)
+ #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
+ #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT)
+ /* write to client */
+@@ -673,6 +694,7 @@ typedef struct ssl3_state_st
+ #ifndef OPENSSL_NO_NEXTPROTONEG
+ #define SSL3_MT_NEXT_PROTO 67
+ #endif
++#define SSL3_MT_ENCRYPTED_EXTENSIONS 203
+ #define DTLS1_MT_HELLO_VERIFY_REQUEST 3
+
+
+diff --git a/ssl/ssl_asn1.c b/ssl/ssl_asn1.c
+index 8bda011..e579e7c 100644
+--- a/ssl/ssl_asn1.c
++++ b/ssl/ssl_asn1.c
+@@ -118,11 +118,12 @@ typedef struct ssl_session_asn1_st
+ ASN1_OCTET_STRING srp_username;
+ #endif /* OPENSSL_NO_SRP */
++ ASN1_OCTET_STRING original_handshake_hash;
+ } SSL_SESSION_ASN1;
+
+ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)
+ {
+ #define LSIZE2 (sizeof(long)*2)
+- int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0;
++ int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0,v14=0;
+ unsigned char buf[4],ibuf1[LSIZE2],ibuf2[LSIZE2];
+ unsigned char ibuf3[LSIZE2],ibuf4[LSIZE2],ibuf5[LSIZE2];
+ #ifndef OPENSSL_NO_TLSEXT
+@@ -280,4 +281,11 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)
+ }
++
++ if (in->original_handshake_hash_len > 0)
++ {
++ a.original_handshake_hash.length = in->original_handshake_hash_len;
++ a.original_handshake_hash.type = V_ASN1_OCTET_STRING;
++ a.original_handshake_hash.data = in->original_handshake_hash;
++ }
+ #endif /* OPENSSL_NO_PSK */
+ #ifndef OPENSSL_NO_SRP
+ if (in->srp_username)
+@@ -335,4 +343,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)
+ #endif /* OPENSSL_NO_SRP */
++ if (in->original_handshake_hash_len > 0)
++ M_ASN1_I2D_len_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);
+
+ M_ASN1_I2D_seq_total();
+
+@@ -385,4 +395,6 @@ int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)
+ #endif /* OPENSSL_NO_SRP */
++ if (in->original_handshake_hash_len > 0)
++ M_ASN1_I2D_put_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);
+ M_ASN1_I2D_finish();
+ }
+
+@@ -661,5 +673,16 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp,
+ os.data = NULL;
+ }
+
++ os.length=0;
++ os.data=NULL;
++ M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,14);
++ if (os.data && os.length < (int)sizeof(ret->original_handshake_hash))
++ {
++ memcpy(ret->original_handshake_hash, os.data, os.length);
++ ret->original_handshake_hash_len = os.length;
++ OPENSSL_free(os.data);
++ os.data = NULL;
++ }
++
+ M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION);
+ }
+diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c
+index 370fb57..b3eee4d 100644
+--- a/ssl/ssl_err.c
++++ b/ssl/ssl_err.c
+@@ -151,6 +151,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
+ {ERR_FUNC(SSL_F_SSL3_GET_CERTIFICATE_REQUEST), "SSL3_GET_CERTIFICATE_REQUEST"},
+ {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "SSL3_GET_CERT_STATUS"},
+ {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "SSL3_GET_CERT_VERIFY"},
++{ERR_FUNC(SSL_F_SSL3_GET_CHANNEL_ID), "SSL3_GET_CHANNEL_ID"},
+ {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_CERTIFICATE), "SSL3_GET_CLIENT_CERTIFICATE"},
+ {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "SSL3_GET_CLIENT_HELLO"},
+ {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE), "SSL3_GET_CLIENT_KEY_EXCHANGE"},
+@@ -170,6 +171,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
+ {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "SSL3_READ_BYTES"},
+ {ERR_FUNC(SSL_F_SSL3_READ_N), "SSL3_READ_N"},
+ {ERR_FUNC(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST), "SSL3_SEND_CERTIFICATE_REQUEST"},
++{ERR_FUNC(SSL_F_SSL3_SEND_CHANNEL_ID), "SSL3_SEND_CHANNEL_ID"},
+ {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE), "SSL3_SEND_CLIENT_CERTIFICATE"},
+ {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE), "SSL3_SEND_CLIENT_KEY_EXCHANGE"},
+ {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "SSL3_SEND_CLIENT_VERIFY"},
+@@ -339,12 +341,15 @@ static ERR_STRING_DATA SSL_str_reasons[]=
+ {ERR_REASON(SSL_R_BIO_NOT_SET) ,"bio not set"},
+ {ERR_REASON(SSL_R_BLOCK_CIPHER_PAD_IS_WRONG),"block cipher pad is wrong"},
+ {ERR_REASON(SSL_R_BN_LIB) ,"bn lib"},
++{ERR_REASON(SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY),"cannot serialize public key"},
+ {ERR_REASON(SSL_R_CA_DN_LENGTH_MISMATCH) ,"ca dn length mismatch"},
+ {ERR_REASON(SSL_R_CA_DN_TOO_LONG) ,"ca dn too long"},
+ {ERR_REASON(SSL_R_CCS_RECEIVED_EARLY) ,"ccs received early"},
+ {ERR_REASON(SSL_R_CERTIFICATE_VERIFY_FAILED),"certificate verify failed"},
+ {ERR_REASON(SSL_R_CERT_LENGTH_MISMATCH) ,"cert length mismatch"},
+ {ERR_REASON(SSL_R_CHALLENGE_IS_DIFFERENT),"challenge is different"},
++{ERR_REASON(SSL_R_CHANNEL_ID_NOT_P256) ,"channel id not p256"},
++{ERR_REASON(SSL_R_CHANNEL_ID_SIGNATURE_INVALID),"Channel ID signature invalid"},
+ {ERR_REASON(SSL_R_CIPHER_CODE_WRONG_LENGTH),"cipher code wrong length"},
+ {ERR_REASON(SSL_R_CIPHER_OR_HASH_UNAVAILABLE),"cipher or hash unavailable"},
+ {ERR_REASON(SSL_R_CIPHER_TABLE_SRC_ERROR),"cipher table src error"},
+@@ -357,6 +362,7 @@ static ERR_STRING_DATA SSL_str_reasons[]=
+ {ERR_REASON(SSL_R_CONNECTION_ID_IS_DIFFERENT),"connection id is different"},
+ {ERR_REASON(SSL_R_CONNECTION_TYPE_NOT_SET),"connection type not set"},
+ {ERR_REASON(SSL_R_COOKIE_MISMATCH) ,"cookie mismatch"},
++{ERR_REASON(SSL_R_D2I_ECDSA_SIG) ,"d2i ecdsa sig"},
+ {ERR_REASON(SSL_R_DATA_BETWEEN_CCS_AND_FINISHED),"data between ccs and finished"},
+ {ERR_REASON(SSL_R_DATA_LENGTH_TOO_LONG) ,"data length too long"},
+ {ERR_REASON(SSL_R_DECRYPTION_FAILED) ,"decryption failed"},
+@@ -374,9 +380,12 @@ static ERR_STRING_DATA SSL_str_reasons[]=
+ {ERR_REASON(SSL_R_ENCRYPTED_LENGTH_TOO_LONG),"encrypted length too long"},
+ {ERR_REASON(SSL_R_ERROR_GENERATING_TMP_RSA_KEY),"error generating tmp rsa key"},
+ {ERR_REASON(SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST),"error in received cipher list"},
++{ERR_REASON(SSL_R_EVP_DIGESTSIGNFINAL_FAILED),"evp digestsignfinal failed"},
++{ERR_REASON(SSL_R_EVP_DIGESTSIGNINIT_FAILED),"evp digestsigninit failed"},
+ {ERR_REASON(SSL_R_EXCESSIVE_MESSAGE_SIZE),"excessive message size"},
+ {ERR_REASON(SSL_R_EXTRA_DATA_IN_MESSAGE) ,"extra data in message"},
+ {ERR_REASON(SSL_R_GOT_A_FIN_BEFORE_A_CCS),"got a fin before a ccs"},
++{ERR_REASON(SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS),"got Channel ID before a ccs"},
+ {ERR_REASON(SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS),"got next proto before a ccs"},
+ {ERR_REASON(SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION),"got next proto without seeing extension"},
+ {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST) ,"https proxy request"},
+@@ -386,6 +395,7 @@ static ERR_STRING_DATA SSL_str_reasons[]=
+ {ERR_REASON(SSL_R_INVALID_CHALLENGE_LENGTH),"invalid challenge length"},
+ {ERR_REASON(SSL_R_INVALID_COMMAND) ,"invalid command"},
+ {ERR_REASON(SSL_R_INVALID_COMPRESSION_ALGORITHM),"invalid compression algorithm"},
++{ERR_REASON(SSL_R_INVALID_MESSAGE) ,"invalid message"},
+ {ERR_REASON(SSL_R_INVALID_PURPOSE) ,"invalid purpose"},
+ {ERR_REASON(SSL_R_INVALID_SRP_USERNAME) ,"invalid srp username"},
+ {ERR_REASON(SSL_R_INVALID_STATUS_RESPONSE),"invalid status response"},
+@@ -440,6 +450,7 @@ static ERR_STRING_DATA SSL_str_reasons[]=
+ {ERR_REASON(SSL_R_NO_COMPRESSION_SPECIFIED),"no compression specified"},
+ {ERR_REASON(SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER),"Peer haven't sent GOST certificate, required for selected ciphersuite"},
+ {ERR_REASON(SSL_R_NO_METHOD_SPECIFIED) ,"no method specified"},
++{ERR_REASON(SSL_R_NO_P256_SUPPORT) ,"no p256 support"},
+ {ERR_REASON(SSL_R_NO_PRIVATEKEY) ,"no privatekey"},
+ {ERR_REASON(SSL_R_NO_PRIVATE_KEY_ASSIGNED),"no private key assigned"},
+ {ERR_REASON(SSL_R_NO_PROTOCOLS_AVAILABLE),"no protocols available"},
+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
+index 123f925..6938267 100644
+--- a/ssl/ssl_lib.c
++++ b/ssl/ssl_lib.c
+@@ -562,6 +562,8 @@ void SSL_free(SSL *s)
+ sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, OCSP_RESPID_free);
+ if (s->tlsext_ocsp_resp)
+ OPENSSL_free(s->tlsext_ocsp_resp);
++ if (s->tlsext_channel_id_private)
++ EVP_PKEY_free(s->tlsext_channel_id_private);
+ #endif
+
+ if (s->client_CA != NULL)
+@@ -1952,6 +1954,11 @@ void SSL_CTX_free(SSL_CTX *a)
+ ssl_buf_freelist_free(a->rbuf_freelist);
+ #endif
+
++#ifndef OPENSSL_NO_TLSEXT
++ if (a->tlsext_channel_id_private)
++ EVP_PKEY_free(a->tlsext_channel_id_private);
++#endif
++
+ OPENSSL_free(a);
+ }
+
+@@ -2504,6 +2511,10 @@ int SSL_get_error(const SSL *s,int i)
+ {
+ return(SSL_ERROR_WANT_X509_LOOKUP);
+ }
++ if ((i < 0) && SSL_want_channel_id_lookup(s))
++ {
++ return(SSL_ERROR_WANT_CHANNEL_ID_LOOKUP);
++ }
+
+ if (i == 0)
+ {
+diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
+index fcc6d80..3ce3d60 100644
+--- a/ssl/ssl_locl.h
++++ b/ssl/ssl_locl.h
+@@ -378,6 +378,7 @@
+ * (currently this also goes into algorithm2) */
+ #define TLS1_STREAM_MAC 0x04
+
++#define TLSEXT_CHANNEL_ID_SIZE 128
+
+
+ /*
+@@ -1008,6 +1009,7 @@ int ssl3_check_cert_and_algorithm(SSL *s);
+ int ssl3_check_finished(SSL *s);
+ # ifndef OPENSSL_NO_NEXTPROTONEG
+ int ssl3_send_next_proto(SSL *s);
++int ssl3_send_channel_id(SSL *s);
+ # endif
+ #endif
+
+@@ -1030,6 +1032,7 @@ int ssl3_get_cert_verify(SSL *s);
+ #ifndef OPENSSL_NO_NEXTPROTONEG
+ int ssl3_get_next_proto(SSL *s);
+ #endif
++int ssl3_get_channel_id(SSL *s);
+
+ int dtls1_send_hello_request(SSL *s);
+ int dtls1_send_server_hello(SSL *s);
+@@ -1072,6 +1075,7 @@ void ssl_free_wbio_buffer(SSL *s);
+ int tls1_change_cipher_state(SSL *s, int which);
+ int tls1_setup_key_block(SSL *s);
+ int tls1_enc(SSL *s, int snd);
++int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len);
+ int tls1_final_finish_mac(SSL *s,
+ const char *str, int slen, unsigned char *p);
+ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p);
+@@ -1127,6 +1131,8 @@ int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk,
+ int tls12_get_sigid(const EVP_PKEY *pk);
+ const EVP_MD *tls12_get_hash(unsigned char hash_alg);
+
++int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s);
++int tls1_record_handshake_hashes_for_channel_id(SSL *s);
+ #endif
+
+ int ssl3_can_cutthrough(const SSL *s);
+diff --git a/ssl/ssl_sess.c b/ssl/ssl_sess.c
+index 2a378c3..dd3b4a6 100644
+--- a/ssl/ssl_sess.c
++++ b/ssl/ssl_sess.c
+@@ -1151,6 +1151,17 @@ int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL * ssl, X509 ** x509 , EVP_PK
+ return ctx->client_cert_cb;
+ }
+
++void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx,
++ void (*cb)(SSL *ssl, EVP_PKEY **pkey))
++ {
++ ctx->channel_id_cb=cb;
++ }
++
++void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL * ssl, EVP_PKEY **pkey)
++ {
++ return ctx->channel_id_cb;
++ }
++
+ #ifndef OPENSSL_NO_ENGINE
+ int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e)
+ {
+diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c
+index 0c4cdde..f396674 100644
+--- a/ssl/t1_enc.c
++++ b/ssl/t1_enc.c
+@@ -895,54 +895,79 @@ int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *out)
+ return((int)ret);
+ }
+
++/* tls1_handshake_digest calculates the current handshake hash and writes it to
++ * |out|, which has space for |out_len| bytes. It returns the number of bytes
++ * written or -1 in the event of an error. This function works on a copy of the
++ * underlying digests so can be called multiple times and prior to the final
++ * update etc. */
++int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len)
++ {
++ const EVP_MD *md;
++ EVP_MD_CTX ctx;
++ int i, err = 0, len = 0;
++ long mask;
++
++ EVP_MD_CTX_init(&ctx);
++
++ for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++)
++ {
++ int hash_size;
++ unsigned int digest_len;
++ EVP_MD_CTX *hdgst = s->s3->handshake_dgst[i];
++
++ if ((mask & ssl_get_algorithm2(s)) == 0)
++ continue;
++
++ hash_size = EVP_MD_size(md);
++ if (!hdgst || hash_size < 0 || (size_t)hash_size > out_len)
++ {
++ err = 1;
++ break;
++ }
++
++ if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) ||
++ !EVP_DigestFinal_ex(&ctx, out, &digest_len) ||
++ digest_len != (unsigned int)hash_size) /* internal error */
++ {
++ err = 1;
++ break;
++ }
++ out += digest_len;
++ out_len -= digest_len;
++ len += digest_len;
++ }
++
++ EVP_MD_CTX_cleanup(&ctx);
++
++ if (err != 0)
++ return -1;
++ return len;
++ }
++
+ int tls1_final_finish_mac(SSL *s,
+ const char *str, int slen, unsigned char *out)
+ {
+- unsigned int i;
+- EVP_MD_CTX ctx;
+ unsigned char buf[2*EVP_MAX_MD_SIZE];
+- unsigned char *q,buf2[12];
+- int idx;
+- long mask;
++ unsigned char buf2[12];
+ int err=0;
+- const EVP_MD *md;
++ int digests_len;
+
+- q=buf;
+-
+- if (s->s3->handshake_buffer)
++ if (s->s3->handshake_buffer)
+ if (!ssl3_digest_cached_records(s))
+ return 0;
+
+- EVP_MD_CTX_init(&ctx);
+-
+- for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++)
++ digests_len = tls1_handshake_digest(s, buf, sizeof(buf));
++ if (digests_len < 0)
+ {
+- if (mask & ssl_get_algorithm2(s))
+- {
+- int hashsize = EVP_MD_size(md);
+- EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx];
+- if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf)))
+- {
+- /* internal error: 'buf' is too small for this cipersuite! */
+- err = 1;
+- }
+- else
+- {
+- if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) ||
+- !EVP_DigestFinal_ex(&ctx,q,&i) ||
+- (i != (unsigned int)hashsize))
+- err = 1;
+- q+=hashsize;
+- }
+- }
++ err = 1;
++ digests_len = 0;
+ }
+-
++
+ if (!tls1_PRF(ssl_get_algorithm2(s),
+- str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0,
++ str,slen, buf, digests_len, NULL,0, NULL,0, NULL,0,
+ s->session->master_key,s->session->master_key_length,
+ out,buf2,sizeof buf2))
+ err = 1;
+- EVP_MD_CTX_cleanup(&ctx);
+
+ if (err)
+ return 0;
+diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c
+index bddffd9..1a56a97 100644
+--- a/ssl/t1_lib.c
++++ b/ssl/t1_lib.c
+@@ -641,6 +641,19 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha
+ }
+ #endif
+
++ if (s->tlsext_channel_id_enabled)
++ {
++ /* The client advertises an emtpy extension to indicate its
++ * support for Channel ID. */
++ if (limit - ret - 4 < 0)
++ return NULL;
++ if (s->ctx->tlsext_channel_id_enabled_new)
++ s2n(TLSEXT_TYPE_channel_id_new,ret);
++ else
++ s2n(TLSEXT_TYPE_channel_id,ret);
++ s2n(0,ret);
++ }
++
+ #ifndef OPENSSL_NO_SRTP
+ if(SSL_get_srtp_profiles(s))
+ {
+@@ -881,6 +894,19 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *p, unsigned cha
+ }
+ #endif
+
++ /* If the client advertised support for Channel ID, and we have it
++ * enabled, then we want to echo it back. */
++ if (s->s3->tlsext_channel_id_valid)
++ {
++ if (limit - ret - 4 < 0)
++ return NULL;
++ if (s->s3->tlsext_channel_id_new)
++ s2n(TLSEXT_TYPE_channel_id_new,ret);
++ else
++ s2n(TLSEXT_TYPE_channel_id,ret);
++ s2n(0,ret);
++ }
++
+ if ((extdatalen = ret-p-2)== 0)
+ return p;
+
+@@ -1442,6 +1468,16 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ }
+ #endif
+
++ else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled)
++ s->s3->tlsext_channel_id_valid = 1;
++
++ else if (type == TLSEXT_TYPE_channel_id_new &&
++ s->tlsext_channel_id_enabled)
++ {
++ s->s3->tlsext_channel_id_valid = 1;
++ s->s3->tlsext_channel_id_new = 1;
++ }
++
+ /* session ticket processed earlier */
+ #ifndef OPENSSL_NO_SRTP
+ else if (type == TLSEXT_TYPE_use_srtp)
+@@ -1672,6 +1708,15 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ s->s3->next_proto_neg_seen = 1;
+ }
+ #endif
++ else if (type == TLSEXT_TYPE_channel_id)
++ s->s3->tlsext_channel_id_valid = 1;
++
++ else if (type == TLSEXT_TYPE_channel_id_new)
++ {
++ s->s3->tlsext_channel_id_valid = 1;
++ s->s3->tlsext_channel_id_new = 1;
++ }
++
+ else if (type == TLSEXT_TYPE_renegotiate)
+ {
+ if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al))
+@@ -2727,3 +2772,74 @@ tls1_heartbeat(SSL *s)
+ return ret;
+ }
+ #endif
++
++#if !defined(OPENSSL_NO_TLSEXT)
++/* tls1_channel_id_hash calculates the signed data for a Channel ID on the given
++ * SSL connection and writes it to |md|.
++ */
++int
++tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s)
++ {
++ EVP_MD_CTX ctx;
++ unsigned char temp_digest[EVP_MAX_MD_SIZE];
++ unsigned temp_digest_len;
++ int i;
++ static const char kClientIDMagic[] = "TLS Channel ID signature";
++
++ if (s->s3->handshake_buffer)
++ if (!ssl3_digest_cached_records(s))
++ return 0;
++
++ EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic));
++
++ if (s->hit && s->s3->tlsext_channel_id_new)
++ {
++ static const char kResumptionMagic[] = "Resumption";
++ EVP_DigestUpdate(md, kResumptionMagic,
++ sizeof(kResumptionMagic));
++ if (s->session->original_handshake_hash_len == 0)
++ return 0;
++ EVP_DigestUpdate(md, s->session->original_handshake_hash,
++ s->session->original_handshake_hash_len);
++ }
++
++ EVP_MD_CTX_init(&ctx);
++ for (i = 0; i < SSL_MAX_DIGEST; i++)
++ {
++ if (s->s3->handshake_dgst[i] == NULL)
++ continue;
++ EVP_MD_CTX_copy_ex(&ctx, s->s3->handshake_dgst[i]);
++ EVP_DigestFinal_ex(&ctx, temp_digest, &temp_digest_len);
++ EVP_DigestUpdate(md, temp_digest, temp_digest_len);
++ }
++ EVP_MD_CTX_cleanup(&ctx);
++
++ return 1;
++ }
++#endif
++
++/* tls1_record_handshake_hashes_for_channel_id records the current handshake
++ * hashes in |s->session| so that Channel ID resumptions can sign that data. */
++int tls1_record_handshake_hashes_for_channel_id(SSL *s)
++ {
++ int digest_len;
++ /* This function should never be called for a resumed session because
++ * the handshake hashes that we wish to record are for the original,
++ * full handshake. */
++ if (s->hit)
++ return -1;
++ /* It only makes sense to call this function if Channel IDs have been
++ * negotiated. */
++ if (!s->s3->tlsext_channel_id_new)
++ return -1;
++
++ digest_len = tls1_handshake_digest(
++ s, s->session->original_handshake_hash,
++ sizeof(s->session->original_handshake_hash));
++ if (digest_len < 0)
++ return -1;
++
++ s->session->original_handshake_hash_len = digest_len;
++
++ return 1;
++ }
+diff --git a/ssl/tls1.h b/ssl/tls1.h
+index c992091..12f2f21 100644
+--- a/ssl/tls1.h
++++ b/ssl/tls1.h
+@@ -254,6 +254,10 @@ extern "C" {
+ #define TLSEXT_TYPE_next_proto_neg 13172
+ #endif
+
++/* This is not an IANA defined extension number */
++#define TLSEXT_TYPE_channel_id 30031
++#define TLSEXT_TYPE_channel_id_new 30032
++
+ /* NameType value from RFC 3546 */
+ #define TLSEXT_NAMETYPE_host_name 0
+ /* status request value from RFC 3546 */
+--
+1.9.1.423.g4596e3a
+
diff --git a/patches/eng_dyn_dirs.patch b/patches/0005-eng_dyn_dirs.patch
similarity index 100%
rename from patches/eng_dyn_dirs.patch
rename to patches/0005-eng_dyn_dirs.patch
diff --git a/patches/fix_clang_build.patch b/patches/0006-fix_clang_build.patch
similarity index 100%
rename from patches/fix_clang_build.patch
rename to patches/0006-fix_clang_build.patch
diff --git a/patches/tls12_digests.patch b/patches/0007-tls12_digests.patch
similarity index 100%
rename from patches/tls12_digests.patch
rename to patches/0007-tls12_digests.patch
diff --git a/patches/alpn.patch b/patches/0008-alpn.patch
similarity index 100%
rename from patches/alpn.patch
rename to patches/0008-alpn.patch
diff --git a/patches/cbc_record_splitting.patch b/patches/0009-cbc_record_splitting.patch
similarity index 89%
rename from patches/cbc_record_splitting.patch
rename to patches/0009-cbc_record_splitting.patch
index 37e58a1..5430402 100644
--- a/patches/cbc_record_splitting.patch
+++ b/patches/0009-cbc_record_splitting.patch
@@ -1,4 +1,4 @@
-From 6d65fc2d2bd6d6f4a5de364ff2cf7ec2da8f5037 Mon Sep 17 00:00:00 2001
+From e6102d2ac84a55e4a50d9edfc36ec894c6174fb7 Mon Sep 17 00:00:00 2001
From: Adam Langley <[email protected]>
Date: Thu, 31 Oct 2013 13:22:54 -0400
@@ -16,12 +16,12 @@
apps/s_client.c | 16 +++++---
ssl/d1_pkt.c | 50 ++++---------------------
ssl/s3_enc.c | 17 ++++-----
- ssl/s3_pkt.c | 112 +++++++++++++++++++++++++++++++-------------------------
- ssl/ssl.h | 19 +++++++---
+ ssl/s3_pkt.c | 113 +++++++++++++++++++++++++++++++-------------------------
+ ssl/ssl.h | 20 +++++++---
ssl/ssl3.h | 4 +-
ssl/ssl_locl.h | 2 -
ssl/t1_enc.c | 10 ++---
- 8 files changed, 109 insertions(+), 121 deletions(-)
+ 8 files changed, 110 insertions(+), 122 deletions(-)
diff --git a/apps/s_client.c b/apps/s_client.c
index cb1efcd..0c70580 100644
@@ -83,7 +83,7 @@
if (state) SSL_CTX_set_info_callback(ctx,apps_ssl_info_callback);
if (cipher != NULL)
diff --git a/ssl/d1_pkt.c b/ssl/d1_pkt.c
-index 0bf87be..cb0f8f0 100644
+index 438c091..363fc8c 100644
--- a/ssl/d1_pkt.c
+++ b/ssl/d1_pkt.c
@@ -179,6 +179,8 @@ static int dtls1_record_needs_buffering(SSL *s, SSL3_RECORD *rr,
@@ -95,7 +95,7 @@
/* copy buffered record into SSL structure */
static int
-@@ -1456,11 +1458,12 @@ int dtls1_write_bytes(SSL *s, int type, const void *buf, int len)
+@@ -1464,11 +1466,12 @@ int dtls1_write_bytes(SSL *s, int type, const void *buf, int len)
OPENSSL_assert(len <= SSL3_RT_MAX_PLAIN_LENGTH);
s->rwstate=SSL_NOTHING;
@@ -110,7 +110,7 @@
{
unsigned char *p,*pseq;
int i,mac_size,clear=0;
-@@ -1487,7 +1490,7 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
+@@ -1495,7 +1498,7 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
/* if it went, fall through and send more stuff */
}
@@ -119,7 +119,7 @@
return 0;
wr= &(s->s3->wrec);
-@@ -1508,37 +1511,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
+@@ -1516,37 +1519,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
goto err;
}
@@ -157,7 +157,7 @@
p = wb->buf + prefix_len;
/* write the header */
-@@ -1644,14 +1616,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
+@@ -1652,14 +1624,6 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len,
ssl3_record_sequence_update(&(s->s3->write_sequence[0]));
@@ -172,7 +172,7 @@
/* now let's set up wb */
wb->left = prefix_len + wr->length;
wb->offset = 0;
-@@ -1748,7 +1712,7 @@ int dtls1_dispatch_alert(SSL *s)
+@@ -1756,7 +1720,7 @@ int dtls1_dispatch_alert(SSL *s)
}
#endif
@@ -222,10 +222,10 @@
SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,ERR_R_MALLOC_FAILURE);
return(0);
diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c
-index 04b474d..d690493 100644
+index 706ef1f..957d7c6 100644
--- a/ssl/s3_pkt.c
+++ b/ssl/s3_pkt.c
-@@ -118,7 +118,7 @@
+@@ -119,7 +119,7 @@
#include <openssl/rand.h>
static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
@@ -234,7 +234,7 @@
static int ssl3_get_record(SSL *s);
int ssl3_read_n(SSL *s, int n, int max, int extend)
-@@ -618,12 +618,34 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
+@@ -636,15 +636,36 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
n=(len-tot);
for (;;)
{
@@ -253,9 +253,6 @@
+ !s->s3->record_split_done)
+ {
+ fragment = 1;
-+ /* The first byte will be in its own record, so we
-+ * can write an extra byte. */
-+ max++;
+ /* record_split_done records that the splitting has
+ * been done in case we hit an SSL_WANT_WRITE condition.
+ * In that case, we don't need to do the split again. */
@@ -272,7 +269,12 @@
if (i <= 0)
{
s->s3->wnum=tot;
-@@ -634,10 +656,10 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
++ /* Try to write the fragment next time. */
++ s->s3->record_split_done = 0;
+ return i;
+ }
+
+@@ -652,10 +673,10 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
(type == SSL3_RT_APPLICATION_DATA &&
(s->mode & SSL_MODE_ENABLE_PARTIAL_WRITE)))
{
@@ -287,7 +289,7 @@
return tot+i;
}
-@@ -646,11 +668,16 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
+@@ -664,11 +685,16 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
}
}
@@ -306,16 +308,21 @@
int prefix_len=0;
int eivlen;
long align=0;
-@@ -676,7 +703,7 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -691,11 +717,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
/* if it went, fall through and send more stuff */
}
+- if (wb->buf == NULL)
++ if (wb->buf == NULL)
+ if (!ssl3_setup_write_buffer(s))
+ return -1;
+
- if (len == 0 && !create_empty_fragment)
+ if (len == 0)
return 0;
wr= &(s->s3->wrec);
-@@ -686,11 +713,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -705,11 +731,6 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
(s->enc_write_ctx == NULL) ||
(EVP_MD_CTX_md(s->write_hash) == NULL))
{
@@ -327,7 +334,7 @@
mac_size=0;
}
else
-@@ -700,42 +722,33 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -719,42 +740,33 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
goto err;
}
@@ -387,7 +394,7 @@
align = (-align)&(SSL3_ALIGN_PAYLOAD-1);
#endif
p = wb->buf + align;
-@@ -772,7 +785,7 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -791,7 +803,7 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
*(p++)=s->version&0xff;
/* field where we are to write out packet length */
@@ -396,7 +403,7 @@
p+=2;
/* Explicit IV length, block ciphers and TLS version 1.1 or later */
if (s->enc_write_ctx && s->version >= TLS1_1_VERSION)
-@@ -800,8 +813,8 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -819,8 +831,8 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
/* lets setup the record stuff. */
wr->data=p + eivlen;
@@ -407,7 +414,7 @@
/* we now 'read' from wr->input, wr->length bytes into
* wr->data */
-@@ -854,11 +867,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
+@@ -873,11 +885,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
wr->type=type; /* not needed but helps for debugging */
wr->length+=SSL3_RT_HEADER_LENGTH;
@@ -422,7 +429,7 @@
return wr->length;
}
-@@ -1514,7 +1526,7 @@ int ssl3_dispatch_alert(SSL *s)
+@@ -1548,7 +1559,7 @@ int ssl3_dispatch_alert(SSL *s)
void (*cb)(const SSL *ssl,int type,int val)=NULL;
s->s3->alert_dispatch=0;
@@ -432,12 +439,12 @@
{
s->s3->alert_dispatch=1;
diff --git a/ssl/ssl.h b/ssl/ssl.h
-index b289bc2..8564484 100644
+index ef85428..ce65664 100644
--- a/ssl/ssl.h
+++ b/ssl/ssl.h
-@@ -580,11 +580,15 @@ struct ssl_session_st
- #define SSL_OP_TLS_D5_BUG 0x00000100L
- #define SSL_OP_TLS_BLOCK_PADDING_BUG 0x00000200L
+@@ -578,11 +578,15 @@ struct ssl_session_st
+ /* Refers to ancient SSLREF and SSLv2, retained for compatibility */
+ #define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0
-/* Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added
- * in OpenSSL 0.9.6d. Usually (depending on the application protocol)
@@ -456,23 +463,24 @@
#define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS 0x00000800L /* added in 0.9.6e */
/* SSL_OP_ALL: various bug workarounds that should be rather harmless.
-@@ -668,6 +672,11 @@ struct ssl_session_st
- * and Finished. This mode enables full-handshakes to 'complete' in
+@@ -675,6 +679,12 @@ struct ssl_session_st
* one RTT. */
#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L
+
+/* When set, TLS 1.0 and SSLv3, multi-byte, CBC records will be split in two:
+ * the first record will contain a single byte and the second will contain the
+ * rest of the bytes. This effectively randomises the IV and prevents BEAST
+ * attacks. */
+#define SSL_MODE_CBC_RECORD_SPLITTING 0x00000100L
-
++
/* Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value,
* they cannot be used to clear bits. */
+
diff --git a/ssl/ssl3.h b/ssl/ssl3.h
-index 6a5cdbe..65f58a7 100644
+index 16c389d..8e3e449 100644
--- a/ssl/ssl3.h
+++ b/ssl/ssl3.h
-@@ -418,8 +418,8 @@ typedef struct ssl3_state_st
+@@ -419,8 +419,8 @@ typedef struct ssl3_state_st
unsigned char client_random[SSL3_RANDOM_SIZE];
/* flags for countermeasure against known-IV weakness */
@@ -484,10 +492,10 @@
/* The value of 'extra' when the buffers were initialized */
int init_extra;
diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
-index b83d8cd..dac33e2 100644
+index 10baaee..6d4bc14 100644
--- a/ssl/ssl_locl.h
+++ b/ssl/ssl_locl.h
-@@ -1091,8 +1091,6 @@ int dtls1_shutdown(SSL *s);
+@@ -1093,8 +1093,6 @@ int dtls1_shutdown(SSL *s);
long dtls1_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok);
int dtls1_get_record(SSL *s);
@@ -497,10 +505,10 @@
int dtls1_enc(SSL *s, int snd);
diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c
-index 15800af..b2686f4 100644
+index 9963a80..4ca1549 100644
--- a/ssl/t1_enc.c
+++ b/ssl/t1_enc.c
-@@ -762,22 +762,22 @@ printf("\nkey block\n");
+@@ -774,22 +774,22 @@ printf("\nkey block\n");
{ int z; for (z=0; z<num; z++) printf("%02X%c",p1[z],((z+1)%16)?' ':'\n'); }
#endif
@@ -529,5 +537,5 @@
}
}
--
-1.8.4.1
+2.0.0.526.g5318336
diff --git a/patches/0010-dsa_nonce.patch b/patches/0010-dsa_nonce.patch
new file mode 100644
index 0000000..368881c
--- /dev/null
+++ b/patches/0010-dsa_nonce.patch
@@ -0,0 +1,502 @@
+From 9be2984bfbff9a83e7b38f47ac87c677e9a9a0b8 Mon Sep 17 00:00:00 2001
+From: Adam Langley <[email protected]>
+Date: Thu, 24 Jan 2013 16:27:28 -0500
+Subject: dsa_nonce
+
+Adds the option to calculate (EC)DSA nonces by hashing the message and
+private key along with entropy.
+---
+ crypto/bn/bn.h | 6 +++++
+ crypto/bn/bn_err.c | 2 ++
+ crypto/bn/bn_rand.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
+ crypto/dsa/dsa.h | 10 +++++--
+ crypto/dsa/dsa_err.c | 1 +
+ crypto/dsa/dsa_ossl.c | 28 ++++++++++++++++----
+ crypto/dsa/dsa_sign.c | 9 ++++++-
+ crypto/ec/ec.h | 11 ++++++++
+ crypto/ec/ec_key.c | 12 +++++++++
+ crypto/ec/ec_lcl.h | 1 +
+ crypto/ecdsa/ecdsa.h | 1 +
+ crypto/ecdsa/ecs_err.c | 1 +
+ crypto/ecdsa/ecs_locl.h | 5 ++--
+ crypto/ecdsa/ecs_ossl.c | 38 ++++++++++++++++++++-------
+ crypto/ecdsa/ecs_sign.c | 10 ++++++-
+ 15 files changed, 185 insertions(+), 20 deletions(-)
+
+diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
+index f34248e..9281ce5 100644
+--- a/crypto/bn/bn.h
++++ b/crypto/bn/bn.h
+@@ -692,6 +692,10 @@ const BIGNUM *BN_get0_nist_prime_256(void);
+ const BIGNUM *BN_get0_nist_prime_384(void);
+ const BIGNUM *BN_get0_nist_prime_521(void);
+
++int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv,
++ const unsigned char *message, size_t message_len,
++ BN_CTX *ctx);
++
+ /* library internal functions */
+
+ #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
+@@ -842,6 +846,7 @@ void ERR_load_BN_strings(void);
+ #define BN_F_BN_EXP 123
+ #define BN_F_BN_EXPAND2 108
+ #define BN_F_BN_EXPAND_INTERNAL 120
++#define BN_F_BN_GENERATE_DSA_NONCE 140
+ #define BN_F_BN_GF2M_MOD 131
+ #define BN_F_BN_GF2M_MOD_EXP 132
+ #define BN_F_BN_GF2M_MOD_MUL 133
+@@ -881,6 +886,7 @@ void ERR_load_BN_strings(void);
+ #define BN_R_NOT_INITIALIZED 107
+ #define BN_R_NO_INVERSE 108
+ #define BN_R_NO_SOLUTION 116
++#define BN_R_PRIVATE_KEY_TOO_LARGE 117
+ #define BN_R_P_IS_NOT_PRIME 112
+ #define BN_R_TOO_MANY_ITERATIONS 113
+ #define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
+diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
+index cfe2eb9..f722b52 100644
+--- a/crypto/bn/bn_err.c
++++ b/crypto/bn/bn_err.c
+@@ -87,6 +87,7 @@ static ERR_STRING_DATA BN_str_functs[]=
+ {ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
+ {ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
+ {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
++{ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"},
+ {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
+ {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
+ {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
+@@ -129,6 +130,7 @@ static ERR_STRING_DATA BN_str_reasons[]=
+ {ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
+ {ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
+ {ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"},
++{ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE) ,"private key too large"},
+ {ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
+ {ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
+ {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
+diff --git a/crypto/bn/bn_rand.c b/crypto/bn/bn_rand.c
+index b376c28..55676f0 100644
+--- a/crypto/bn/bn_rand.c
++++ b/crypto/bn/bn_rand.c
+@@ -114,6 +114,7 @@
+ #include "cryptlib.h"
+ #include "bn_lcl.h"
+ #include <openssl/rand.h>
++#include <openssl/sha.h>
+
+ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
+ {
+@@ -303,3 +304,72 @@ int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
+ {
+ return bn_rand_range(1, r, range);
+ }
++
++#ifndef OPENSSL_NO_SHA512
++/* BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike
++ * BN_rand_range, it also includes the contents of |priv| and |message| in the
++ * generation so that an RNG failure isn't fatal as long as |priv| remains
++ * secret. This is intended for use in DSA and ECDSA where an RNG weakness
++ * leads directly to private key exposure unless this function is used. */
++int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM* priv,
++ const unsigned char *message, size_t message_len,
++ BN_CTX *ctx)
++ {
++ SHA512_CTX sha;
++ /* We use 512 bits of random data per iteration to
++ * ensure that we have at least |range| bits of randomness. */
++ unsigned char random_bytes[64];
++ unsigned char digest[SHA512_DIGEST_LENGTH];
++ unsigned done, todo;
++ /* We generate |range|+8 bytes of random output. */
++ const unsigned num_k_bytes = BN_num_bytes(range) + 8;
++ unsigned char private_bytes[96];
++ unsigned char *k_bytes;
++ int ret = 0;
++
++ k_bytes = OPENSSL_malloc(num_k_bytes);
++ if (!k_bytes)
++ goto err;
++
++ /* We copy |priv| into a local buffer to avoid exposing its length. */
++ todo = sizeof(priv->d[0])*priv->top;
++ if (todo > sizeof(private_bytes))
++ {
++ /* No reasonable DSA or ECDSA key should have a private key
++ * this large and we don't handle this case in order to avoid
++ * leaking the length of the private key. */
++ BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE);
++ goto err;
++ }
++ memcpy(private_bytes, priv->d, todo);
++ memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
++
++ for (done = 0; done < num_k_bytes;) {
++ if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1)
++ goto err;
++ SHA512_Init(&sha);
++ SHA512_Update(&sha, &done, sizeof(done));
++ SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
++ SHA512_Update(&sha, message, message_len);
++ SHA512_Update(&sha, random_bytes, sizeof(random_bytes));
++ SHA512_Final(digest, &sha);
++
++ todo = num_k_bytes - done;
++ if (todo > SHA512_DIGEST_LENGTH)
++ todo = SHA512_DIGEST_LENGTH;
++ memcpy(k_bytes + done, digest, todo);
++ done += todo;
++ }
++
++ if (!BN_bin2bn(k_bytes, num_k_bytes, out))
++ goto err;
++ if (BN_mod(out, out, range, ctx) != 1)
++ goto err;
++ ret = 1;
++
++err:
++ if (k_bytes)
++ OPENSSL_free(k_bytes);
++ return ret;
++ }
++#endif /* OPENSSL_NO_SHA512 */
+diff --git a/crypto/dsa/dsa.h b/crypto/dsa/dsa.h
+index b448d2a..71ef572 100644
+--- a/crypto/dsa/dsa.h
++++ b/crypto/dsa/dsa.h
+@@ -96,6 +96,10 @@
+ * faster variable sliding window method to
+ * be used for all exponents.
+ */
++#define DSA_FLAG_NONCE_FROM_HASH 0x04 /* Causes the DSA nonce to be calculated
++ from SHA512(private_key + H(message) +
++ random). This strengthens DSA against a
++ weak PRNG. */
+
+
+ /* If this flag is set the DSA method is FIPS compliant and can be used
+@@ -131,8 +135,9 @@ struct dsa_method
+ {
+ const char *name;
+ DSA_SIG * (*dsa_do_sign)(const unsigned char *dgst, int dlen, DSA *dsa);
+- int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp,
+- BIGNUM **rp);
++ int (*dsa_sign_setup)(DSA *dsa, BN_CTX *ctx_in,
++ BIGNUM **kinvp, BIGNUM **rp,
++ const unsigned char *dgst, int dlen);
+ int (*dsa_do_verify)(const unsigned char *dgst, int dgst_len,
+ DSA_SIG *sig, DSA *dsa);
+ int (*dsa_mod_exp)(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1,
+@@ -325,6 +330,7 @@ void ERR_load_DSA_strings(void);
+ #define DSA_R_MISSING_PARAMETERS 101
+ #define DSA_R_MODULUS_TOO_LARGE 103
+ #define DSA_R_NEED_NEW_SETUP_VALUES 110
++#define DSA_R_NONCE_CANNOT_BE_PRECOMPUTED 112
+ #define DSA_R_NON_FIPS_DSA_METHOD 111
+ #define DSA_R_NO_PARAMETERS_SET 107
+ #define DSA_R_PARAMETER_ENCODING_ERROR 105
+diff --git a/crypto/dsa/dsa_err.c b/crypto/dsa/dsa_err.c
+index 00545b7..e6171cc 100644
+--- a/crypto/dsa/dsa_err.c
++++ b/crypto/dsa/dsa_err.c
+@@ -109,6 +109,7 @@ static ERR_STRING_DATA DSA_str_reasons[]=
+ {ERR_REASON(DSA_R_MISSING_PARAMETERS) ,"missing parameters"},
+ {ERR_REASON(DSA_R_MODULUS_TOO_LARGE) ,"modulus too large"},
+ {ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES) ,"need new setup values"},
++{ERR_REASON(DSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"},
+ {ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD) ,"non fips dsa method"},
+ {ERR_REASON(DSA_R_NO_PARAMETERS_SET) ,"no parameters set"},
+ {ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR),"parameter encoding error"},
+diff --git a/crypto/dsa/dsa_ossl.c b/crypto/dsa/dsa_ossl.c
+index a865a8c..15f8da2 100644
+--- a/crypto/dsa/dsa_ossl.c
++++ b/crypto/dsa/dsa_ossl.c
+@@ -67,7 +67,9 @@
+ #include <openssl/asn1.h>
+
+ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa);
+-static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp);
++static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
++ BIGNUM **kinvp, BIGNUM **rp,
++ const unsigned char *dgst, int dlen);
+ static int dsa_do_verify(const unsigned char *dgst, int dgst_len, DSA_SIG *sig,
+ DSA *dsa);
+ static int dsa_init(DSA *dsa);
+@@ -167,7 +169,8 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa)
+ redo:
+ if ((dsa->kinv == NULL) || (dsa->r == NULL))
+ {
+- if (!DSA_sign_setup(dsa,ctx,&kinv,&r)) goto err;
++ if (!dsa->meth->dsa_sign_setup(dsa,ctx,&kinv,&r,dgst,dlen))
++ goto err;
+ }
+ else
+ {
+@@ -226,7 +229,9 @@ err:
+ return(ret);
+ }
+
+-static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp)
++static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
++ BIGNUM **kinvp, BIGNUM **rp,
++ const unsigned char *dgst, int dlen)
+ {
+ BN_CTX *ctx;
+ BIGNUM k,kq,*K,*kinv=NULL,*r=NULL;
+@@ -252,8 +257,21 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp)
+
+ /* Get random k */
+ do
+- if (!BN_rand_range(&k, dsa->q)) goto err;
+- while (BN_is_zero(&k));
++ {
++#ifndef OPENSSL_NO_SHA512
++ if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH)
++ {
++ /* If DSA_FLAG_NONCE_FROM_HASH is set then we calculate k from
++ * SHA512(private_key + H(message) + random). This protects the
++ * private key from a weak PRNG. */
++ if (!BN_generate_dsa_nonce(&k, dsa->q, dsa->priv_key, dgst,
++ dlen, ctx))
++ goto err;
++ }
++ else
++#endif
++ if (!BN_rand_range(&k, dsa->q)) goto err;
++ } while (BN_is_zero(&k));
+ if ((dsa->flags & DSA_FLAG_NO_EXP_CONSTTIME) == 0)
+ {
+ BN_set_flags(&k, BN_FLG_CONSTTIME);
+diff --git a/crypto/dsa/dsa_sign.c b/crypto/dsa/dsa_sign.c
+index c3cc364..8ace300 100644
+--- a/crypto/dsa/dsa_sign.c
++++ b/crypto/dsa/dsa_sign.c
+@@ -86,7 +86,14 @@ int DSA_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, BIGNUM **rp)
+ return 0;
+ }
+ #endif
+- return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp);
++ if (dsa->flags & DSA_FLAG_NONCE_FROM_HASH)
++ {
++ /* You cannot precompute the DSA nonce if it is required to
++ * depend on the message. */
++ DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_NONCE_CANNOT_BE_PRECOMPUTED);
++ return 0;
++ }
++ return dsa->meth->dsa_sign_setup(dsa, ctx_in, kinvp, rp, NULL, 0);
+ }
+
+ DSA_SIG *DSA_SIG_new(void)
+diff --git a/crypto/ec/ec.h b/crypto/ec/ec.h
+index dfe8710..d008a0d 100644
+--- a/crypto/ec/ec.h
++++ b/crypto/ec/ec.h
+@@ -819,6 +819,17 @@ void *EC_KEY_insert_key_method_data(EC_KEY *key, void *data,
+ /* wrapper functions for the underlying EC_GROUP object */
+ void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+
++/** Sets whether ECDSA operations with the given key will calculate their k
++ * value from SHA512(private_key + message + random) in order to protect
++ * against a weak PRNG.
++ * \param on Whether to calculate k from a hash or not
++ */
++void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on);
++
++/** Returns the value of nonce_from_hash
++ */
++int EC_KEY_get_nonce_from_hash(const EC_KEY *key);
++
+ /** Creates a table of pre-computed multiples of the generator to
+ * accelerate further EC_KEY operations.
+ * \param key EC_KEY object
+diff --git a/crypto/ec/ec_key.c b/crypto/ec/ec_key.c
+index 7fa2475..73dd7b9 100644
+--- a/crypto/ec/ec_key.c
++++ b/crypto/ec/ec_key.c
+@@ -85,6 +85,7 @@ EC_KEY *EC_KEY_new(void)
+ ret->pub_key = NULL;
+ ret->priv_key= NULL;
+ ret->enc_flag= 0;
++ ret->nonce_from_hash_flag = 0;
+ ret->conv_form = POINT_CONVERSION_UNCOMPRESSED;
+ ret->references= 1;
+ ret->method_data = NULL;
+@@ -198,6 +199,7 @@ EC_KEY *EC_KEY_copy(EC_KEY *dest, const EC_KEY *src)
+
+ /* copy the rest */
+ dest->enc_flag = src->enc_flag;
++ dest->nonce_from_hash_flag = src->nonce_from_hash_flag;
+ dest->conv_form = src->conv_form;
+ dest->version = src->version;
+ dest->flags = src->flags;
+@@ -505,6 +507,16 @@ void EC_KEY_set_enc_flags(EC_KEY *key, unsigned int flags)
+ key->enc_flag = flags;
+ }
+
++int EC_KEY_get_nonce_from_hash(const EC_KEY *key)
++ {
++ return key->nonce_from_hash_flag;
++ }
++
++void EC_KEY_set_nonce_from_hash(EC_KEY *key, int on)
++ {
++ key->nonce_from_hash_flag = on != 0;
++ }
++
+ point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key)
+ {
+ return key->conv_form;
+diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h
+index da7967d..6f714c7 100644
+--- a/crypto/ec/ec_lcl.h
++++ b/crypto/ec/ec_lcl.h
+@@ -246,6 +246,7 @@ struct ec_key_st {
+ BIGNUM *priv_key;
+
+ unsigned int enc_flag;
++ char nonce_from_hash_flag;
+ point_conversion_form_t conv_form;
+
+ int references;
+diff --git a/crypto/ecdsa/ecdsa.h b/crypto/ecdsa/ecdsa.h
+index 7fb5254..dc6a36b 100644
+--- a/crypto/ecdsa/ecdsa.h
++++ b/crypto/ecdsa/ecdsa.h
+@@ -250,6 +250,7 @@ void ERR_load_ECDSA_strings(void);
+ #define ECDSA_R_ERR_EC_LIB 102
+ #define ECDSA_R_MISSING_PARAMETERS 103
+ #define ECDSA_R_NEED_NEW_SETUP_VALUES 106
++#define ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED 108
+ #define ECDSA_R_NON_FIPS_METHOD 107
+ #define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104
+ #define ECDSA_R_SIGNATURE_MALLOC_FAILED 105
+diff --git a/crypto/ecdsa/ecs_err.c b/crypto/ecdsa/ecs_err.c
+index 81542e6..7406c6d 100644
+--- a/crypto/ecdsa/ecs_err.c
++++ b/crypto/ecdsa/ecs_err.c
+@@ -85,6 +85,7 @@ static ERR_STRING_DATA ECDSA_str_reasons[]=
+ {ERR_REASON(ECDSA_R_ERR_EC_LIB) ,"err ec lib"},
+ {ERR_REASON(ECDSA_R_MISSING_PARAMETERS) ,"missing parameters"},
+ {ERR_REASON(ECDSA_R_NEED_NEW_SETUP_VALUES),"need new setup values"},
++{ERR_REASON(ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED),"nonce cannot be precomputed"},
+ {ERR_REASON(ECDSA_R_NON_FIPS_METHOD) ,"non fips method"},
+ {ERR_REASON(ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED),"random number generation failed"},
+ {ERR_REASON(ECDSA_R_SIGNATURE_MALLOC_FAILED),"signature malloc failed"},
+diff --git a/crypto/ecdsa/ecs_locl.h b/crypto/ecdsa/ecs_locl.h
+index cb3be13..46f7ad9 100644
+--- a/crypto/ecdsa/ecs_locl.h
++++ b/crypto/ecdsa/ecs_locl.h
+@@ -70,8 +70,9 @@ struct ecdsa_method
+ const char *name;
+ ECDSA_SIG *(*ecdsa_do_sign)(const unsigned char *dgst, int dgst_len,
+ const BIGNUM *inv, const BIGNUM *rp, EC_KEY *eckey);
+- int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx, BIGNUM **kinv,
+- BIGNUM **r);
++ int (*ecdsa_sign_setup)(EC_KEY *eckey, BN_CTX *ctx,
++ BIGNUM **kinv, BIGNUM **r,
++ const unsigned char *dgst, int dlen);
+ int (*ecdsa_do_verify)(const unsigned char *dgst, int dgst_len,
+ const ECDSA_SIG *sig, EC_KEY *eckey);
+ #if 0
+diff --git a/crypto/ecdsa/ecs_ossl.c b/crypto/ecdsa/ecs_ossl.c
+index 7725935..325aca8 100644
+--- a/crypto/ecdsa/ecs_ossl.c
++++ b/crypto/ecdsa/ecs_ossl.c
+@@ -60,11 +60,13 @@
+ #include <openssl/err.h>
+ #include <openssl/obj_mac.h>
+ #include <openssl/bn.h>
++#include <openssl/rand.h>
+
+ static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dlen,
+ const BIGNUM *, const BIGNUM *, EC_KEY *eckey);
+-static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
+- BIGNUM **rp);
++static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
++ BIGNUM **kinvp, BIGNUM **rp,
++ const unsigned char *dgst, int dlen);
+ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len,
+ const ECDSA_SIG *sig, EC_KEY *eckey);
+
+@@ -86,8 +88,9 @@ const ECDSA_METHOD *ECDSA_OpenSSL(void)
+ return &openssl_ecdsa_meth;
+ }
+
+-static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
+- BIGNUM **rp)
++static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
++ BIGNUM **kinvp, BIGNUM **rp,
++ const unsigned char *dgst, int dlen)
+ {
+ BN_CTX *ctx = NULL;
+ BIGNUM *k = NULL, *r = NULL, *order = NULL, *X = NULL;
+@@ -136,11 +139,28 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
+ {
+ /* get random k */
+ do
+- if (!BN_rand_range(k, order))
++#ifndef OPENSSL_NO_SHA512
++ if (EC_KEY_get_nonce_from_hash(eckey))
+ {
+- ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
+- ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
+- goto err;
++ if (!BN_generate_dsa_nonce(
++ k, order,
++ EC_KEY_get0_private_key(eckey),
++ dgst, dlen, ctx))
++ {
++ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
++ ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
++ goto err;
++ }
++ }
++ else
++#endif
++ {
++ if (!BN_rand_range(k, order))
++ {
++ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP,
++ ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
++ goto err;
++ }
+ }
+ while (BN_is_zero(k));
+
+@@ -282,7 +302,7 @@ static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dgst_len,
+ {
+ if (in_kinv == NULL || in_r == NULL)
+ {
+- if (!ECDSA_sign_setup(eckey, ctx, &kinv, &ret->r))
++ if (!ecdsa->meth->ecdsa_sign_setup(eckey, ctx, &kinv, &ret->r, dgst, dgst_len))
+ {
+ ECDSAerr(ECDSA_F_ECDSA_DO_SIGN,ERR_R_ECDSA_LIB);
+ goto err;
+diff --git a/crypto/ecdsa/ecs_sign.c b/crypto/ecdsa/ecs_sign.c
+index 353d5af..ea79a24 100644
+--- a/crypto/ecdsa/ecs_sign.c
++++ b/crypto/ecdsa/ecs_sign.c
+@@ -58,6 +58,7 @@
+ #include <openssl/engine.h>
+ #endif
+ #include <openssl/rand.h>
++#include <openssl/err.h>
+
+ ECDSA_SIG *ECDSA_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey)
+ {
+@@ -102,5 +103,12 @@ int ECDSA_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
+ ECDSA_DATA *ecdsa = ecdsa_check(eckey);
+ if (ecdsa == NULL)
+ return 0;
+- return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp);
++ if (EC_KEY_get_nonce_from_hash(eckey))
++ {
++ /* You cannot precompute the ECDSA nonce if it is required to
++ * depend on the message. */
++ ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ECDSA_R_NONCE_CANNOT_BE_PRECOMPUTED);
++ return 0;
++ }
++ return ecdsa->meth->ecdsa_sign_setup(eckey, ctx_in, kinvp, rp, NULL, 0);
+ }
+--
+1.8.5.1
+
diff --git a/patches/0011-ecdhe_psk.patch b/patches/0011-ecdhe_psk.patch
new file mode 100644
index 0000000..f2d3d8b
--- /dev/null
+++ b/patches/0011-ecdhe_psk.patch
@@ -0,0 +1,1405 @@
+From 1d43b892d27915843e5714d96de269672b5b35db Mon Sep 17 00:00:00 2001
+From: Adam Langley <[email protected]>
+Date: Thu, 14 Nov 2013 16:12:01 -0500
+Subject: Implement ECDHE-PSK-WITH-AES.
+
+Add support for TLS-ECDHE-PSK cipher suites:
+* TLS-ECDHE-PSK-WITH-AES-128-CBC-SHA256, and
+* TLS-ECDHE-PSK-WITH-AES-256-CBC-SHA384.
+---
+ ssl/s3_clnt.c | 360 ++++++++++++++++++++++----------------
+ ssl/s3_enc.c | 2 +-
+ ssl/s3_lib.c | 38 +++++-
+ ssl/s3_srvr.c | 541 ++++++++++++++++++++++++++++++++--------------------------
+ ssl/ssl_lib.c | 2 +-
+ ssl/tls1.h | 8 +
+ 6 files changed, 555 insertions(+), 396 deletions(-)
+
+diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
+index 8f3740f..3672cce 100644
+--- a/ssl/s3_clnt.c
++++ b/ssl/s3_clnt.c
+@@ -333,9 +333,10 @@ int ssl3_connect(SSL *s)
+ }
+ #endif
+ /* Check if it is anon DH/ECDH */
+- /* or PSK */
++ /* or non-RSA PSK */
+ if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) &&
+- !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK))
++ !((s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK) &&
++ !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA)))
+ {
+ ret=ssl3_get_server_certificate(s);
+ if (ret <= 0) goto end;
+@@ -1368,7 +1369,7 @@ int ssl3_get_key_exchange(SSL *s)
+ omitted if no identity hint is sent. Set
+ session->sess_cert anyway to avoid problems
+ later.*/
+- if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)
++ if (s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK)
+ {
+ s->session->sess_cert=ssl_sess_cert_new();
+ if (s->ctx->psk_identity_hint)
+@@ -1416,52 +1417,56 @@ int ssl3_get_key_exchange(SSL *s)
+ EVP_MD_CTX_init(&md_ctx);
+
+ #ifndef OPENSSL_NO_PSK
+- if (alg_k & SSL_kPSK)
++ if (alg_a & SSL_aPSK)
+ {
+ char tmp_id_hint[PSK_MAX_IDENTITY_LEN+1];
+
+ al=SSL_AD_HANDSHAKE_FAILURE;
+ n2s(p,i);
+ param_len=i+2;
+- /* Store PSK identity hint for later use, hint is used
+- * in ssl3_send_client_key_exchange. Assume that the
+- * maximum length of a PSK identity hint can be as
+- * long as the maximum length of a PSK identity. */
+- if (i > PSK_MAX_IDENTITY_LEN)
+- {
+- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
+- SSL_R_DATA_LENGTH_TOO_LONG);
+- goto f_err;
+- }
+- if (param_len > n)
++ s->ctx->psk_identity_hint = NULL;
++ if (i != 0)
+ {
+- al=SSL_AD_DECODE_ERROR;
+- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
+- SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH);
+- goto f_err;
++ /* Store PSK identity hint for later use, hint is used
++ * in ssl3_send_client_key_exchange. Assume that the
++ * maximum length of a PSK identity hint can be as
++ * long as the maximum length of a PSK identity. */
++ if (i > PSK_MAX_IDENTITY_LEN)
++ {
++ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
++ SSL_R_DATA_LENGTH_TOO_LONG);
++ goto f_err;
++ }
++ if (param_len > n)
++ {
++ al=SSL_AD_DECODE_ERROR;
++ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
++ SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH);
++ goto f_err;
++ }
++ /* If received PSK identity hint contains NULL
++ * characters, the hint is truncated from the first
++ * NULL. p may not be ending with NULL, so create a
++ * NULL-terminated string. */
++ memcpy(tmp_id_hint, p, i);
++ memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
++ if (s->ctx->psk_identity_hint != NULL)
++ OPENSSL_free(s->ctx->psk_identity_hint);
++ s->ctx->psk_identity_hint = BUF_strdup(tmp_id_hint);
++ if (s->ctx->psk_identity_hint == NULL)
++ {
++ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE);
++ goto f_err;
++ }
+ }
+- /* If received PSK identity hint contains NULL
+- * characters, the hint is truncated from the first
+- * NULL. p may not be ending with NULL, so create a
+- * NULL-terminated string. */
+- memcpy(tmp_id_hint, p, i);
+- memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
+- if (s->ctx->psk_identity_hint != NULL)
+- OPENSSL_free(s->ctx->psk_identity_hint);
+- s->ctx->psk_identity_hint = BUF_strdup(tmp_id_hint);
+- if (s->ctx->psk_identity_hint == NULL)
+- {
+- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE);
+- goto f_err;
+- }
+-
+ p+=i;
+ n-=param_len;
+ }
+- else
+ #endif /* !OPENSSL_NO_PSK */
++
++ if (0) {}
+ #ifndef OPENSSL_NO_SRP
+- if (alg_k & SSL_kSRP)
++ else if (alg_k & SSL_kSRP)
+ {
+ n2s(p,i);
+ param_len=i+2;
+@@ -1538,10 +1543,9 @@ int ssl3_get_key_exchange(SSL *s)
+ pkey=X509_get_pubkey(s->session->sess_cert->peer_pkeys[SSL_PKEY_DSA_SIGN].x509);
+ #endif
+ }
+- else
+ #endif /* !OPENSSL_NO_SRP */
+ #ifndef OPENSSL_NO_RSA
+- if (alg_k & SSL_kRSA)
++ else if (alg_k & SSL_kRSA)
+ {
+ if ((rsa=RSA_new()) == NULL)
+ {
+@@ -1590,9 +1594,6 @@ int ssl3_get_key_exchange(SSL *s)
+ s->session->sess_cert->peer_rsa_tmp=rsa;
+ rsa=NULL;
+ }
+-#else /* OPENSSL_NO_RSA */
+- if (0)
+- ;
+ #endif
+ #ifndef OPENSSL_NO_DH
+ else if (alg_k & SSL_kEDH)
+@@ -1773,14 +1774,14 @@ int ssl3_get_key_exchange(SSL *s)
+ EC_POINT_free(srvr_ecpoint);
+ srvr_ecpoint = NULL;
+ }
+- else if (alg_k)
++#endif /* !OPENSSL_NO_ECDH */
++
++ else if (!(alg_k & SSL_kPSK))
+ {
+ al=SSL_AD_UNEXPECTED_MESSAGE;
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE);
+ goto f_err;
+ }
+-#endif /* !OPENSSL_NO_ECDH */
+-
+
+ /* p points to the next byte, there are 'n' bytes left */
+
+@@ -1885,8 +1886,9 @@ fprintf(stderr, "USING TLSv1.2 HASH %s\n", EVP_MD_name(md));
+ }
+ else
+ {
+- if (!(alg_a & SSL_aNULL) && !(alg_k & SSL_kPSK))
+- /* aNULL or kPSK do not need public keys */
++ if (!(alg_a & SSL_aNULL) &&
++ /* Among PSK ciphers only RSA_PSK needs a public key */
++ !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
+ {
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
+ goto err;
+@@ -2286,8 +2288,9 @@ int ssl3_get_server_done(SSL *s)
+ int ssl3_send_client_key_exchange(SSL *s)
+ {
+ unsigned char *p,*d;
+- int n;
++ int n = 0;
+ unsigned long alg_k;
++ unsigned long alg_a;
+ #ifndef OPENSSL_NO_RSA
+ unsigned char *q;
+ EVP_PKEY *pkey=NULL;
+@@ -2302,7 +2305,11 @@ int ssl3_send_client_key_exchange(SSL *s)
+ unsigned char *encodedPoint = NULL;
+ int encoded_pt_len = 0;
+ BN_CTX * bn_ctx = NULL;
+-#endif
++#ifndef OPENSSL_NO_PSK
++ unsigned int psk_len = 0;
++ unsigned char psk[PSK_MAX_PSK_LEN];
++#endif /* OPENSSL_NO_PSK */
++#endif /* OPENSSL_NO_ECDH */
+
+ if (s->state == SSL3_ST_CW_KEY_EXCH_A)
+ {
+@@ -2310,7 +2317,96 @@ int ssl3_send_client_key_exchange(SSL *s)
+ p= &(d[4]);
+
+ alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
++ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
++
++#ifndef OPENSSL_NO_PSK
++ if (alg_a & SSL_aPSK)
++ {
++ char identity[PSK_MAX_IDENTITY_LEN];
++ unsigned char *t = NULL;
++ unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4];
++ unsigned int pre_ms_len = 0;
++ int psk_err = 1;
++
++ n = 0;
++ if (s->psk_client_callback == NULL)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ SSL_R_PSK_NO_CLIENT_CB);
++ goto err;
++ }
+
++ psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
++ identity, PSK_MAX_IDENTITY_LEN, psk, sizeof(psk));
++ if (psk_len > PSK_MAX_PSK_LEN)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ ERR_R_INTERNAL_ERROR);
++ goto psk_err;
++ }
++ else if (psk_len == 0)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ SSL_R_PSK_IDENTITY_NOT_FOUND);
++ goto psk_err;
++ }
++
++ if (!(alg_k & SSL_kEECDH))
++ {
++ /* Create the shared secret now if we're not using ECDHE-PSK.*/
++ pre_ms_len = 2+psk_len+2+psk_len;
++ t = pre_ms;
++ s2n(psk_len, t);
++ memset(t, 0, psk_len);
++ t+=psk_len;
++ s2n(psk_len, t);
++ memcpy(t, psk, psk_len);
++
++ s->session->master_key_length =
++ s->method->ssl3_enc->generate_master_secret(s,
++ s->session->master_key,
++ pre_ms, pre_ms_len);
++ n = strlen(identity);
++ s2n(n, p);
++ memcpy(p, identity, n);
++ n += 2;
++ }
++
++ if (s->session->psk_identity_hint != NULL)
++ OPENSSL_free(s->session->psk_identity_hint);
++ s->session->psk_identity_hint = NULL;
++ if (s->ctx->psk_identity_hint)
++ {
++ s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
++ if (s->ctx->psk_identity_hint != NULL &&
++ s->session->psk_identity_hint == NULL)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto psk_err;
++ }
++ }
++
++ if (s->session->psk_identity != NULL)
++ OPENSSL_free(s->session->psk_identity);
++ s->session->psk_identity = BUF_strdup(identity);
++ if (s->session->psk_identity == NULL)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto psk_err;
++ }
++ psk_err = 0;
++ psk_err:
++ OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN);
++ OPENSSL_cleanse(pre_ms, sizeof(pre_ms));
++ if (psk_err != 0)
++ {
++ ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
++ goto err;
++ }
++ }
++#endif
+ /* Fool emacs indentation */
+ if (0) {}
+ #ifndef OPENSSL_NO_RSA
+@@ -2571,14 +2667,19 @@ int ssl3_send_client_key_exchange(SSL *s)
+ /* perhaps clean things up a bit EAY EAY EAY EAY*/
+ }
+ #endif
+-
+-#ifndef OPENSSL_NO_ECDH
++#ifndef OPENSSL_NO_ECDH
+ else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
+ {
+ const EC_GROUP *srvr_group = NULL;
+ EC_KEY *tkey;
+ int ecdh_clnt_cert = 0;
+ int field_size = 0;
++#ifndef OPENSSL_NO_PSK
++ unsigned char *pre_ms;
++ unsigned char *t;
++ unsigned int pre_ms_len;
++ unsigned int i;
++#endif
+
+ if (s->session->sess_cert == NULL)
+ {
+@@ -2706,15 +2807,41 @@ int ssl3_send_client_key_exchange(SSL *s)
+ goto err;
+ }
+
+- /* generate master key from the result */
+- s->session->master_key_length = s->method->ssl3_enc \
+- -> generate_master_secret(s,
+- s->session->master_key,
+- p, n);
+-
++#ifndef OPENSSL_NO_PSK
++ /* ECDHE PSK ciphersuites from RFC 5489 */
++ if ((alg_a & SSL_aPSK) && psk_len != 0)
++ {
++ pre_ms_len = 2+psk_len+2+n;
++ pre_ms = OPENSSL_malloc(pre_ms_len);
++ if (pre_ms == NULL)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto err;
++ }
++ memset(pre_ms, 0, pre_ms_len);
++ t = pre_ms;
++ s2n(psk_len, t);
++ memcpy(t, psk, psk_len);
++ t += psk_len;
++ s2n(n, t);
++ memcpy(t, p, n);
++ s->session->master_key_length = s->method->ssl3_enc \
++ -> generate_master_secret(s,
++ s->session->master_key, pre_ms, pre_ms_len);
++ OPENSSL_cleanse(pre_ms, pre_ms_len);
++ OPENSSL_free(pre_ms);
++ }
++#endif /* OPENSSL_NO_PSK */
++ if (!(alg_a & SSL_aPSK))
++ {
++ /* generate master key from the result */
++ s->session->master_key_length = s->method->ssl3_enc \
++ -> generate_master_secret(s,
++ s->session->master_key, p, n);
++ }
+ memset(p, 0, n); /* clean up */
+-
+- if (ecdh_clnt_cert)
++ if (ecdh_clnt_cert)
+ {
+ /* Send empty client key exch message */
+ n = 0;
+@@ -2742,29 +2869,42 @@ int ssl3_send_client_key_exchange(SSL *s)
+ }
+
+ /* Encode the public key */
+- n = EC_POINT_point2oct(srvr_group,
+- EC_KEY_get0_public_key(clnt_ecdh),
+- POINT_CONVERSION_UNCOMPRESSED,
++ encoded_pt_len = EC_POINT_point2oct(srvr_group,
++ EC_KEY_get0_public_key(clnt_ecdh),
++ POINT_CONVERSION_UNCOMPRESSED,
+ encodedPoint, encoded_pt_len, bn_ctx);
++
++ n = 0;
++#ifndef OPENSSL_NO_PSK
++ if ((alg_a & SSL_aPSK) && psk_len != 0)
++ {
++ i = strlen(s->session->psk_identity);
++ s2n(i, p);
++ memcpy(p, s->session->psk_identity, i);
++ p += i;
++ n = i + 2;
++ }
++#endif
+
+- *p = n; /* length of encoded point */
++ *p = encoded_pt_len; /* length of encoded point */
+ /* Encoded point will be copied here */
+- p += 1;
++ p += 1;
++ n += 1;
+ /* copy the point */
+- memcpy((unsigned char *)p, encodedPoint, n);
++ memcpy((unsigned char *)p, encodedPoint, encoded_pt_len);
+ /* increment n to account for length field */
+- n += 1;
++ n += encoded_pt_len;
+ }
+
+ /* Free allocated memory */
+ BN_CTX_free(bn_ctx);
+ if (encodedPoint != NULL) OPENSSL_free(encodedPoint);
+- if (clnt_ecdh != NULL)
++ if (clnt_ecdh != NULL)
+ EC_KEY_free(clnt_ecdh);
+ EVP_PKEY_free(srvr_pub_pkey);
+ }
+ #endif /* !OPENSSL_NO_ECDH */
+- else if (alg_k & SSL_kGOST)
++ else if (alg_k & SSL_kGOST)
+ {
+ /* GOST key exchange message creation */
+ EVP_PKEY_CTX *pkey_ctx;
+@@ -2887,89 +3027,7 @@ int ssl3_send_client_key_exchange(SSL *s)
+ }
+ }
+ #endif
+-#ifndef OPENSSL_NO_PSK
+- else if (alg_k & SSL_kPSK)
+- {
+- char identity[PSK_MAX_IDENTITY_LEN];
+- unsigned char *t = NULL;
+- unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4];
+- unsigned int pre_ms_len = 0, psk_len = 0;
+- int psk_err = 1;
+-
+- n = 0;
+- if (s->psk_client_callback == NULL)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- SSL_R_PSK_NO_CLIENT_CB);
+- goto err;
+- }
+-
+- psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
+- identity, PSK_MAX_IDENTITY_LEN,
+- psk_or_pre_ms, sizeof(psk_or_pre_ms));
+- if (psk_len > PSK_MAX_PSK_LEN)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- ERR_R_INTERNAL_ERROR);
+- goto psk_err;
+- }
+- else if (psk_len == 0)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- SSL_R_PSK_IDENTITY_NOT_FOUND);
+- goto psk_err;
+- }
+-
+- /* create PSK pre_master_secret */
+- pre_ms_len = 2+psk_len+2+psk_len;
+- t = psk_or_pre_ms;
+- memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len);
+- s2n(psk_len, t);
+- memset(t, 0, psk_len);
+- t+=psk_len;
+- s2n(psk_len, t);
+-
+- if (s->session->psk_identity_hint != NULL)
+- OPENSSL_free(s->session->psk_identity_hint);
+- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
+- if (s->ctx->psk_identity_hint != NULL &&
+- s->session->psk_identity_hint == NULL)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+-
+- if (s->session->psk_identity != NULL)
+- OPENSSL_free(s->session->psk_identity);
+- s->session->psk_identity = BUF_strdup(identity);
+- if (s->session->psk_identity == NULL)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+-
+- s->session->master_key_length =
+- s->method->ssl3_enc->generate_master_secret(s,
+- s->session->master_key,
+- psk_or_pre_ms, pre_ms_len);
+- n = strlen(identity);
+- s2n(n, p);
+- memcpy(p, identity, n);
+- n+=2;
+- psk_err = 0;
+- psk_err:
+- OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN);
+- OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms));
+- if (psk_err != 0)
+- {
+- ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
+- goto err;
+- }
+- }
+-#endif
+- else
++ else if (!(alg_k & SSL_kPSK) || ((alg_k & SSL_kPSK) && !(alg_a & SSL_aPSK)))
+ {
+ ssl3_send_alert(s, SSL3_AL_FATAL,
+ SSL_AD_HANDSHAKE_FAILURE);
+@@ -3274,7 +3332,7 @@ int ssl3_check_cert_and_algorithm(SSL *s)
+ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
+
+ /* we don't have a certificate */
+- if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || (alg_k & SSL_kPSK))
++ if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || ((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
+ return(1);
+
+ sc=s->session->sess_cert;
+diff --git a/ssl/s3_enc.c b/ssl/s3_enc.c
+index 6358e1b..0dac7e7 100644
+--- a/ssl/s3_enc.c
++++ b/ssl/s3_enc.c
+@@ -734,7 +734,7 @@ int n_ssl3_mac(SSL *ssl, unsigned char *md, int send)
+ }
+
+ t=EVP_MD_CTX_size(hash);
+- if (t < 0)
++ if (t < 0 || t > 20)
+ return -1;
+ md_size=t;
+ npad=(48/md_size)*md_size;
+diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
+index 1d87ac5..77244d3 100644
+--- a/ssl/s3_lib.c
++++ b/ssl/s3_lib.c
+@@ -2827,6 +2827,42 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
+ 256,
+ },
+
++#ifndef OPENSSL_NO_PSK
++ /* ECDH PSK ciphersuites from RFC 5489 */
++
++ /* Cipher C037 */
++ {
++ 1,
++ TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256,
++ TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256,
++ SSL_kEECDH,
++ SSL_aPSK,
++ SSL_AES128,
++ SSL_SHA256,
++ SSL_TLSV1,
++ SSL_NOT_EXP|SSL_HIGH,
++ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256,
++ 128,
++ 128,
++ },
++
++ /* Cipher C038 */
++ {
++ 1,
++ TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384,
++ TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384,
++ SSL_kEECDH,
++ SSL_aPSK,
++ SSL_AES256,
++ SSL_SHA384,
++ SSL_TLSV1,
++ SSL_NOT_EXP|SSL_HIGH,
++ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384,
++ 256,
++ 256,
++ },
++#endif /* OPENSSL_NO_PSK */
++
+ #endif /* OPENSSL_NO_ECDH */
+
+
+@@ -3979,7 +3999,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
+ #endif /* OPENSSL_NO_KRB5 */
+ #ifndef OPENSSL_NO_PSK
+ /* with PSK there must be server callback set */
+- if ((alg_k & SSL_kPSK) && s->psk_server_callback == NULL)
++ if ((alg_a & SSL_aPSK) && s->psk_server_callback == NULL)
+ continue;
+ #endif /* OPENSSL_NO_PSK */
+
+diff --git a/ssl/s3_srvr.c b/ssl/s3_srvr.c
+index 9335eda..fe70124 100644
+--- a/ssl/s3_srvr.c
++++ b/ssl/s3_srvr.c
+@@ -217,6 +217,7 @@ int ssl3_accept(SSL *s)
+ {
+ BUF_MEM *buf;
+ unsigned long alg_k,Time=(unsigned long)time(NULL);
++ unsigned long alg_a;
+ void (*cb)(const SSL *ssl,int type,int val)=NULL;
+ int ret= -1;
+ int new_state,state,skip=0;
+@@ -418,9 +419,11 @@ int ssl3_accept(SSL *s)
+ case SSL3_ST_SW_CERT_A:
+ case SSL3_ST_SW_CERT_B:
+ /* Check if it is anon DH or anon ECDH, */
+- /* normal PSK or KRB5 or SRP */
++ /* non-RSA PSK or KRB5 or SRP */
+ if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
+- && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)
++ /* Among PSK ciphersuites only RSA_PSK uses server certificate */
++ && !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK &&
++ !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA))
+ && !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aKRB5))
+ {
+ ret=ssl3_send_server_certificate(s);
+@@ -449,6 +452,7 @@ int ssl3_accept(SSL *s)
+ case SSL3_ST_SW_KEY_EXCH_A:
+ case SSL3_ST_SW_KEY_EXCH_B:
+ alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
++ alg_a = s->s3->tmp.new_cipher->algorithm_auth;
+
+ /* clear this, it may get reset by
+ * send_server_key_exchange */
+@@ -478,10 +482,12 @@ int ssl3_accept(SSL *s)
+ * public key for key exchange.
+ */
+ if (s->s3->tmp.use_rsa_tmp
+- /* PSK: send ServerKeyExchange if PSK identity
+- * hint if provided */
++ /* PSK: send ServerKeyExchange if either:
++ * - PSK identity hint is provided, or
++ * - the key exchange is kEECDH.
++ */
+ #ifndef OPENSSL_NO_PSK
+- || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint)
++ || ((alg_a & SSL_aPSK) && ((alg_k & SSL_kEECDH) || s->ctx->psk_identity_hint))
+ #endif
+ #ifndef OPENSSL_NO_SRP
+ /* SRP: send ServerKeyExchange */
+@@ -1658,7 +1664,8 @@ int ssl3_send_server_key_exchange(SSL *s)
+ const EVP_MD *md = NULL;
+ unsigned char *p,*d;
+ int al,i;
+- unsigned long type;
++ unsigned long alg_k;
++ unsigned long alg_a;
+ int n;
+ CERT *cert;
+ BIGNUM *r[4];
+@@ -1669,15 +1676,25 @@ int ssl3_send_server_key_exchange(SSL *s)
+ EVP_MD_CTX_init(&md_ctx);
+ if (s->state == SSL3_ST_SW_KEY_EXCH_A)
+ {
+- type=s->s3->tmp.new_cipher->algorithm_mkey;
++ alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
++ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
+ cert=s->cert;
+
+ buf=s->init_buf;
+
+ r[0]=r[1]=r[2]=r[3]=NULL;
+ n=0;
++#ifndef OPENSSL_NO_PSK
++ if (alg_a & SSL_aPSK)
++ {
++ /* size for PSK identity hint */
++ n+=2;
++ if (s->ctx->psk_identity_hint)
++ n+=strlen(s->ctx->psk_identity_hint);
++ }
++#endif /* !OPENSSL_NO_PSK */
+ #ifndef OPENSSL_NO_RSA
+- if (type & SSL_kRSA)
++ if (alg_k & SSL_kRSA)
+ {
+ rsa=cert->rsa_tmp;
+ if ((rsa == NULL) && (s->cert->rsa_tmp_cb != NULL))
+@@ -1704,10 +1721,9 @@ int ssl3_send_server_key_exchange(SSL *s)
+ r[1]=rsa->e;
+ s->s3->tmp.use_rsa_tmp=1;
+ }
+- else
+ #endif
+ #ifndef OPENSSL_NO_DH
+- if (type & SSL_kEDH)
++ else if (alg_k & SSL_kEDH)
+ {
+ dhp=cert->dh_tmp;
+ if ((dhp == NULL) && (s->cert->dh_tmp_cb != NULL))
+@@ -1760,10 +1776,9 @@ int ssl3_send_server_key_exchange(SSL *s)
+ r[1]=dh->g;
+ r[2]=dh->pub_key;
+ }
+- else
+ #endif
+ #ifndef OPENSSL_NO_ECDH
+- if (type & SSL_kEECDH)
++ else if (alg_k & SSL_kEECDH)
+ {
+ const EC_GROUP *group;
+
+@@ -1876,7 +1891,7 @@ int ssl3_send_server_key_exchange(SSL *s)
+ * to encode the entire ServerECDHParams
+ * structure.
+ */
+- n = 4 + encodedlen;
++ n += 4 + encodedlen;
+
+ /* We'll generate the serverKeyExchange message
+ * explicitly so we can set these to NULLs
+@@ -1886,18 +1901,9 @@ int ssl3_send_server_key_exchange(SSL *s)
+ r[2]=NULL;
+ r[3]=NULL;
+ }
+- else
+ #endif /* !OPENSSL_NO_ECDH */
+-#ifndef OPENSSL_NO_PSK
+- if (type & SSL_kPSK)
+- {
+- /* reserve size for record length and PSK identity hint*/
+- n+=2+strlen(s->ctx->psk_identity_hint);
+- }
+- else
+-#endif /* !OPENSSL_NO_PSK */
+ #ifndef OPENSSL_NO_SRP
+- if (type & SSL_kSRP)
++ else if (alg_k & SSL_kSRP)
+ {
+ if ((s->srp_ctx.N == NULL) ||
+ (s->srp_ctx.g == NULL) ||
+@@ -1912,8 +1918,8 @@ int ssl3_send_server_key_exchange(SSL *s)
+ r[2]=s->srp_ctx.s;
+ r[3]=s->srp_ctx.B;
+ }
+- else
+ #endif
++ else if (!(alg_k & SSL_kPSK))
+ {
+ al=SSL_AD_HANDSHAKE_FAILURE;
+ SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE);
+@@ -1923,15 +1929,16 @@ int ssl3_send_server_key_exchange(SSL *s)
+ {
+ nr[i]=BN_num_bytes(r[i]);
+ #ifndef OPENSSL_NO_SRP
+- if ((i == 2) && (type & SSL_kSRP))
++ if ((i == 2) && (alg_k & SSL_kSRP))
+ n+=1+nr[i];
+ else
+ #endif
+ n+=2+nr[i];
+ }
+
+- if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
+- && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK))
++ if (!(alg_a & SSL_aNULL)
++ /* Among PSK ciphersuites only RSA uses a certificate */
++ && !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
+ {
+ if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher,&md))
+ == NULL)
+@@ -1958,7 +1965,7 @@ int ssl3_send_server_key_exchange(SSL *s)
+ for (i=0; i < 4 && r[i] != NULL; i++)
+ {
+ #ifndef OPENSSL_NO_SRP
+- if ((i == 2) && (type & SSL_kSRP))
++ if ((i == 2) && (alg_k & SSL_kSRP))
+ {
+ *p = nr[i];
+ p++;
+@@ -1970,8 +1977,32 @@ int ssl3_send_server_key_exchange(SSL *s)
+ p+=nr[i];
+ }
+
++/* Note: ECDHE PSK ciphersuites use SSL_kEECDH and SSL_aPSK.
++ * When one of them is used, the server key exchange record needs to have both
++ * the psk_identity_hint and the ServerECDHParams. */
++#ifndef OPENSSL_NO_PSK
++ if (alg_a & SSL_aPSK)
++ {
++ if (s->ctx->psk_identity_hint)
++ {
++ /* copy PSK identity hint */
++ s2n(strlen(s->ctx->psk_identity_hint), p);
++ strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
++ p+=strlen(s->ctx->psk_identity_hint);
++ }
++ else
++ {
++ /* No identity hint is provided. */
++ *p = 0;
++ p += 1;
++ *p = 0;
++ p += 1;
++ }
++ }
++#endif /* OPENSSL_NO_PSK */
++
+ #ifndef OPENSSL_NO_ECDH
+- if (type & SSL_kEECDH)
++ if (alg_k & SSL_kEECDH)
+ {
+ /* XXX: For now, we only support named (not generic) curves.
+ * In this situation, the serverKeyExchange message has:
+@@ -1994,17 +2025,7 @@ int ssl3_send_server_key_exchange(SSL *s)
+ encodedPoint = NULL;
+ p += encodedlen;
+ }
+-#endif
+-
+-#ifndef OPENSSL_NO_PSK
+- if (type & SSL_kPSK)
+- {
+- /* copy PSK identity hint */
+- s2n(strlen(s->ctx->psk_identity_hint), p);
+- strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
+- p+=strlen(s->ctx->psk_identity_hint);
+- }
+-#endif
++#endif /* OPENSSL_NO_ECDH */
+
+ /* not anonymous */
+ if (pkey != NULL)
+@@ -2041,7 +2062,7 @@ int ssl3_send_server_key_exchange(SSL *s)
+ n+=u+2;
+ }
+ else
+-#endif
++#endif /* OPENSSL_NO_RSA */
+ if (md)
+ {
+ /* For TLS1.2 and later send signature
+@@ -2215,6 +2236,7 @@ int ssl3_get_client_key_exchange(SSL *s)
+ int i,al,ok;
+ long n;
+ unsigned long alg_k;
++ unsigned long alg_a;
+ unsigned char *p;
+ #ifndef OPENSSL_NO_RSA
+ RSA *rsa=NULL;
+@@ -2232,7 +2254,11 @@ int ssl3_get_client_key_exchange(SSL *s)
+ EC_KEY *srvr_ecdh = NULL;
+ EVP_PKEY *clnt_pub_pkey = NULL;
+ EC_POINT *clnt_ecpoint = NULL;
+- BN_CTX *bn_ctx = NULL;
++ BN_CTX *bn_ctx = NULL;
++#ifndef OPENSSL_NO_PSK
++ unsigned int psk_len = 0;
++ unsigned char psk[PSK_MAX_PSK_LEN];
++#endif /* OPENSSL_NO_PSK */
+ #endif
+
+ n=s->method->ssl_get_message(s,
+@@ -2246,7 +2272,106 @@ int ssl3_get_client_key_exchange(SSL *s)
+ p=(unsigned char *)s->init_msg;
+
+ alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
++ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
++
++#ifndef OPENSSL_NO_PSK
++ if (alg_a & SSL_aPSK)
++ {
++ unsigned char *t = NULL;
++ unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4];
++ unsigned int pre_ms_len = 0;
++ int psk_err = 1;
++ char tmp_id[PSK_MAX_IDENTITY_LEN+1];
++
++ al=SSL_AD_HANDSHAKE_FAILURE;
++
++ n2s(p, i);
++ if (n != i+2 && !(alg_k & SSL_kEECDH))
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ SSL_R_LENGTH_MISMATCH);
++ goto psk_err;
++ }
++ if (i > PSK_MAX_IDENTITY_LEN)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ SSL_R_DATA_LENGTH_TOO_LONG);
++ goto psk_err;
++ }
++ if (s->psk_server_callback == NULL)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ SSL_R_PSK_NO_SERVER_CB);
++ goto psk_err;
++ }
++
++ /* Create guaranteed NUL-terminated identity
++ * string for the callback */
++ memcpy(tmp_id, p, i);
++ memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
++ psk_len = s->psk_server_callback(s, tmp_id, psk, sizeof(psk));
+
++ if (psk_len > PSK_MAX_PSK_LEN)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ ERR_R_INTERNAL_ERROR);
++ goto psk_err;
++ }
++ else if (psk_len == 0)
++ {
++ /* PSK related to the given identity not found */
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ SSL_R_PSK_IDENTITY_NOT_FOUND);
++ al=SSL_AD_UNKNOWN_PSK_IDENTITY;
++ goto psk_err;
++ }
++ if (!(alg_k & SSL_kEECDH))
++ {
++ /* Create the shared secret now if we're not using ECDHE-PSK.*/
++ pre_ms_len=2+psk_len+2+psk_len;
++ t = pre_ms;
++ s2n(psk_len, t);
++ memset(t, 0, psk_len);
++ t+=psk_len;
++ s2n(psk_len, t);
++ memcpy(t, psk, psk_len);
++
++ s->session->master_key_length=
++ s->method->ssl3_enc->generate_master_secret(s,
++ s->session->master_key, pre_ms, pre_ms_len);
++ }
++ if (s->session->psk_identity != NULL)
++ OPENSSL_free(s->session->psk_identity);
++ s->session->psk_identity = BUF_strdup(tmp_id);
++ OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1);
++ if (s->session->psk_identity == NULL)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto psk_err;
++ }
++
++ if (s->session->psk_identity_hint != NULL)
++ OPENSSL_free(s->session->psk_identity_hint);
++ s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
++ if (s->ctx->psk_identity_hint != NULL &&
++ s->session->psk_identity_hint == NULL)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto psk_err;
++ }
++
++ p += i;
++ n -= (i + 2);
++ psk_err = 0;
++ psk_err:
++ OPENSSL_cleanse(pre_ms, sizeof(pre_ms));
++ if (psk_err != 0)
++ goto f_err;
++ }
++#endif /* OPENSSL_NO_PSK */
++ if (0) {}
+ #ifndef OPENSSL_NO_RSA
+ if (alg_k & SSL_kRSA)
+ {
+@@ -2410,10 +2535,9 @@ int ssl3_get_client_key_exchange(SSL *s)
+ p,sizeof(rand_premaster_secret));
+ OPENSSL_cleanse(p,sizeof(rand_premaster_secret));
+ }
+- else
+ #endif
+ #ifndef OPENSSL_NO_DH
+- if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
++ else if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
+ {
+ n2s(p,i);
+ if (n != i+2)
+@@ -2474,10 +2598,9 @@ int ssl3_get_client_key_exchange(SSL *s)
+ s->session->master_key,p,i);
+ OPENSSL_cleanse(p,i);
+ }
+- else
+ #endif
+ #ifndef OPENSSL_NO_KRB5
+- if (alg_k & SSL_kKRB5)
++ else if (alg_k & SSL_kKRB5)
+ {
+ krb5_error_code krb5rc;
+ krb5_data enc_ticket;
+@@ -2666,17 +2789,20 @@ int ssl3_get_client_key_exchange(SSL *s)
+ ** if (s->kssl_ctx) s->kssl_ctx = NULL;
+ */
+ }
+- else
+ #endif /* OPENSSL_NO_KRB5 */
+-
+ #ifndef OPENSSL_NO_ECDH
+- if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
++ else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
+ {
+ int ret = 1;
+ int field_size = 0;
+ const EC_KEY *tkey;
+ const EC_GROUP *group;
+ const BIGNUM *priv_key;
++#ifndef OPENSSL_NO_PSK
++ unsigned char *pre_ms;
++ unsigned int pre_ms_len;
++ unsigned char *t;
++#endif /* OPENSSL_NO_PSK */
+
+ /* initialize structures for server's ECDH key pair */
+ if ((srvr_ecdh = EC_KEY_new()) == NULL)
+@@ -2772,7 +2898,7 @@ int ssl3_get_client_key_exchange(SSL *s)
+ }
+
+ /* Get encoded point length */
+- i = *p;
++ i = *p;
+ p += 1;
+ if (n != 1 + i)
+ {
+@@ -2814,214 +2940,145 @@ int ssl3_get_client_key_exchange(SSL *s)
+ EC_KEY_free(srvr_ecdh);
+ BN_CTX_free(bn_ctx);
+ EC_KEY_free(s->s3->tmp.ecdh);
+- s->s3->tmp.ecdh = NULL;
++ s->s3->tmp.ecdh = NULL;
+
+- /* Compute the master secret */
+- s->session->master_key_length = s->method->ssl3_enc-> \
+- generate_master_secret(s, s->session->master_key, p, i);
+-
+- OPENSSL_cleanse(p, i);
+- return (ret);
+- }
+- else
+-#endif
+ #ifndef OPENSSL_NO_PSK
+- if (alg_k & SSL_kPSK)
++ /* ECDHE PSK ciphersuites from RFC 5489 */
++ if ((alg_a & SSL_aPSK) && psk_len != 0)
+ {
+- unsigned char *t = NULL;
+- unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4];
+- unsigned int pre_ms_len = 0, psk_len = 0;
+- int psk_err = 1;
+- char tmp_id[PSK_MAX_IDENTITY_LEN+1];
+-
+- al=SSL_AD_HANDSHAKE_FAILURE;
+-
+- n2s(p,i);
+- if (n != i+2)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- SSL_R_LENGTH_MISMATCH);
+- goto psk_err;
+- }
+- if (i > PSK_MAX_IDENTITY_LEN)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- SSL_R_DATA_LENGTH_TOO_LONG);
+- goto psk_err;
+- }
+- if (s->psk_server_callback == NULL)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- SSL_R_PSK_NO_SERVER_CB);
+- goto psk_err;
+- }
+-
+- /* Create guaranteed NULL-terminated identity
+- * string for the callback */
+- memcpy(tmp_id, p, i);
+- memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
+- psk_len = s->psk_server_callback(s, tmp_id,
+- psk_or_pre_ms, sizeof(psk_or_pre_ms));
+- OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1);
+-
+- if (psk_len > PSK_MAX_PSK_LEN)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- ERR_R_INTERNAL_ERROR);
+- goto psk_err;
+- }
+- else if (psk_len == 0)
+- {
+- /* PSK related to the given identity not found */
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- SSL_R_PSK_IDENTITY_NOT_FOUND);
+- al=SSL_AD_UNKNOWN_PSK_IDENTITY;
+- goto psk_err;
+- }
+-
+- /* create PSK pre_master_secret */
+- pre_ms_len=2+psk_len+2+psk_len;
+- t = psk_or_pre_ms;
+- memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len);
+- s2n(psk_len, t);
+- memset(t, 0, psk_len);
+- t+=psk_len;
+- s2n(psk_len, t);
+-
+- if (s->session->psk_identity != NULL)
+- OPENSSL_free(s->session->psk_identity);
+- s->session->psk_identity = BUF_strdup((char *)p);
+- if (s->session->psk_identity == NULL)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+-
+- if (s->session->psk_identity_hint != NULL)
+- OPENSSL_free(s->session->psk_identity_hint);
+- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
+- if (s->ctx->psk_identity_hint != NULL &&
+- s->session->psk_identity_hint == NULL)
++ pre_ms_len = 2+psk_len+2+i;
++ pre_ms = OPENSSL_malloc(pre_ms_len);
++ if (pre_ms == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ ERR_R_MALLOC_FAILURE);
+- goto psk_err;
++ goto err;
+ }
+-
+- s->session->master_key_length=
+- s->method->ssl3_enc->generate_master_secret(s,
+- s->session->master_key, psk_or_pre_ms, pre_ms_len);
+- psk_err = 0;
+- psk_err:
+- OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms));
+- if (psk_err != 0)
+- goto f_err;
++ memset(pre_ms, 0, pre_ms_len);
++ t = pre_ms;
++ s2n(psk_len, t);
++ memcpy(t, psk, psk_len);
++ t += psk_len;
++ s2n(i, t);
++ memcpy(t, p, i);
++ s->session->master_key_length = s->method->ssl3_enc \
++ -> generate_master_secret(s,
++ s->session->master_key, pre_ms, pre_ms_len);
++ OPENSSL_cleanse(pre_ms, pre_ms_len);
++ OPENSSL_free(pre_ms);
+ }
+- else
+-#endif
+-#ifndef OPENSSL_NO_SRP
+- if (alg_k & SSL_kSRP)
++#endif /* OPENSSL_NO_PSK */
++ if (!(alg_a & SSL_aPSK))
+ {
+- int param_len;
+-
+- n2s(p,i);
+- param_len=i+2;
+- if (param_len > n)
+- {
+- al=SSL_AD_DECODE_ERROR;
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_BAD_SRP_A_LENGTH);
+- goto f_err;
+- }
+- if (!(s->srp_ctx.A=BN_bin2bn(p,i,NULL)))
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_BN_LIB);
+- goto err;
+- }
+- if (s->session->srp_username != NULL)
+- OPENSSL_free(s->session->srp_username);
+- s->session->srp_username = BUF_strdup(s->srp_ctx.login);
+- if (s->session->srp_username == NULL)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto err;
+- }
++ /* Compute the master secret */
++ s->session->master_key_length = s->method->ssl3_enc \
++ -> generate_master_secret(s,
++ s->session->master_key, p, i);
++ }
+
+- if ((s->session->master_key_length = SRP_generate_server_master_secret(s,s->session->master_key))<0)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
+- goto err;
+- }
++ OPENSSL_cleanse(p, i);
++ }
++#endif
++#ifndef OPENSSL_NO_SRP
++ else if (alg_k & SSL_kSRP)
++ {
++ int param_len;
+
+- p+=i;
++ n2s(p,i);
++ param_len=i+2;
++ if (param_len > n)
++ {
++ al=SSL_AD_DECODE_ERROR;
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_BAD_SRP_A_LENGTH);
++ goto f_err;
++ }
++ if (!(s->srp_ctx.A=BN_bin2bn(p,i,NULL)))
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_BN_LIB);
++ goto err;
++ }
++ if (s->session->srp_username != NULL)
++ OPENSSL_free(s->session->srp_username);
++ s->session->srp_username = BUF_strdup(s->srp_ctx.login);
++ if (s->session->srp_username == NULL)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
++ ERR_R_MALLOC_FAILURE);
++ goto err;
+ }
+- else
+-#endif /* OPENSSL_NO_SRP */
+- if (alg_k & SSL_kGOST)
+- {
+- int ret = 0;
+- EVP_PKEY_CTX *pkey_ctx;
+- EVP_PKEY *client_pub_pkey = NULL, *pk = NULL;
+- unsigned char premaster_secret[32], *start;
+- size_t outlen=32, inlen;
+- unsigned long alg_a;
+- int Ttag, Tclass;
+- long Tlen;
+-
+- /* Get our certificate private key*/
+- alg_a = s->s3->tmp.new_cipher->algorithm_auth;
+- if (alg_a & SSL_aGOST94)
+- pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey;
+- else if (alg_a & SSL_aGOST01)
+- pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey;
+
+- pkey_ctx = EVP_PKEY_CTX_new(pk,NULL);
+- EVP_PKEY_decrypt_init(pkey_ctx);
+- /* If client certificate is present and is of the same type, maybe
+- * use it for key exchange. Don't mind errors from
+- * EVP_PKEY_derive_set_peer, because it is completely valid to use
+- * a client certificate for authorization only. */
+- client_pub_pkey = X509_get_pubkey(s->session->peer);
+- if (client_pub_pkey)
+- {
+- if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0)
+- ERR_clear_error();
+- }
+- /* Decrypt session key */
+- if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED ||
+- Ttag != V_ASN1_SEQUENCE ||
+- Tclass != V_ASN1_UNIVERSAL)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
+- goto gerr;
+- }
+- start = p;
+- inlen = Tlen;
+- if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0)
++ if ((s->session->master_key_length = SRP_generate_server_master_secret(s,s->session->master_key))<0)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
++ goto err;
++ }
+
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
+- goto gerr;
+- }
+- /* Generate master secret */
+- s->session->master_key_length=
+- s->method->ssl3_enc->generate_master_secret(s,
+- s->session->master_key,premaster_secret,32);
+- /* Check if pubkey from client certificate was used */
+- if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0)
+- ret = 2;
+- else
+- ret = 1;
+- gerr:
+- EVP_PKEY_free(client_pub_pkey);
+- EVP_PKEY_CTX_free(pkey_ctx);
+- if (ret)
+- return ret;
+- else
+- goto err;
++ p+=i;
++ }
++#endif /* OPENSSL_NO_SRP */
++ else if (alg_k & SSL_kGOST)
++ {
++ int ret = 0;
++ EVP_PKEY_CTX *pkey_ctx;
++ EVP_PKEY *client_pub_pkey = NULL, *pk = NULL;
++ unsigned char premaster_secret[32], *start;
++ size_t outlen=32, inlen;
++ unsigned long alg_a;
++ int Ttag, Tclass;
++ long Tlen;
++
++ /* Get our certificate private key*/
++ alg_a = s->s3->tmp.new_cipher->algorithm_auth;
++ if (alg_a & SSL_aGOST94)
++ pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey;
++ else if (alg_a & SSL_aGOST01)
++ pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey;
++
++ pkey_ctx = EVP_PKEY_CTX_new(pk,NULL);
++ EVP_PKEY_decrypt_init(pkey_ctx);
++ /* If client certificate is present and is of the same type, maybe
++ * use it for key exchange. Don't mind errors from
++ * EVP_PKEY_derive_set_peer, because it is completely valid to use
++ * a client certificate for authorization only. */
++ client_pub_pkey = X509_get_pubkey(s->session->peer);
++ if (client_pub_pkey)
++ {
++ if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0)
++ ERR_clear_error();
++ }
++ /* Decrypt session key */
++ if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED ||
++ Ttag != V_ASN1_SEQUENCE ||
++ Tclass != V_ASN1_UNIVERSAL)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
++ goto gerr;
++ }
++ start = p;
++ inlen = Tlen;
++ if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0)
++ {
++ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
++ goto gerr;
+ }
++ /* Generate master secret */
++ s->session->master_key_length=
++ s->method->ssl3_enc->generate_master_secret(s,
++ s->session->master_key,premaster_secret,32);
++ /* Check if pubkey from client certificate was used */
++ if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0)
++ ret = 2;
++ else
++ ret = 1;
++ gerr:
++ EVP_PKEY_free(client_pub_pkey);
++ EVP_PKEY_CTX_free(pkey_ctx);
++ if (ret)
++ return ret;
+ else
++ goto err;
++ }
++ else if (!(alg_k & SSL_kPSK))
+ {
+ al=SSL_AD_HANDSHAKE_FAILURE;
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
+index 0fda4ca..6c57d2a 100644
+--- a/ssl/ssl_lib.c
++++ b/ssl/ssl_lib.c
+@@ -1424,7 +1424,7 @@ int ssl_cipher_list_to_bytes(SSL *s,STACK_OF(SSL_CIPHER) *sk,unsigned char *p,
+ #endif /* OPENSSL_NO_KRB5 */
+ #ifndef OPENSSL_NO_PSK
+ /* with PSK there must be client callback set */
+- if (((c->algorithm_mkey & SSL_kPSK) || (c->algorithm_auth & SSL_aPSK)) &&
++ if ((c->algorithm_auth & SSL_aPSK) &&
+ s->psk_client_callback == NULL)
+ continue;
+ #endif /* OPENSSL_NO_PSK */
+diff --git a/ssl/tls1.h b/ssl/tls1.h
+index 9e035fb..3e6b7c7 100644
+--- a/ssl/tls1.h
++++ b/ssl/tls1.h
+@@ -536,6 +536,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)
+ #define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14
+ #define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15
+
++/* ECDHE PSK ciphersuites from RFC 5489 */
++#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037
++#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038
++
+ /* XXX
+ * Inconsistency alert:
+ * The OpenSSL names of ciphers with ephemeral DH here include the string
+@@ -691,6 +698,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)
+ #define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-POLY1305"
+ #define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA20-POLY1305"
+
++/* ECDHE PSK ciphersuites from RFC 5489 */
++#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256"
++#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384"
++
+ #define TLS_CT_RSA_SIGN 1
+ #define TLS_CT_DSS_SIGN 2
+ #define TLS_CT_RSA_FIXED_DH 3
+2.0.0.526.g5318336
+
diff --git a/patches/0012-wincrypt.patch b/patches/0012-wincrypt.patch
new file mode 100644
index 0000000..fe28901
--- /dev/null
+++ b/patches/0012-wincrypt.patch
@@ -0,0 +1,34 @@
+From e7d931119b81d514d8f0645b5453ec16c441b3db Mon Sep 17 00:00:00 2001
+From: Andrew Hsieh <[email protected]>
+Date: Wed, 7 May 2014 20:01:28 +0800
+Subject: [PATCH] Undef OCSP_REQUEST, X509_NAME and OCSP_RESPONSE
+
+prebuilts/gcc/linux-x86/host/x86_64-w64-mingw32-4.8/mingw/include/wincrypt.h
+define all as constants, but ocsp.h use them as type/function names
+
+Change-Id: I580b55a36575c1b19df6e7f3adaf90e7c345e46f
+---
+ crypto/ocsp/ocsp.h | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/crypto/ocsp/ocsp.h b/crypto/ocsp/ocsp.h
+index 31e4574..f14e9f7 100644
+--- a/crypto/ocsp/ocsp.h
++++ b/crypto/ocsp/ocsp.h
+@@ -90,6 +90,13 @@ extern "C" {
+ #define OCSP_RESPID_KEY 0x400
+ #define OCSP_NOTIME 0x800
+
++#ifdef OPENSSL_SYS_WIN32
++ /* Under Win32 these are defined in wincrypt.h */
++#undef OCSP_REQUEST
++#undef X509_NAME
++#undef OCSP_RESPONSE
++#endif
++
+ /* CertID ::= SEQUENCE {
+ * hashAlgorithm AlgorithmIdentifier,
+ * issuerNameHash OCTET STRING, -- Hash of Issuer's DN
+--
+1.9.1.423.g4596e3a
+
diff --git a/patches/0013-tls_psk_hint.patch b/patches/0013-tls_psk_hint.patch
new file mode 100644
index 0000000..4746b78
--- /dev/null
+++ b/patches/0013-tls_psk_hint.patch
@@ -0,0 +1,417 @@
+From 9c14752f8872401de413fb46a96146b0d6bf926e Mon Sep 17 00:00:00 2001
+From: Alex Klyubin <[email protected]>
+Date: Tue, 8 Apr 2014 16:02:24 -0700
+Subject: tls_psk_hint
+
+Fix TLS-PSK identity hint implementation issues.
+
+PSK identity hint can be stored in SSL_CTX and in SSL/SSL_SESSION,
+similar to other TLS parameters, with the value in SSL/SSL_SESSION
+taking precedence over the one in SSL_CTX. The value in SSL_CTX is
+shared (used as the default) between all SSL instances associated
+with that SSL_CTX, whereas the value in SSL/SSL_SESSION is confined
+to that particular TLS/SSL connection/session.
+
+The existing implementation of TLS-PSK does not correctly distinguish
+between PSK identity hint in SSL_CTX and in SSL/SSL_SESSION. This
+change fixes these issues:
+1. SSL_use_psk_identity_hint does nothing and returns "success" when
+ the SSL object does not have an associated SSL_SESSION.
+2. On the client, the hint in SSL_CTX (which is shared between
+ multiple SSL instances) is overwritten with the hint received from
+ server or reset to NULL if no hint was received.
+3. On the client, psk_client_callback is invoked with the hint from
+ SSL_CTX rather than from current SSL/SSL_SESSION (i.e., the one
+ received from the server). Issue #2 above masks this issue.
+4. On the server, the hint in SSL/SSL_SESSION is ignored and the hint
+ from SSL_CTX is sent to the client.
+5. On the server, the hint in SSL/SSL_SESSION is reset to the one in
+ SSL_CTX after the ClientKeyExchange message step.
+
+This change fixes the issues by:
+* Adding storage for the hint in the SSL object. The idea being that
+ the hint in the associated SSL_SESSION takes precedence.
+* Reading the hint during the handshake only from the associated
+ SSL_SESSION object.
+* Initializing the hint in SSL object with the one from the SSL_CTX
+ object.
+* Initializing the hint in SSL_SESSION object with the one from the
+ SSL object.
+* Making SSL_use_psk_identity_hint and SSL_get_psk_identity_hint
+ set/get the hint to/from SSL_SESSION associated with the provided
+ SSL object, or, if no SSL_SESSION is available, set/get the hint
+ to/from the provided SSL object.
+* Removing code which resets the hint during handshake.
+---
+ ssl/d1_clnt.c | 13 +------------
+ ssl/d1_srvr.c | 10 +++++-----
+ ssl/s3_clnt.c | 37 +++++++++++++------------------------
+ ssl/s3_srvr.c | 44 ++++++++++++++++----------------------------
+ ssl/ssl.h | 4 ++++
+ ssl/ssl_lib.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-----------
+ ssl/ssl_sess.c | 12 ++++++++++++
+ 7 files changed, 96 insertions(+), 80 deletions(-)
+
+diff --git a/ssl/d1_clnt.c b/ssl/d1_clnt.c
+index f857946..b017139 100644
+--- a/ssl/d1_clnt.c
++++ b/ssl/d1_clnt.c
+@@ -1434,7 +1434,7 @@ int dtls1_send_client_key_exchange(SSL *s)
+ goto err;
+ }
+
+- psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
++ psk_len = s->psk_client_callback(s, s->session->psk_identity_hint,
+ identity, PSK_MAX_IDENTITY_LEN,
+ psk_or_pre_ms, sizeof(psk_or_pre_ms));
+ if (psk_len > PSK_MAX_PSK_LEN)
+@@ -1459,17 +1459,6 @@ int dtls1_send_client_key_exchange(SSL *s)
+ t+=psk_len;
+ s2n(psk_len, t);
+
+- if (s->session->psk_identity_hint != NULL)
+- OPENSSL_free(s->session->psk_identity_hint);
+- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
+- if (s->ctx->psk_identity_hint != NULL &&
+- s->session->psk_identity_hint == NULL)
+- {
+- SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+-
+ if (s->session->psk_identity != NULL)
+ OPENSSL_free(s->session->psk_identity);
+ s->session->psk_identity = BUF_strdup(identity);
+diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c
+index 1384ab0..c181db6 100644
+--- a/ssl/d1_srvr.c
++++ b/ssl/d1_srvr.c
+@@ -471,7 +471,7 @@ int dtls1_accept(SSL *s)
+ /* PSK: send ServerKeyExchange if PSK identity
+ * hint if provided */
+ #ifndef OPENSSL_NO_PSK
+- || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint)
++ || ((alg_k & SSL_kPSK) && s->session->psk_identity_hint)
+ #endif
+ || (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
+ || (alg_k & SSL_kEECDH)
+@@ -1288,7 +1288,7 @@ int dtls1_send_server_key_exchange(SSL *s)
+ if (type & SSL_kPSK)
+ {
+ /* reserve size for record length and PSK identity hint*/
+- n+=2+strlen(s->ctx->psk_identity_hint);
++ n+=2+strlen(s->session->psk_identity_hint);
+ }
+ else
+ #endif /* !OPENSSL_NO_PSK */
+@@ -1365,9 +1365,9 @@ int dtls1_send_server_key_exchange(SSL *s)
+ if (type & SSL_kPSK)
+ {
+ /* copy PSK identity hint */
+- s2n(strlen(s->ctx->psk_identity_hint), p);
+- strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
+- p+=strlen(s->ctx->psk_identity_hint);
++ s2n(strlen(s->session->psk_identity_hint), p);
++ strncpy((char *)p, s->session->psk_identity_hint, strlen(s->session->psk_identity_hint));
++ p+=strlen(s->session->psk_identity_hint);
+ }
+ #endif
+
+diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
+index 12c3fe8..17367a2 100644
+--- a/ssl/s3_clnt.c
++++ b/ssl/s3_clnt.c
+@@ -1374,9 +1374,11 @@ int ssl3_get_key_exchange(SSL *s)
+ if (s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK)
+ {
+ s->session->sess_cert=ssl_sess_cert_new();
+- if (s->ctx->psk_identity_hint)
+- OPENSSL_free(s->ctx->psk_identity_hint);
+- s->ctx->psk_identity_hint = NULL;
++ if (s->session->psk_identity_hint)
++ {
++ OPENSSL_free(s->session->psk_identity_hint);
++ s->session->psk_identity_hint = NULL;
++ }
+ }
+ #endif
+ s->s3->tmp.reuse_message=1;
+@@ -1426,7 +1428,11 @@ int ssl3_get_key_exchange(SSL *s)
+ al=SSL_AD_HANDSHAKE_FAILURE;
+ n2s(p,i);
+ param_len=i+2;
+- s->ctx->psk_identity_hint = NULL;
++ if (s->session->psk_identity_hint)
++ {
++ OPENSSL_free(s->session->psk_identity_hint);
++ s->session->psk_identity_hint = NULL;
++ }
+ if (i != 0)
+ {
+ /* Store PSK identity hint for later use, hint is used
+@@ -1452,10 +1458,8 @@ int ssl3_get_key_exchange(SSL *s)
+ * NULL-terminated string. */
+ memcpy(tmp_id_hint, p, i);
+ memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
+- if (s->ctx->psk_identity_hint != NULL)
+- OPENSSL_free(s->ctx->psk_identity_hint);
+- s->ctx->psk_identity_hint = BUF_strdup(tmp_id_hint);
+- if (s->ctx->psk_identity_hint == NULL)
++ s->session->psk_identity_hint = BUF_strdup(tmp_id_hint);
++ if (s->session->psk_identity_hint == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE);
+ goto f_err;
+@@ -2338,7 +2342,7 @@ int ssl3_send_client_key_exchange(SSL *s)
+ goto err;
+ }
+
+- psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
++ psk_len = s->psk_client_callback(s, s->session->psk_identity_hint,
+ identity, PSK_MAX_IDENTITY_LEN, psk, sizeof(psk));
+ if (psk_len > PSK_MAX_PSK_LEN)
+ {
+@@ -2374,21 +2378,6 @@ int ssl3_send_client_key_exchange(SSL *s)
+ n += 2;
+ }
+
+- if (s->session->psk_identity_hint != NULL)
+- OPENSSL_free(s->session->psk_identity_hint);
+- s->session->psk_identity_hint = NULL;
+- if (s->ctx->psk_identity_hint)
+- {
+- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
+- if (s->ctx->psk_identity_hint != NULL &&
+- s->session->psk_identity_hint == NULL)
+- {
+- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+- }
+-
+ if (s->session->psk_identity != NULL)
+ OPENSSL_free(s->session->psk_identity);
+ s->session->psk_identity = BUF_strdup(identity);
+diff --git a/ssl/s3_srvr.c b/ssl/s3_srvr.c
+index d6f1a35..c360337 100644
+--- a/ssl/s3_srvr.c
++++ b/ssl/s3_srvr.c
+@@ -492,7 +492,7 @@ int ssl3_accept(SSL *s)
+ * - the key exchange is kEECDH.
+ */
+ #ifndef OPENSSL_NO_PSK
+- || ((alg_a & SSL_aPSK) && ((alg_k & SSL_kEECDH) || s->ctx->psk_identity_hint))
++ || ((alg_a & SSL_aPSK) && ((alg_k & SSL_kEECDH) || s->session->psk_identity_hint))
+ #endif
+ #ifndef OPENSSL_NO_SRP
+ /* SRP: send ServerKeyExchange */
+@@ -1702,6 +1702,10 @@ int ssl3_send_server_key_exchange(SSL *s)
+ int curve_id = 0;
+ BN_CTX *bn_ctx = NULL;
+ #endif
++#ifndef OPENSSL_NO_PSK
++ const char* psk_identity_hint;
++ size_t psk_identity_hint_len;
++#endif
+ EVP_PKEY *pkey;
+ const EVP_MD *md = NULL;
+ unsigned char *p,*d;
+@@ -1730,9 +1734,12 @@ int ssl3_send_server_key_exchange(SSL *s)
+ if (alg_a & SSL_aPSK)
+ {
+ /* size for PSK identity hint */
+- n+=2;
+- if (s->ctx->psk_identity_hint)
+- n+=strlen(s->ctx->psk_identity_hint);
++ psk_identity_hint = s->session->psk_identity_hint;
++ if (psk_identity_hint)
++ psk_identity_hint_len = strlen(psk_identity_hint);
++ else
++ psk_identity_hint_len = 0;
++ n+=2+psk_identity_hint_len;
+ }
+ #endif /* !OPENSSL_NO_PSK */
+ #ifndef OPENSSL_NO_RSA
+@@ -2025,20 +2032,12 @@ int ssl3_send_server_key_exchange(SSL *s)
+ #ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+- if (s->ctx->psk_identity_hint)
+- {
+- /* copy PSK identity hint */
+- s2n(strlen(s->ctx->psk_identity_hint), p);
+- strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
+- p+=strlen(s->ctx->psk_identity_hint);
+- }
+- else
++ /* copy PSK identity hint (if provided) */
++ s2n(psk_identity_hint_len, p);
++ if (psk_identity_hint_len > 0)
+ {
+- /* No identity hint is provided. */
+- *p = 0;
+- p += 1;
+- *p = 0;
+- p += 1;
++ memcpy(p, psk_identity_hint, psk_identity_hint_len);
++ p+=psk_identity_hint_len;
+ }
+ }
+ #endif /* OPENSSL_NO_PSK */
+@@ -2393,17 +2392,6 @@ int ssl3_get_client_key_exchange(SSL *s)
+ goto psk_err;
+ }
+
+- if (s->session->psk_identity_hint != NULL)
+- OPENSSL_free(s->session->psk_identity_hint);
+- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
+- if (s->ctx->psk_identity_hint != NULL &&
+- s->session->psk_identity_hint == NULL)
+- {
+- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+- ERR_R_MALLOC_FAILURE);
+- goto psk_err;
+- }
+-
+ p += i;
+ n -= (i + 2);
+ psk_err = 0;
+diff --git a/ssl/ssl.h b/ssl/ssl.h
+index a7e1455..f044cd1 100644
+--- a/ssl/ssl.h
++++ b/ssl/ssl.h
+@@ -1441,6 +1441,10 @@ struct ssl_st
+ #endif /* OPENSSL_NO_KRB5 */
+
+ #ifndef OPENSSL_NO_PSK
++ /* PSK identity hint is stored here only to enable setting a hint on an SSL object before an
++ * SSL_SESSION is associated with it. Once an SSL_SESSION is associated with this SSL object,
++ * the psk_identity_hint from the session takes precedence over this one. */
++ char *psk_identity_hint;
+ unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity,
+ unsigned int max_identity_len, unsigned char *psk,
+ unsigned int max_psk_len);
+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
+index 3e49cab..cf24292 100644
+--- a/ssl/ssl_lib.c
++++ b/ssl/ssl_lib.c
+@@ -388,6 +388,13 @@ SSL *SSL_new(SSL_CTX *ctx)
+ CRYPTO_new_ex_data(CRYPTO_EX_INDEX_SSL, s, &s->ex_data);
+
+ #ifndef OPENSSL_NO_PSK
++ s->psk_identity_hint = NULL;
++ if (ctx->psk_identity_hint)
++ {
++ s->psk_identity_hint = BUF_strdup(ctx->psk_identity_hint);
++ if (s->psk_identity_hint == NULL)
++ goto err;
++ }
+ s->psk_client_callback=ctx->psk_client_callback;
+ s->psk_server_callback=ctx->psk_server_callback;
+ #endif
+@@ -648,6 +655,11 @@ void SSL_free(SSL *s)
+ OPENSSL_free(s->alpn_client_proto_list);
+ #endif
+
++#ifndef OPENSSL_NO_PSK
++ if (s->psk_identity_hint)
++ OPENSSL_free(s->psk_identity_hint);
++#endif
++
+ if (s->client_CA != NULL)
+ sk_X509_NAME_pop_free(s->client_CA,X509_NAME_free);
+
+@@ -3361,32 +3373,54 @@ int SSL_use_psk_identity_hint(SSL *s, const char *identity_hint)
+ if (s == NULL)
+ return 0;
+
+- if (s->session == NULL)
+- return 1; /* session not created yet, ignored */
+-
+ if (identity_hint != NULL && strlen(identity_hint) > PSK_MAX_IDENTITY_LEN)
+ {
+ SSLerr(SSL_F_SSL_USE_PSK_IDENTITY_HINT, SSL_R_DATA_LENGTH_TOO_LONG);
+ return 0;
+ }
+- if (s->session->psk_identity_hint != NULL)
++
++ /* Clear hint in SSL and associated SSL_SESSION (if any). */
++ if (s->psk_identity_hint != NULL)
++ {
++ OPENSSL_free(s->psk_identity_hint);
++ s->psk_identity_hint = NULL;
++ }
++ if (s->session != NULL && s->session->psk_identity_hint != NULL)
++ {
+ OPENSSL_free(s->session->psk_identity_hint);
++ s->session->psk_identity_hint = NULL;
++ }
++
+ if (identity_hint != NULL)
+ {
+- s->session->psk_identity_hint = BUF_strdup(identity_hint);
+- if (s->session->psk_identity_hint == NULL)
+- return 0;
++ /* The hint is stored in SSL and SSL_SESSION with the one in
++ * SSL_SESSION taking precedence. Thus, if SSL_SESSION is avaiable,
++ * we store the hint there, otherwise we store it in SSL. */
++ if (s->session != NULL)
++ {
++ s->session->psk_identity_hint = BUF_strdup(identity_hint);
++ if (s->session->psk_identity_hint == NULL)
++ return 0;
++ }
++ else
++ {
++ s->psk_identity_hint = BUF_strdup(identity_hint);
++ if (s->psk_identity_hint == NULL)
++ return 0;
++ }
+ }
+- else
+- s->session->psk_identity_hint = NULL;
+ return 1;
+ }
+
+ const char *SSL_get_psk_identity_hint(const SSL *s)
+ {
+- if (s == NULL || s->session == NULL)
++ if (s == NULL)
+ return NULL;
+- return(s->session->psk_identity_hint);
++ /* The hint is stored in SSL and SSL_SESSION with the one in SSL_SESSION
++ * taking precedence. */
++ if (s->session != NULL)
++ return(s->session->psk_identity_hint);
++ return(s->psk_identity_hint);
+ }
+
+ const char *SSL_get_psk_identity(const SSL *s)
+diff --git a/ssl/ssl_sess.c b/ssl/ssl_sess.c
+index 44268e7..cdd198c 100644
+--- a/ssl/ssl_sess.c
++++ b/ssl/ssl_sess.c
+@@ -437,6 +437,18 @@ int ssl_get_new_session(SSL *s, int session)
+ }
+ #endif
+ #endif
++#ifndef OPENSSL_NO_PSK
++ if (s->psk_identity_hint)
++ {
++ ss->psk_identity_hint = BUF_strdup(s->psk_identity_hint);
++ if (ss->psk_identity_hint == NULL)
++ {
++ SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE);
++ SSL_SESSION_free(ss);
++ return 0;
++ }
++ }
++#endif
+ }
+ else
+ {
+--
+2.0.0.526.g5318336
+
diff --git a/patches/0014-arm_asm.patch b/patches/0014-arm_asm.patch
new file mode 100644
index 0000000..d97df62
--- /dev/null
+++ b/patches/0014-arm_asm.patch
@@ -0,0 +1,7834 @@
+diff --git a/Configure b/Configure
+index de78469..26743bb 100755
+--- a/Configure
++++ b/Configure
+@@ -136,7 +136,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
+ my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
+ my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
+ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
+-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o::void";
++my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o:::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:";
+ my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+ my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+ my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
+@@ -350,6 +351,7 @@ my %table=(
+ # It's believed that majority of ARM toolchains predefine appropriate -march.
+ # If you compiler does not, do complement config command line with one!
+ "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ #### IA-32 targets...
+ "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+@@ -1503,7 +1505,7 @@ if ($rmd160_obj =~ /\.o$/)
+ }
+ if ($aes_obj =~ /\.o$/)
+ {
+- $cflags.=" -DAES_ASM";
++ $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/);;
+ # aes-ctr.o is not a real file, only indication that assembler
+ # module implements AES_ctr32_encrypt...
+ $cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes\-ctr\.o//);
+@@ -1525,7 +1527,7 @@ else {
+ $wp_obj="wp_block.o";
+ }
+ $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/);
+-if ($modes_obj =~ /ghash/)
++if ($modes_obj =~ /ghash\-/)
+ {
+ $cflags.=" -DGHASH_ASM";
+ }
+diff --git a/config b/config
+index 41fa2a6..dff7df7 100755
+--- a/config
++++ b/config
+@@ -644,6 +644,7 @@ case "$GUESSOS" in
+ armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
+ armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
+ arm*-*-linux2) OUT="linux-armv4" ;;
++ aarch64-*-linux2) OUT="linux-aarch64" ;;
+ sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
+ sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;
+ m68k*-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
+diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
+index 45ede0a..9181a1a 100644
+--- a/crypto/aes/Makefile
++++ b/crypto/aes/Makefile
+@@ -78,9 +78,15 @@ aes-parisc.s: asm/aes-parisc.pl
+ aes-mips.S: asm/aes-mips.pl
+ $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@
+
++aesv8-armx.S: asm/aesv8-armx.pl
++ $(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@
++aesv8-armx.o: aesv8-armx.S
++
+ # GNU make "catch all"
+ aes-%.S: asm/aes-%.pl; $(PERL) $< $(PERLASM_SCHEME) > $@
+ aes-armv4.o: aes-armv4.S
++bsaes-%.S: asm/bsaes-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
++bsaes-armv7.o: bsaes-armv7.S
+
+ files:
+ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
+index 86b86c4..4f89170 100644
+--- a/crypto/aes/asm/aes-armv4.pl
++++ b/crypto/aes/asm/aes-armv4.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -51,9 +51,23 @@ $key="r11";
+ $rounds="r12";
+
+ $code=<<___;
+-#include "arm_arch.h"
++#ifndef __KERNEL__
++# include "arm_arch.h"
++#else
++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
++#endif
++
+ .text
++#if __ARM_ARCH__<7
++.code 32
++#else
++.syntax unified
++# ifdef __thumb2__
++.thumb
++# else
+ .code 32
++# endif
++#endif
+
+ .type AES_Te,%object
+ .align 5
+@@ -167,7 +181,11 @@ AES_Te:
+ .type AES_encrypt,%function
+ .align 5
+ AES_encrypt:
++#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ AES_encrypt
++#else
++ adr r3,AES_encrypt
++#endif
+ stmdb sp!,{r1,r4-r12,lr}
+ mov $rounds,r0 @ inp
+ mov $key,r2
+@@ -409,11 +427,21 @@ _armv4_AES_encrypt:
+ .align 5
+ private_AES_set_encrypt_key:
+ _armv4_AES_set_encrypt_key:
++#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ AES_set_encrypt_key
++#else
++ adr r3,private_AES_set_encrypt_key
++#endif
+ teq r0,#0
++#if __ARM_ARCH__>=7
++ itt eq @ Thumb2 thing, sanity check in ARM
++#endif
+ moveq r0,#-1
+ beq .Labrt
+ teq r2,#0
++#if __ARM_ARCH__>=7
++ itt eq @ Thumb2 thing, sanity check in ARM
++#endif
+ moveq r0,#-1
+ beq .Labrt
+
+@@ -422,6 +450,9 @@ _armv4_AES_set_encrypt_key:
+ teq r1,#192
+ beq .Lok
+ teq r1,#256
++#if __ARM_ARCH__>=7
++ itt ne @ Thumb2 thing, sanity check in ARM
++#endif
+ movne r0,#-1
+ bne .Labrt
+
+@@ -576,6 +607,9 @@ _armv4_AES_set_encrypt_key:
+ str $s2,[$key,#-16]
+ subs $rounds,$rounds,#1
+ str $s3,[$key,#-12]
++#if __ARM_ARCH__>=7
++ itt eq @ Thumb2 thing, sanity check in ARM
++#endif
+ subeq r2,$key,#216
+ beq .Ldone
+
+@@ -645,6 +679,9 @@ _armv4_AES_set_encrypt_key:
+ str $s2,[$key,#-24]
+ subs $rounds,$rounds,#1
+ str $s3,[$key,#-20]
++#if __ARM_ARCH__>=7
++ itt eq @ Thumb2 thing, sanity check in ARM
++#endif
+ subeq r2,$key,#256
+ beq .Ldone
+
+@@ -674,11 +711,17 @@ _armv4_AES_set_encrypt_key:
+ str $i3,[$key,#-4]
+ b .L256_loop
+
++.align 2
+ .Ldone: mov r0,#0
+ ldmia sp!,{r4-r12,lr}
+-.Labrt: tst lr,#1
++.Labrt:
++#if __ARM_ARCH__>=5
++ ret @ bx lr
++#else
++ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
++#endif
+ .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+
+ .global private_AES_set_decrypt_key
+@@ -688,34 +731,57 @@ private_AES_set_decrypt_key:
+ str lr,[sp,#-4]! @ push lr
+ bl _armv4_AES_set_encrypt_key
+ teq r0,#0
+- ldrne lr,[sp],#4 @ pop lr
++ ldr lr,[sp],#4 @ pop lr
+ bne .Labrt
+
+- stmdb sp!,{r4-r12}
++ mov r0,r2 @ AES_set_encrypt_key preserves r2,
++ mov r1,r2 @ which is AES_KEY *key
++ b _armv4_AES_set_enc2dec_key
++.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+
+- ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
+- mov $key,r2 @ which is AES_KEY *key
+- mov $i1,r2
+- add $i2,r2,$rounds,lsl#4
++@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
++.global AES_set_enc2dec_key
++.type AES_set_enc2dec_key,%function
++.align 5
++AES_set_enc2dec_key:
++_armv4_AES_set_enc2dec_key:
++ stmdb sp!,{r4-r12,lr}
++
++ ldr $rounds,[r0,#240]
++ mov $i1,r0 @ input
++ add $i2,r0,$rounds,lsl#4
++ mov $key,r1 @ ouput
++ add $tbl,r1,$rounds,lsl#4
++ str $rounds,[r1,#240]
++
++.Linv: ldr $s0,[$i1],#16
++ ldr $s1,[$i1,#-12]
++ ldr $s2,[$i1,#-8]
++ ldr $s3,[$i1,#-4]
++ ldr $t1,[$i2],#-16
++ ldr $t2,[$i2,#16+4]
++ ldr $t3,[$i2,#16+8]
++ ldr $i3,[$i2,#16+12]
++ str $s0,[$tbl],#-16
++ str $s1,[$tbl,#16+4]
++ str $s2,[$tbl,#16+8]
++ str $s3,[$tbl,#16+12]
++ str $t1,[$key],#16
++ str $t2,[$key,#-12]
++ str $t3,[$key,#-8]
++ str $i3,[$key,#-4]
++ teq $i1,$i2
++ bne .Linv
+
+-.Linv: ldr $s0,[$i1]
++ ldr $s0,[$i1]
+ ldr $s1,[$i1,#4]
+ ldr $s2,[$i1,#8]
+ ldr $s3,[$i1,#12]
+- ldr $t1,[$i2]
+- ldr $t2,[$i2,#4]
+- ldr $t3,[$i2,#8]
+- ldr $i3,[$i2,#12]
+- str $s0,[$i2],#-16
+- str $s1,[$i2,#16+4]
+- str $s2,[$i2,#16+8]
+- str $s3,[$i2,#16+12]
+- str $t1,[$i1],#16
+- str $t2,[$i1,#-12]
+- str $t3,[$i1,#-8]
+- str $i3,[$i1,#-4]
+- teq $i1,$i2
+- bne .Linv
++ str $s0,[$key]
++ str $s1,[$key,#4]
++ str $s2,[$key,#8]
++ str $s3,[$key,#12]
++ sub $key,$key,$rounds,lsl#3
+ ___
+ $mask80=$i1;
+ $mask1b=$i2;
+@@ -773,7 +839,7 @@ $code.=<<___;
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+ #endif
+-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
++.size AES_set_enc2dec_key,.-AES_set_enc2dec_key
+
+ .type AES_Td,%object
+ .align 5
+@@ -883,7 +949,11 @@ AES_Td:
+ .type AES_decrypt,%function
+ .align 5
+ AES_decrypt:
++#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ AES_decrypt
++#else
++ adr r3,AES_decrypt
++#endif
+ stmdb sp!,{r1,r4-r12,lr}
+ mov $rounds,r0 @ inp
+ mov $key,r2
+@@ -1080,8 +1150,9 @@ _armv4_AES_decrypt:
+ ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
+ and $i3,lr,$s1,lsr#8
+
++ add $s1,$tbl,$s1,lsr#24
+ ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
+- ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
++ ldrb $s1,[$s1] @ Td4[s1>>24]
+ ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
+ eor $s0,$i1,$s0,lsl#24
+ ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
+@@ -1094,7 +1165,8 @@ _armv4_AES_decrypt:
+ ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
+ and $i3,lr,$s2,lsr#16
+
+- ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
++ add $s2,$tbl,$s2,lsr#24
++ ldrb $s2,[$s2] @ Td4[s2>>24]
+ eor $s0,$s0,$i1,lsl#8
+ ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
+ eor $s1,$i2,$s1,lsl#16
+@@ -1106,8 +1178,9 @@ _armv4_AES_decrypt:
+ ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
+ and $i3,lr,$s3 @ i2
+
++ add $s3,$tbl,$s3,lsr#24
+ ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
+- ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
++ ldrb $s3,[$s3] @ Td4[s3>>24]
+ eor $s0,$s0,$i1,lsl#16
+ ldr $i1,[$key,#0]
+ eor $s1,$s1,$i2,lsl#8
+@@ -1130,5 +1203,15 @@ _armv4_AES_decrypt:
+ ___
+
+ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
++$code =~ s/\bret\b/bx\tlr/gm;
++
++open SELF,$0;
++while(<SELF>) {
++ next if (/^#!/);
++ last if (!s/^#/@/ and !/^$/);
++ print;
++}
++close SELF;
++
+ print $code;
+ close STDOUT; # enforce flush
+diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
+new file mode 100755
+index 0000000..415dc04
+--- /dev/null
++++ b/crypto/aes/asm/aesv8-armx.pl
+@@ -0,0 +1,980 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for ARMv8 AES instructions. The
++# module is endian-agnostic in sense that it supports both big- and
++# little-endian cases. As does it support both 32- and 64-bit modes
++# of operation. Latter is achieved by limiting amount of utilized
++# registers to 16, which implies additional instructions. This has
++# no effect on mighty Apple A7, as results are literally equal to
++# the theoretical estimates based on instruction latencies and issue
++# rate. It remains to be seen how does it affect other platforms...
++#
++# Performance in cycles per byte processed with 128-bit key:
++#
++# CBC enc CBC dec CTR
++# Apple A7 2.39 1.20 1.20
++# Cortex-A5x n/a n/a n/a
++
++$flavour = shift;
++open STDOUT,">".shift;
++
++$prefix="aes_v8";
++
++$code=<<___;
++#include "arm_arch.h"
++
++#if __ARM_ARCH__>=7
++.text
++___
++$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
++$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
++
++# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
++# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
++# maintain both 32- and 64-bit codes within single module and
++# transliterate common code to either flavour with regex vodoo.
++#
++{{{
++my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
++my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
++ $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
++
++
++$code.=<<___;
++.align 5
++rcon:
++.long 0x01,0x01,0x01,0x01
++.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
++.long 0x1b,0x1b,0x1b,0x1b
++
++.globl ${prefix}_set_encrypt_key
++.type ${prefix}_set_encrypt_key,%function
++.align 5
++${prefix}_set_encrypt_key:
++.Lenc_key:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++___
++$code.=<<___;
++ adr $ptr,rcon
++ cmp $bits,#192
++
++ veor $zero,$zero,$zero
++ vld1.8 {$in0},[$inp],#16
++ mov $bits,#8 // reuse $bits
++ vld1.32 {$rcon,$mask},[$ptr],#32
++
++ b.lt .Loop128
++ b.eq .L192
++ b .L256
++
++.align 4
++.Loop128:
++ vtbl.8 $key,{$in0},$mask
++ vext.8 $tmp,$zero,$in0,#12
++ vst1.32 {$in0},[$out],#16
++ aese $key,$zero
++ subs $bits,$bits,#1
++
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $key,$key,$rcon
++ veor $in0,$in0,$tmp
++ vshl.u8 $rcon,$rcon,#1
++ veor $in0,$in0,$key
++ b.ne .Loop128
++
++ vld1.32 {$rcon},[$ptr]
++
++ vtbl.8 $key,{$in0},$mask
++ vext.8 $tmp,$zero,$in0,#12
++ vst1.32 {$in0},[$out],#16
++ aese $key,$zero
++
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $key,$key,$rcon
++ veor $in0,$in0,$tmp
++ vshl.u8 $rcon,$rcon,#1
++ veor $in0,$in0,$key
++
++ vtbl.8 $key,{$in0},$mask
++ vext.8 $tmp,$zero,$in0,#12
++ vst1.32 {$in0},[$out],#16
++ aese $key,$zero
++
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $key,$key,$rcon
++ veor $in0,$in0,$tmp
++ veor $in0,$in0,$key
++ vst1.32 {$in0},[$out]
++ add $out,$out,#0x50
++
++ mov $rounds,#10
++ b .Ldone
++
++.align 4
++.L192:
++ vld1.8 {$in1},[$inp],#8
++ vmov.i8 $key,#8 // borrow $key
++ vst1.32 {$in0},[$out],#16
++ vsub.i8 $mask,$mask,$key // adjust the mask
++
++.Loop192:
++ vtbl.8 $key,{$in1},$mask
++ vext.8 $tmp,$zero,$in0,#12
++ vst1.32 {$in1},[$out],#8
++ aese $key,$zero
++ subs $bits,$bits,#1
++
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++
++ vdup.32 $tmp,${in0}[3]
++ veor $tmp,$tmp,$in1
++ veor $key,$key,$rcon
++ vext.8 $in1,$zero,$in1,#12
++ vshl.u8 $rcon,$rcon,#1
++ veor $in1,$in1,$tmp
++ veor $in0,$in0,$key
++ veor $in1,$in1,$key
++ vst1.32 {$in0},[$out],#16
++ b.ne .Loop192
++
++ mov $rounds,#12
++ add $out,$out,#0x20
++ b .Ldone
++
++.align 4
++.L256:
++ vld1.8 {$in1},[$inp]
++ mov $bits,#7
++ mov $rounds,#14
++ vst1.32 {$in0},[$out],#16
++
++.Loop256:
++ vtbl.8 $key,{$in1},$mask
++ vext.8 $tmp,$zero,$in0,#12
++ vst1.32 {$in1},[$out],#16
++ aese $key,$zero
++ subs $bits,$bits,#1
++
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in0,$in0,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $key,$key,$rcon
++ veor $in0,$in0,$tmp
++ vshl.u8 $rcon,$rcon,#1
++ veor $in0,$in0,$key
++ vst1.32 {$in0},[$out],#16
++ b.eq .Ldone
++
++ vdup.32 $key,${in0}[3] // just splat
++ vext.8 $tmp,$zero,$in1,#12
++ aese $key,$zero
++
++ veor $in1,$in1,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in1,$in1,$tmp
++ vext.8 $tmp,$zero,$tmp,#12
++ veor $in1,$in1,$tmp
++
++ veor $in1,$in1,$key
++ b .Loop256
++
++.Ldone:
++ str $rounds,[$out]
++
++ eor x0,x0,x0 // return value
++ `"ldr x29,[sp],#16" if ($flavour =~ /64/)`
++ ret
++.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
++
++.globl ${prefix}_set_decrypt_key
++.type ${prefix}_set_decrypt_key,%function
++.align 5
++${prefix}_set_decrypt_key:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++___
++$code.=<<___ if ($flavour !~ /64/);
++ stmdb sp!,{r4,lr}
++___
++$code.=<<___;
++ bl .Lenc_key
++
++ sub $out,$out,#240 // restore original $out
++ mov x4,#-16
++ add $inp,$out,x12,lsl#4 // end of key schedule
++
++ vld1.32 {v0.16b},[$out]
++ vld1.32 {v1.16b},[$inp]
++ vst1.32 {v0.16b},[$inp],x4
++ vst1.32 {v1.16b},[$out],#16
++
++.Loop_imc:
++ vld1.32 {v0.16b},[$out]
++ vld1.32 {v1.16b},[$inp]
++ aesimc v0.16b,v0.16b
++ aesimc v1.16b,v1.16b
++ vst1.32 {v0.16b},[$inp],x4
++ vst1.32 {v1.16b},[$out],#16
++ cmp $inp,$out
++ b.hi .Loop_imc
++
++ vld1.32 {v0.16b},[$out]
++ aesimc v0.16b,v0.16b
++ vst1.32 {v0.16b},[$inp]
++
++ eor x0,x0,x0 // return value
++___
++$code.=<<___ if ($flavour !~ /64/);
++ ldmia sp!,{r4,pc}
++___
++$code.=<<___ if ($flavour =~ /64/);
++ ldp x29,x30,[sp],#16
++ ret
++___
++$code.=<<___;
++.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
++___
++}}}
++{{{
++sub gen_block () {
++my $dir = shift;
++my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
++my ($inp,$out,$key)=map("x$_",(0..2));
++my $rounds="w3";
++my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));
++
++$code.=<<___;
++.globl ${prefix}_${dir}crypt
++.type ${prefix}_${dir}crypt,%function
++.align 5
++${prefix}_${dir}crypt:
++ ldr $rounds,[$key,#240]
++ vld1.32 {$rndkey0},[$key],#16
++ vld1.8 {$inout},[$inp]
++ sub $rounds,$rounds,#2
++ vld1.32 {$rndkey1},[$key],#16
++
++.Loop_${dir}c:
++ aes$e $inout,$rndkey0
++ vld1.32 {$rndkey0},[$key],#16
++ aes$mc $inout,$inout
++ subs $rounds,$rounds,#2
++ aes$e $inout,$rndkey1
++ vld1.32 {$rndkey1},[$key],#16
++ aes$mc $inout,$inout
++ b.gt .Loop_${dir}c
++
++ aes$e $inout,$rndkey0
++ vld1.32 {$rndkey0},[$key]
++ aes$mc $inout,$inout
++ aes$e $inout,$rndkey1
++ veor $inout,$inout,$rndkey0
++
++ vst1.8 {$inout},[$out]
++ ret
++.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
++___
++}
++&gen_block("en");
++&gen_block("de");
++}}}
++{{{
++my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
++my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
++
++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
++
++### q8-q15 preloaded key schedule
++
++$code.=<<___;
++.globl ${prefix}_cbc_encrypt
++.type ${prefix}_cbc_encrypt,%function
++.align 5
++${prefix}_cbc_encrypt:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++___
++$code.=<<___ if ($flavour !~ /64/);
++ mov ip,sp
++ stmdb sp!,{r4-r8,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++ ldmia ip,{r4-r5} @ load remaining args
++___
++$code.=<<___;
++ subs $len,$len,#16
++ mov $step,#16
++ b.lo .Lcbc_abort
++ cclr $step,eq
++
++ cmp $enc,#0 // en- or decrypting?
++ ldr $rounds,[$key,#240]
++ and $len,$len,#-16
++ vld1.8 {$ivec},[$ivp]
++ vld1.8 {$dat},[$inp],$step
++
++ vld1.32 {q8-q9},[$key] // load key schedule...
++ sub $rounds,$rounds,#6
++ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
++ sub $rounds,$rounds,#2
++ vld1.32 {q10-q11},[$key_],#32
++ vld1.32 {q12-q13},[$key_],#32
++ vld1.32 {q14-q15},[$key_],#32
++ vld1.32 {$rndlast},[$key_]
++
++ add $key_,$key,#32
++ mov $cnt,$rounds
++ b.eq .Lcbc_dec
++
++ cmp $rounds,#2
++ veor $dat,$dat,$ivec
++ veor $rndzero_n_last,q8,$rndlast
++ b.eq .Lcbc_enc128
++
++.Loop_cbc_enc:
++ aese $dat,q8
++ vld1.32 {q8},[$key_],#16
++ aesmc $dat,$dat
++ subs $cnt,$cnt,#2
++ aese $dat,q9
++ vld1.32 {q9},[$key_],#16
++ aesmc $dat,$dat
++ b.gt .Loop_cbc_enc
++
++ aese $dat,q8
++ aesmc $dat,$dat
++ subs $len,$len,#16
++ aese $dat,q9
++ aesmc $dat,$dat
++ cclr $step,eq
++ aese $dat,q10
++ aesmc $dat,$dat
++ add $key_,$key,#16
++ aese $dat,q11
++ aesmc $dat,$dat
++ vld1.8 {q8},[$inp],$step
++ aese $dat,q12
++ aesmc $dat,$dat
++ veor q8,q8,$rndzero_n_last
++ aese $dat,q13
++ aesmc $dat,$dat
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ aese $dat,q14
++ aesmc $dat,$dat
++ aese $dat,q15
++
++ mov $cnt,$rounds
++ veor $ivec,$dat,$rndlast
++ vst1.8 {$ivec},[$out],#16
++ b.hs .Loop_cbc_enc
++
++ b .Lcbc_done
++
++.align 5
++.Lcbc_enc128:
++ vld1.32 {$in0-$in1},[$key_]
++ aese $dat,q8
++ aesmc $dat,$dat
++ b .Lenter_cbc_enc128
++.Loop_cbc_enc128:
++ aese $dat,q8
++ aesmc $dat,$dat
++ vst1.8 {$ivec},[$out],#16
++.Lenter_cbc_enc128:
++ aese $dat,q9
++ aesmc $dat,$dat
++ subs $len,$len,#16
++ aese $dat,$in0
++ aesmc $dat,$dat
++ cclr $step,eq
++ aese $dat,$in1
++ aesmc $dat,$dat
++ aese $dat,q10
++ aesmc $dat,$dat
++ aese $dat,q11
++ aesmc $dat,$dat
++ vld1.8 {q8},[$inp],$step
++ aese $dat,q12
++ aesmc $dat,$dat
++ aese $dat,q13
++ aesmc $dat,$dat
++ aese $dat,q14
++ aesmc $dat,$dat
++ veor q8,q8,$rndzero_n_last
++ aese $dat,q15
++ veor $ivec,$dat,$rndlast
++ b.hs .Loop_cbc_enc128
++
++ vst1.8 {$ivec},[$out],#16
++ b .Lcbc_done
++
++.align 5
++.Lcbc_dec128:
++ vld1.32 {$tmp0-$tmp1},[$key_]
++ veor $ivec,$ivec,$rndlast
++ veor $in0,$dat0,$rndlast
++ mov $step1,$step
++
++.Loop2x_cbc_dec128:
++ aesd $dat0,q8
++ aesd $dat1,q8
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ subs $len,$len,#32
++ aesd $dat0,q9
++ aesd $dat1,q9
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ cclr $step,lo
++ aesd $dat0,$tmp0
++ aesd $dat1,$tmp0
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ cclr $step1,ls
++ aesd $dat0,$tmp1
++ aesd $dat1,$tmp1
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q10
++ aesd $dat1,q10
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q11
++ aesd $dat1,q11
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q12
++ aesd $dat1,q12
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q13
++ aesd $dat1,q13
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q14
++ aesd $dat1,q14
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ aesd $dat0,q15
++ aesd $dat1,q15
++
++ veor $ivec,$ivec,$dat0
++ vld1.8 {$dat0},[$inp],$step
++ veor $in0,$in0,$dat1
++ vld1.8 {$dat1},[$inp],$step1
++ vst1.8 {$ivec},[$out],#16
++ veor $ivec,$in1,$rndlast
++ vst1.8 {$in0},[$out],#16
++ veor $in0,$dat0,$rndlast
++ vorr $in1,$dat1,$dat1
++ b.hs .Loop2x_cbc_dec128
++
++ adds $len,$len,#32
++ veor $ivec,$ivec,$rndlast
++ b.eq .Lcbc_done
++ veor $in0,$in0,$rndlast
++ b .Lcbc_dec_tail
++
++.align 5
++.Lcbc_dec:
++ subs $len,$len,#16
++ vorr $in0,$dat,$dat
++ b.lo .Lcbc_dec_tail
++
++ cclr $step,eq
++ cmp $rounds,#2
++ vld1.8 {$dat1},[$inp],$step
++ vorr $in1,$dat1,$dat1
++ b.eq .Lcbc_dec128
++
++.Loop2x_cbc_dec:
++ aesd $dat0,q8
++ aesd $dat1,q8
++ vld1.32 {q8},[$key_],#16
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ subs $cnt,$cnt,#2
++ aesd $dat0,q9
++ aesd $dat1,q9
++ vld1.32 {q9},[$key_],#16
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ b.gt .Loop2x_cbc_dec
++
++ aesd $dat0,q8
++ aesd $dat1,q8
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ veor $tmp0,$ivec,$rndlast
++ veor $tmp1,$in0,$rndlast
++ aesd $dat0,q9
++ aesd $dat1,q9
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ vorr $ivec,$in1,$in1
++ subs $len,$len,#32
++ aesd $dat0,q10
++ aesd $dat1,q10
++ aesimc $dat0,$dat0
++ cclr $step,lo
++ aesimc $dat1,$dat1
++ mov $key_,$key
++ aesd $dat0,q11
++ aesd $dat1,q11
++ aesimc $dat0,$dat0
++ vld1.8 {$in0},[$inp],$step
++ aesimc $dat1,$dat1
++ cclr $step,ls
++ aesd $dat0,q12
++ aesd $dat1,q12
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ vld1.8 {$in1},[$inp],$step
++ aesd $dat0,q13
++ aesd $dat1,q13
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ aesd $dat0,q14
++ aesd $dat1,q14
++ aesimc $dat0,$dat0
++ aesimc $dat1,$dat1
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ aesd $dat0,q15
++ aesd $dat1,q15
++
++ mov $cnt,$rounds
++ veor $tmp0,$tmp0,$dat0
++ veor $tmp1,$tmp1,$dat1
++ vorr $dat0,$in0,$in0
++ vst1.8 {$tmp0},[$out],#16
++ vorr $dat1,$in1,$in1
++ vst1.8 {$tmp1},[$out],#16
++ b.hs .Loop2x_cbc_dec
++
++ adds $len,$len,#32
++ b.eq .Lcbc_done
++
++.Lcbc_dec_tail:
++ aesd $dat,q8
++ vld1.32 {q8},[$key_],#16
++ aesimc $dat,$dat
++ subs $cnt,$cnt,#2
++ aesd $dat,q9
++ vld1.32 {q9},[$key_],#16
++ aesimc $dat,$dat
++ b.gt .Lcbc_dec_tail
++
++ aesd $dat,q8
++ aesimc $dat,$dat
++ aesd $dat,q9
++ aesimc $dat,$dat
++ veor $tmp,$ivec,$rndlast
++ aesd $dat,q10
++ aesimc $dat,$dat
++ vorr $ivec,$in0,$in0
++ aesd $dat,q11
++ aesimc $dat,$dat
++ aesd $dat,q12
++ aesimc $dat,$dat
++ aesd $dat,q13
++ aesimc $dat,$dat
++ aesd $dat,q14
++ aesimc $dat,$dat
++ aesd $dat,q15
++
++ veor $tmp,$tmp,$dat
++ vst1.8 {$tmp},[$out],#16
++
++.Lcbc_done:
++ vst1.8 {$ivec},[$ivp]
++.Lcbc_abort:
++___
++$code.=<<___ if ($flavour !~ /64/);
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r8,pc}
++___
++$code.=<<___ if ($flavour =~ /64/);
++ ldr x29,[sp],#16
++ ret
++___
++$code.=<<___;
++.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
++___
++}}}
++{{{
++my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
++my ($rounds,$cnt,$key_,$ctr,$tctr,$tctr1)=("w5","w6","x7","w8","w9","w10");
++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
++
++my ($dat,$tmp)=($dat0,$tmp0);
++
++### q8-q15 preloaded key schedule
++
++$code.=<<___;
++.globl ${prefix}_ctr32_encrypt_blocks
++.type ${prefix}_ctr32_encrypt_blocks,%function
++.align 5
++${prefix}_ctr32_encrypt_blocks:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++___
++$code.=<<___ if ($flavour !~ /64/);
++ mov ip,sp
++ stmdb sp!,{r4-r10,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++ ldr r4, [ip] @ load remaining arg
++___
++$code.=<<___;
++ ldr $rounds,[$key,#240]
++
++ ldr $ctr, [$ivp, #12]
++ vld1.32 {$dat0},[$ivp]
++
++ vld1.32 {q8-q9},[$key] // load key schedule...
++ sub $rounds,$rounds,#6
++ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
++ sub $rounds,$rounds,#2
++ vld1.32 {q10-q11},[$key_],#32
++ vld1.32 {q12-q13},[$key_],#32
++ vld1.32 {q14-q15},[$key_],#32
++ vld1.32 {$rndlast},[$key_]
++
++ add $key_,$key,#32
++ mov $cnt,$rounds
++
++ subs $len,$len,#2
++ b.lo .Lctr32_tail
++
++#ifndef __ARMEB__
++ rev $ctr, $ctr
++#endif
++ vorr $dat1,$dat0,$dat0
++ add $ctr, $ctr, #1
++ vorr $ivec,$dat0,$dat0
++ rev $tctr1, $ctr
++ cmp $rounds,#2
++ vmov.32 ${dat1}[3],$tctr1
++ b.eq .Lctr32_128
++
++.Loop2x_ctr32:
++ aese $dat0,q8
++ aese $dat1,q8
++ vld1.32 {q8},[$key_],#16
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ subs $cnt,$cnt,#2
++ aese $dat0,q9
++ aese $dat1,q9
++ vld1.32 {q9},[$key_],#16
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ b.gt .Loop2x_ctr32
++
++ aese $dat0,q8
++ aese $dat1,q8
++ aesmc $tmp0,$dat0
++ vorr $dat0,$ivec,$ivec
++ aesmc $tmp1,$dat1
++ vorr $dat1,$ivec,$ivec
++ aese $tmp0,q9
++ aese $tmp1,q9
++ vld1.8 {$in0},[$inp],#16
++ aesmc $tmp0,$tmp0
++ vld1.8 {$in1},[$inp],#16
++ aesmc $tmp1,$tmp1
++ add $ctr,$ctr,#1
++ aese $tmp0,q10
++ aese $tmp1,q10
++ rev $tctr,$ctr
++ aesmc $tmp0,$tmp0
++ aesmc $tmp1,$tmp1
++ add $ctr,$ctr,#1
++ aese $tmp0,q11
++ aese $tmp1,q11
++ veor $in0,$in0,$rndlast
++ rev $tctr1,$ctr
++ aesmc $tmp0,$tmp0
++ aesmc $tmp1,$tmp1
++ veor $in1,$in1,$rndlast
++ mov $key_,$key
++ aese $tmp0,q12
++ aese $tmp1,q12
++ subs $len,$len,#2
++ aesmc $tmp0,$tmp0
++ aesmc $tmp1,$tmp1
++ vld1.32 {q8-q9},[$key_],#32 // re-pre-load rndkey[0-1]
++ aese $tmp0,q13
++ aese $tmp1,q13
++ aesmc $tmp0,$tmp0
++ aesmc $tmp1,$tmp1
++ aese $tmp0,q14
++ aese $tmp1,q14
++ vmov.32 ${dat0}[3], $tctr
++ aesmc $tmp0,$tmp0
++ vmov.32 ${dat1}[3], $tctr1
++ aesmc $tmp1,$tmp1
++ aese $tmp0,q15
++ aese $tmp1,q15
++
++ mov $cnt,$rounds
++ veor $in0,$in0,$tmp0
++ veor $in1,$in1,$tmp1
++ vst1.8 {$in0},[$out],#16
++ vst1.8 {$in1},[$out],#16
++ b.hs .Loop2x_ctr32
++
++ adds $len,$len,#2
++ b.eq .Lctr32_done
++ b .Lctr32_tail
++
++.Lctr32_128:
++ vld1.32 {$tmp0-$tmp1},[$key_]
++
++.Loop2x_ctr32_128:
++ aese $dat0,q8
++ aese $dat1,q8
++ aesmc $dat0,$dat0
++ vld1.8 {$in0},[$inp],#16
++ aesmc $dat1,$dat1
++ vld1.8 {$in1},[$inp],#16
++ aese $dat0,q9
++ aese $dat1,q9
++ add $ctr,$ctr,#1
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ rev $tctr,$ctr
++ aese $dat0,$tmp0
++ aese $dat1,$tmp0
++ add $ctr,$ctr,#1
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ rev $tctr1,$ctr
++ aese $dat0,$tmp1
++ aese $dat1,$tmp1
++ subs $len,$len,#2
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ aese $dat0,q10
++ aese $dat1,q10
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ aese $dat0,q11
++ aese $dat1,q11
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ aese $dat0,q12
++ aese $dat1,q12
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ aese $dat0,q13
++ aese $dat1,q13
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ aese $dat0,q14
++ aese $dat1,q14
++ aesmc $dat0,$dat0
++ aesmc $dat1,$dat1
++ veor $in0,$in0,$rndlast
++ aese $dat0,q15
++ veor $in1,$in1,$rndlast
++ aese $dat1,q15
++
++ veor $in0,$in0,$dat0
++ vorr $dat0,$ivec,$ivec
++ veor $in1,$in1,$dat1
++ vorr $dat1,$ivec,$ivec
++ vst1.8 {$in0},[$out],#16
++ vmov.32 ${dat0}[3], $tctr
++ vst1.8 {$in1},[$out],#16
++ vmov.32 ${dat1}[3], $tctr1
++ b.hs .Loop2x_ctr32_128
++
++ adds $len,$len,#2
++ b.eq .Lctr32_done
++
++.Lctr32_tail:
++ aese $dat,q8
++ vld1.32 {q8},[$key_],#16
++ aesmc $dat,$dat
++ subs $cnt,$cnt,#2
++ aese $dat,q9
++ vld1.32 {q9},[$key_],#16
++ aesmc $dat,$dat
++ b.gt .Lctr32_tail
++
++ aese $dat,q8
++ aesmc $dat,$dat
++ aese $dat,q9
++ aesmc $dat,$dat
++ vld1.8 {$in0},[$inp]
++ aese $dat,q10
++ aesmc $dat,$dat
++ aese $dat,q11
++ aesmc $dat,$dat
++ aese $dat,q12
++ aesmc $dat,$dat
++ aese $dat,q13
++ aesmc $dat,$dat
++ aese $dat,q14
++ aesmc $dat,$dat
++ veor $in0,$in0,$rndlast
++ aese $dat,q15
++
++ veor $in0,$in0,$dat
++ vst1.8 {$in0},[$out]
++
++.Lctr32_done:
++___
++$code.=<<___ if ($flavour !~ /64/);
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r10,pc}
++___
++$code.=<<___ if ($flavour =~ /64/);
++ ldr x29,[sp],#16
++ ret
++___
++$code.=<<___;
++.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
++___
++}}}
++$code.=<<___;
++#endif
++___
++########################################
++if ($flavour =~ /64/) { ######## 64-bit code
++ my %opcode = (
++ "aesd" => 0x4e285800, "aese" => 0x4e284800,
++ "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 );
++
++ local *unaes = sub {
++ my ($mnemonic,$arg)=@_;
++
++ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o &&
++ sprintf ".inst\t0x%08x\t//%s %s",
++ $opcode{$mnemonic}|$1|($2<<5),
++ $mnemonic,$arg;
++ };
++
++ foreach(split("\n",$code)) {
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
++ s/@\s/\/\//o; # old->new style commentary
++
++ #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
++ s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or
++ s/vmov\.i8/movi/o or # fix up legacy mnemonics
++ s/vext\.8/ext/o or
++ s/vrev32\.8/rev32/o or
++ s/vtst\.8/cmtst/o or
++ s/vshr/ushr/o or
++ s/^(\s+)v/$1/o or # strip off v prefix
++ s/\bbx\s+lr\b/ret/o;
++
++ # fix up remainig legacy suffixes
++ s/\.[ui]?8//o;
++ m/\],#8/o and s/\.16b/\.8b/go;
++ s/\.[ui]?32//o and s/\.16b/\.4s/go;
++ s/\.[ui]?64//o and s/\.16b/\.2d/go;
++ s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
++
++ print $_,"\n";
++ }
++} else { ######## 32-bit code
++ my %opcode = (
++ "aesd" => 0xf3b00340, "aese" => 0xf3b00300,
++ "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 );
++
++ local *unaes = sub {
++ my ($mnemonic,$arg)=@_;
++
++ if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
++ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
++ |(($2&7)<<1) |(($2&8)<<2);
++ # since ARMv7 instructions are always encoded little-endian.
++ # correct solution is to use .inst directive, but older
++ # assemblers don't implement it:-(
++ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
++ $word&0xff,($word>>8)&0xff,
++ ($word>>16)&0xff,($word>>24)&0xff,
++ $mnemonic,$arg;
++ }
++ };
++
++ sub unvtbl {
++ my $arg=shift;
++
++ $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
++ sprintf "vtbl.8 d%d,{q%d},d%d\n\t".
++ "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
++ }
++
++ sub unvdup32 {
++ my $arg=shift;
++
++ $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
++ sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
++ }
++
++ sub unvmov32 {
++ my $arg=shift;
++
++ $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
++ sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
++ }
++
++ foreach(split("\n",$code)) {
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers
++ s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers
++ s/\/\/\s?/@ /o; # new->old style commentary
++
++ # fix up remainig new-style suffixes
++ s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or
++ s/\],#[0-9]+/]!/o;
++
++ s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
++ s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
++ s/vtbl\.8\s+(.*)/unvtbl($1)/geo or
++ s/vdup\.32\s+(.*)/unvdup32($1)/geo or
++ s/vmov\.32\s+(.*)/unvmov32($1)/geo or
++ s/^(\s+)b\./$1b/o or
++ s/^(\s+)ret/$1bx\tlr/o;
++
++ print $_,"\n";
++ }
++}
++
++close STDOUT;
+diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
+new file mode 100644
+index 0000000..f3d96d9
+--- /dev/null
++++ b/crypto/aes/asm/bsaes-armv7.pl
+@@ -0,0 +1,2467 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++#
++# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
++# <[email protected]>. Permission to use under GPL terms is
++# granted.
++# ====================================================================
++
++# Bit-sliced AES for ARM NEON
++#
++# February 2012.
++#
++# This implementation is direct adaptation of bsaes-x86_64 module for
++# ARM NEON. Except that this module is endian-neutral [in sense that
++# it can be compiled for either endianness] by courtesy of vld1.8's
++# neutrality. Initial version doesn't implement interface to OpenSSL,
++# only low-level primitives and unsupported entry points, just enough
++# to collect performance results, which for Cortex-A8 core are:
++#
++# encrypt 19.5 cycles per byte processed with 128-bit key
++# decrypt 22.1 cycles per byte processed with 128-bit key
++# key conv. 440 cycles per 128-bit key/0.18 of 8x block
++#
++# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
++# which is [much] worse than anticipated (for further details see
++# http://www.openssl.org/~appro/Snapdragon-S4.html).
++#
++# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
++# manages in 20.0 cycles].
++#
++# When comparing to x86_64 results keep in mind that NEON unit is
++# [mostly] single-issue and thus can't [fully] benefit from
++# instruction-level parallelism. And when comparing to aes-armv4
++# results keep in mind key schedule conversion overhead (see
++# bsaes-x86_64.pl for further details)...
++#
++# <[email protected]>
++
++# April-August 2013
++#
++# Add CBC, CTR and XTS subroutines, adapt for kernel use.
++#
++# <[email protected]>
++
++while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
++open STDOUT,">$output";
++
++my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
++my @XMM=map("q$_",(0..15));
++
++{
++my ($key,$rounds,$const)=("r4","r5","r6");
++
++sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
++sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
++
++sub Sbox {
++# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
++# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
++my @b=@_[0..7];
++my @t=@_[8..11];
++my @s=@_[12..15];
++ &InBasisChange (@b);
++ &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
++ &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
++}
++
++sub InBasisChange {
++# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
++# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
++my @b=@_[0..7];
++$code.=<<___;
++ veor @b[2], @b[2], @b[1]
++ veor @b[5], @b[5], @b[6]
++ veor @b[3], @b[3], @b[0]
++ veor @b[6], @b[6], @b[2]
++ veor @b[5], @b[5], @b[0]
++
++ veor @b[6], @b[6], @b[3]
++ veor @b[3], @b[3], @b[7]
++ veor @b[7], @b[7], @b[5]
++ veor @b[3], @b[3], @b[4]
++ veor @b[4], @b[4], @b[5]
++
++ veor @b[2], @b[2], @b[7]
++ veor @b[3], @b[3], @b[1]
++ veor @b[1], @b[1], @b[5]
++___
++}
++
++sub OutBasisChange {
++# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
++# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
++my @b=@_[0..7];
++$code.=<<___;
++ veor @b[0], @b[0], @b[6]
++ veor @b[1], @b[1], @b[4]
++ veor @b[4], @b[4], @b[6]
++ veor @b[2], @b[2], @b[0]
++ veor @b[6], @b[6], @b[1]
++
++ veor @b[1], @b[1], @b[5]
++ veor @b[5], @b[5], @b[3]
++ veor @b[3], @b[3], @b[7]
++ veor @b[7], @b[7], @b[5]
++ veor @b[2], @b[2], @b[5]
++
++ veor @b[4], @b[4], @b[7]
++___
++}
++
++sub InvSbox {
++# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
++# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
++my @b=@_[0..7];
++my @t=@_[8..11];
++my @s=@_[12..15];
++ &InvInBasisChange (@b);
++ &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
++ &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
++}
++
++sub InvInBasisChange { # OutBasisChange in reverse (with twist)
++my @b=@_[5,1,2,6,3,7,0,4];
++$code.=<<___
++ veor @b[1], @b[1], @b[7]
++ veor @b[4], @b[4], @b[7]
++
++ veor @b[7], @b[7], @b[5]
++ veor @b[1], @b[1], @b[3]
++ veor @b[2], @b[2], @b[5]
++ veor @b[3], @b[3], @b[7]
++
++ veor @b[6], @b[6], @b[1]
++ veor @b[2], @b[2], @b[0]
++ veor @b[5], @b[5], @b[3]
++ veor @b[4], @b[4], @b[6]
++ veor @b[0], @b[0], @b[6]
++ veor @b[1], @b[1], @b[4]
++___
++}
++
++sub InvOutBasisChange { # InBasisChange in reverse
++my @b=@_[2,5,7,3,6,1,0,4];
++$code.=<<___;
++ veor @b[1], @b[1], @b[5]
++ veor @b[2], @b[2], @b[7]
++
++ veor @b[3], @b[3], @b[1]
++ veor @b[4], @b[4], @b[5]
++ veor @b[7], @b[7], @b[5]
++ veor @b[3], @b[3], @b[4]
++ veor @b[5], @b[5], @b[0]
++ veor @b[3], @b[3], @b[7]
++ veor @b[6], @b[6], @b[2]
++ veor @b[2], @b[2], @b[1]
++ veor @b[6], @b[6], @b[3]
++
++ veor @b[3], @b[3], @b[0]
++ veor @b[5], @b[5], @b[6]
++___
++}
++
++sub Mul_GF4 {
++#;*************************************************************
++#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
++#;*************************************************************
++my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
++$code.=<<___;
++ veor $t0, $y0, $y1
++ vand $t0, $t0, $x0
++ veor $x0, $x0, $x1
++ vand $t1, $x1, $y0
++ vand $x0, $x0, $y1
++ veor $x1, $t1, $t0
++ veor $x0, $x0, $t1
++___
++}
++
++sub Mul_GF4_N { # not used, see next subroutine
++# multiply and scale by N
++my ($x0,$x1,$y0,$y1,$t0)=@_;
++$code.=<<___;
++ veor $t0, $y0, $y1
++ vand $t0, $t0, $x0
++ veor $x0, $x0, $x1
++ vand $x1, $x1, $y0
++ vand $x0, $x0, $y1
++ veor $x1, $x1, $x0
++ veor $x0, $x0, $t0
++___
++}
++
++sub Mul_GF4_N_GF4 {
++# interleaved Mul_GF4_N and Mul_GF4
++my ($x0,$x1,$y0,$y1,$t0,
++ $x2,$x3,$y2,$y3,$t1)=@_;
++$code.=<<___;
++ veor $t0, $y0, $y1
++ veor $t1, $y2, $y3
++ vand $t0, $t0, $x0
++ vand $t1, $t1, $x2
++ veor $x0, $x0, $x1
++ veor $x2, $x2, $x3
++ vand $x1, $x1, $y0
++ vand $x3, $x3, $y2
++ vand $x0, $x0, $y1
++ vand $x2, $x2, $y3
++ veor $x1, $x1, $x0
++ veor $x2, $x2, $x3
++ veor $x0, $x0, $t0
++ veor $x3, $x3, $t1
++___
++}
++sub Mul_GF16_2 {
++my @x=@_[0..7];
++my @y=@_[8..11];
++my @t=@_[12..15];
++$code.=<<___;
++ veor @t[0], @x[0], @x[2]
++ veor @t[1], @x[1], @x[3]
++___
++ &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]);
++$code.=<<___;
++ veor @y[0], @y[0], @y[2]
++ veor @y[1], @y[1], @y[3]
++___
++ Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
++ @x[2], @x[3], @y[2], @y[3], @t[2]);
++$code.=<<___;
++ veor @x[0], @x[0], @t[0]
++ veor @x[2], @x[2], @t[0]
++ veor @x[1], @x[1], @t[1]
++ veor @x[3], @x[3], @t[1]
++
++ veor @t[0], @x[4], @x[6]
++ veor @t[1], @x[5], @x[7]
++___
++ &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
++ @x[6], @x[7], @y[2], @y[3], @t[2]);
++$code.=<<___;
++ veor @y[0], @y[0], @y[2]
++ veor @y[1], @y[1], @y[3]
++___
++ &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]);
++$code.=<<___;
++ veor @x[4], @x[4], @t[0]
++ veor @x[6], @x[6], @t[0]
++ veor @x[5], @x[5], @t[1]
++ veor @x[7], @x[7], @t[1]
++___
++}
++sub Inv_GF256 {
++#;********************************************************************
++#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
++#;********************************************************************
++my @x=@_[0..7];
++my @t=@_[8..11];
++my @s=@_[12..15];
++# direct optimizations from hardware
++$code.=<<___;
++ veor @t[3], @x[4], @x[6]
++ veor @t[2], @x[5], @x[7]
++ veor @t[1], @x[1], @x[3]
++ veor @s[1], @x[7], @x[6]
++ vmov @t[0], @t[2]
++ veor @s[0], @x[0], @x[2]
++
++ vorr @t[2], @t[2], @t[1]
++ veor @s[3], @t[3], @t[0]
++ vand @s[2], @t[3], @s[0]
++ vorr @t[3], @t[3], @s[0]
++ veor @s[0], @s[0], @t[1]
++ vand @t[0], @t[0], @t[1]
++ veor @t[1], @x[3], @x[2]
++ vand @s[3], @s[3], @s[0]
++ vand @s[1], @s[1], @t[1]
++ veor @t[1], @x[4], @x[5]
++ veor @s[0], @x[1], @x[0]
++ veor @t[3], @t[3], @s[1]
++ veor @t[2], @t[2], @s[1]
++ vand @s[1], @t[1], @s[0]
++ vorr @t[1], @t[1], @s[0]
++ veor @t[3], @t[3], @s[3]
++ veor @t[0], @t[0], @s[1]
++ veor @t[2], @t[2], @s[2]
++ veor @t[1], @t[1], @s[3]
++ veor @t[0], @t[0], @s[2]
++ vand @s[0], @x[7], @x[3]
++ veor @t[1], @t[1], @s[2]
++ vand @s[1], @x[6], @x[2]
++ vand @s[2], @x[5], @x[1]
++ vorr @s[3], @x[4], @x[0]
++ veor @t[3], @t[3], @s[0]
++ veor @t[1], @t[1], @s[2]
++ veor @t[0], @t[0], @s[3]
++ veor @t[2], @t[2], @s[1]
++
++ @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
++
++ @ new smaller inversion
++
++ vand @s[2], @t[3], @t[1]
++ vmov @s[0], @t[0]
++
++ veor @s[1], @t[2], @s[2]
++ veor @s[3], @t[0], @s[2]
++ veor @s[2], @t[0], @s[2] @ @s[2]=@s[3]
++
++ vbsl @s[1], @t[1], @t[0]
++ vbsl @s[3], @t[3], @t[2]
++ veor @t[3], @t[3], @t[2]
++
++ vbsl @s[0], @s[1], @s[2]
++ vbsl @t[0], @s[2], @s[1]
++
++ vand @s[2], @s[0], @s[3]
++ veor @t[1], @t[1], @t[0]
++
++ veor @s[2], @s[2], @t[3]
++___
++# output in s3, s2, s1, t1
++
++# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
++
++# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
++ &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
++
++### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
++}
++
++# AES linear components
++
++sub ShiftRows {
++my @x=@_[0..7];
++my @t=@_[8..11];
++my $mask=pop;
++$code.=<<___;
++ vldmia $key!, {@t[0]-@t[3]}
++ veor @t[0], @t[0], @x[0]
++ veor @t[1], @t[1], @x[1]
++ vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
++ vldmia $key!, {@t[0]}
++ veor @t[2], @t[2], @x[2]
++ vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
++ vldmia $key!, {@t[1]}
++ veor @t[3], @t[3], @x[3]
++ vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
++ vldmia $key!, {@t[2]}
++ vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
++ vldmia $key!, {@t[3]}
++ veor @t[0], @t[0], @x[4]
++ veor @t[1], @t[1], @x[5]
++ vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
++ veor @t[2], @t[2], @x[6]
++ vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
++ veor @t[3], @t[3], @x[7]
++ vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
++ vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
++ vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
++___
++}
++
++sub MixColumns {
++# modified to emit output in order suitable for feeding back to aesenc[last]
++my @x=@_[0..7];
++my @t=@_[8..15];
++my $inv=@_[16]; # optional
++$code.=<<___;
++ vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32
++ vext.8 @t[1], @x[1], @x[1], #12
++ veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32)
++ vext.8 @t[2], @x[2], @x[2], #12
++ veor @x[1], @x[1], @t[1]
++ vext.8 @t[3], @x[3], @x[3], #12
++ veor @x[2], @x[2], @t[2]
++ vext.8 @t[4], @x[4], @x[4], #12
++ veor @x[3], @x[3], @t[3]
++ vext.8 @t[5], @x[5], @x[5], #12
++ veor @x[4], @x[4], @t[4]
++ vext.8 @t[6], @x[6], @x[6], #12
++ veor @x[5], @x[5], @t[5]
++ vext.8 @t[7], @x[7], @x[7], #12
++ veor @x[6], @x[6], @t[6]
++
++ veor @t[1], @t[1], @x[0]
++ veor @x[7], @x[7], @t[7]
++ vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
++ veor @t[2], @t[2], @x[1]
++ veor @t[0], @t[0], @x[7]
++ veor @t[1], @t[1], @x[7]
++ vext.8 @x[1], @x[1], @x[1], #8
++ veor @t[5], @t[5], @x[4]
++ veor @x[0], @x[0], @t[0]
++ veor @t[6], @t[6], @x[5]
++ veor @x[1], @x[1], @t[1]
++ vext.8 @t[0], @x[4], @x[4], #8
++ veor @t[4], @t[4], @x[3]
++ vext.8 @t[1], @x[5], @x[5], #8
++ veor @t[7], @t[7], @x[6]
++ vext.8 @x[4], @x[3], @x[3], #8
++ veor @t[3], @t[3], @x[2]
++ vext.8 @x[5], @x[7], @x[7], #8
++ veor @t[4], @t[4], @x[7]
++ vext.8 @x[3], @x[6], @x[6], #8
++ veor @t[3], @t[3], @x[7]
++ vext.8 @x[6], @x[2], @x[2], #8
++ veor @x[7], @t[1], @t[5]
++___
++$code.=<<___ if (!$inv);
++ veor @x[2], @t[0], @t[4]
++ veor @x[4], @x[4], @t[3]
++ veor @x[5], @x[5], @t[7]
++ veor @x[3], @x[3], @t[6]
++ @ vmov @x[2], @t[0]
++ veor @x[6], @x[6], @t[2]
++ @ vmov @x[7], @t[1]
++___
++$code.=<<___ if ($inv);
++ veor @t[3], @t[3], @x[4]
++ veor @x[5], @x[5], @t[7]
++ veor @x[2], @x[3], @t[6]
++ veor @x[3], @t[0], @t[4]
++ veor @x[4], @x[6], @t[2]
++ vmov @x[6], @t[3]
++ @ vmov @x[7], @t[1]
++___
++}
++
++sub InvMixColumns_orig {
++my @x=@_[0..7];
++my @t=@_[8..15];
++
++$code.=<<___;
++ @ multiplication by 0x0e
++ vext.8 @t[7], @x[7], @x[7], #12
++ vmov @t[2], @x[2]
++ veor @x[2], @x[2], @x[5] @ 2 5
++ veor @x[7], @x[7], @x[5] @ 7 5
++ vext.8 @t[0], @x[0], @x[0], #12
++ vmov @t[5], @x[5]
++ veor @x[5], @x[5], @x[0] @ 5 0 [1]
++ veor @x[0], @x[0], @x[1] @ 0 1
++ vext.8 @t[1], @x[1], @x[1], #12
++ veor @x[1], @x[1], @x[2] @ 1 25
++ veor @x[0], @x[0], @x[6] @ 01 6 [2]
++ vext.8 @t[3], @x[3], @x[3], #12
++ veor @x[1], @x[1], @x[3] @ 125 3 [4]
++ veor @x[2], @x[2], @x[0] @ 25 016 [3]
++ veor @x[3], @x[3], @x[7] @ 3 75
++ veor @x[7], @x[7], @x[6] @ 75 6 [0]
++ vext.8 @t[6], @x[6], @x[6], #12
++ vmov @t[4], @x[4]
++ veor @x[6], @x[6], @x[4] @ 6 4
++ veor @x[4], @x[4], @x[3] @ 4 375 [6]
++ veor @x[3], @x[3], @x[7] @ 375 756=36
++ veor @x[6], @x[6], @t[5] @ 64 5 [7]
++ veor @x[3], @x[3], @t[2] @ 36 2
++ vext.8 @t[5], @t[5], @t[5], #12
++ veor @x[3], @x[3], @t[4] @ 362 4 [5]
++___
++ my @y = @x[7,5,0,2,1,3,4,6];
++$code.=<<___;
++ @ multiplication by 0x0b
++ veor @y[1], @y[1], @y[0]
++ veor @y[0], @y[0], @t[0]
++ vext.8 @t[2], @t[2], @t[2], #12
++ veor @y[1], @y[1], @t[1]
++ veor @y[0], @y[0], @t[5]
++ vext.8 @t[4], @t[4], @t[4], #12
++ veor @y[1], @y[1], @t[6]
++ veor @y[0], @y[0], @t[7]
++ veor @t[7], @t[7], @t[6] @ clobber t[7]
++
++ veor @y[3], @y[3], @t[0]
++ veor @y[1], @y[1], @y[0]
++ vext.8 @t[0], @t[0], @t[0], #12
++ veor @y[2], @y[2], @t[1]
++ veor @y[4], @y[4], @t[1]
++ vext.8 @t[1], @t[1], @t[1], #12
++ veor @y[2], @y[2], @t[2]
++ veor @y[3], @y[3], @t[2]
++ veor @y[5], @y[5], @t[2]
++ veor @y[2], @y[2], @t[7]
++ vext.8 @t[2], @t[2], @t[2], #12
++ veor @y[3], @y[3], @t[3]
++ veor @y[6], @y[6], @t[3]
++ veor @y[4], @y[4], @t[3]
++ veor @y[7], @y[7], @t[4]
++ vext.8 @t[3], @t[3], @t[3], #12
++ veor @y[5], @y[5], @t[4]
++ veor @y[7], @y[7], @t[7]
++ veor @t[7], @t[7], @t[5] @ clobber t[7] even more
++ veor @y[3], @y[3], @t[5]
++ veor @y[4], @y[4], @t[4]
++
++ veor @y[5], @y[5], @t[7]
++ vext.8 @t[4], @t[4], @t[4], #12
++ veor @y[6], @y[6], @t[7]
++ veor @y[4], @y[4], @t[7]
++
++ veor @t[7], @t[7], @t[5]
++ vext.8 @t[5], @t[5], @t[5], #12
++
++ @ multiplication by 0x0d
++ veor @y[4], @y[4], @y[7]
++ veor @t[7], @t[7], @t[6] @ restore t[7]
++ veor @y[7], @y[7], @t[4]
++ vext.8 @t[6], @t[6], @t[6], #12
++ veor @y[2], @y[2], @t[0]
++ veor @y[7], @y[7], @t[5]
++ vext.8 @t[7], @t[7], @t[7], #12
++ veor @y[2], @y[2], @t[2]
++
++ veor @y[3], @y[3], @y[1]
++ veor @y[1], @y[1], @t[1]
++ veor @y[0], @y[0], @t[0]
++ veor @y[3], @y[3], @t[0]
++ veor @y[1], @y[1], @t[5]
++ veor @y[0], @y[0], @t[5]
++ vext.8 @t[0], @t[0], @t[0], #12
++ veor @y[1], @y[1], @t[7]
++ veor @y[0], @y[0], @t[6]
++ veor @y[3], @y[3], @y[1]
++ veor @y[4], @y[4], @t[1]
++ vext.8 @t[1], @t[1], @t[1], #12
++
++ veor @y[7], @y[7], @t[7]
++ veor @y[4], @y[4], @t[2]
++ veor @y[5], @y[5], @t[2]
++ veor @y[2], @y[2], @t[6]
++ veor @t[6], @t[6], @t[3] @ clobber t[6]
++ vext.8 @t[2], @t[2], @t[2], #12
++ veor @y[4], @y[4], @y[7]
++ veor @y[3], @y[3], @t[6]
++
++ veor @y[6], @y[6], @t[6]
++ veor @y[5], @y[5], @t[5]
++ vext.8 @t[5], @t[5], @t[5], #12
++ veor @y[6], @y[6], @t[4]
++ vext.8 @t[4], @t[4], @t[4], #12
++ veor @y[5], @y[5], @t[6]
++ veor @y[6], @y[6], @t[7]
++ vext.8 @t[7], @t[7], @t[7], #12
++ veor @t[6], @t[6], @t[3] @ restore t[6]
++ vext.8 @t[3], @t[3], @t[3], #12
++
++ @ multiplication by 0x09
++ veor @y[4], @y[4], @y[1]
++ veor @t[1], @t[1], @y[1] @ t[1]=y[1]
++ veor @t[0], @t[0], @t[5] @ clobber t[0]
++ vext.8 @t[6], @t[6], @t[6], #12
++ veor @t[1], @t[1], @t[5]
++ veor @y[3], @y[3], @t[0]
++ veor @t[0], @t[0], @y[0] @ t[0]=y[0]
++ veor @t[1], @t[1], @t[6]
++ veor @t[6], @t[6], @t[7] @ clobber t[6]
++ veor @y[4], @y[4], @t[1]
++ veor @y[7], @y[7], @t[4]
++ veor @y[6], @y[6], @t[3]
++ veor @y[5], @y[5], @t[2]
++ veor @t[4], @t[4], @y[4] @ t[4]=y[4]
++ veor @t[3], @t[3], @y[3] @ t[3]=y[3]
++ veor @t[5], @t[5], @y[5] @ t[5]=y[5]
++ veor @t[2], @t[2], @y[2] @ t[2]=y[2]
++ veor @t[3], @t[3], @t[7]
++ veor @XMM[5], @t[5], @t[6]
++ veor @XMM[6], @t[6], @y[6] @ t[6]=y[6]
++ veor @XMM[2], @t[2], @t[6]
++ veor @XMM[7], @t[7], @y[7] @ t[7]=y[7]
++
++ vmov @XMM[0], @t[0]
++ vmov @XMM[1], @t[1]
++ @ vmov @XMM[2], @t[2]
++ vmov @XMM[3], @t[3]
++ vmov @XMM[4], @t[4]
++ @ vmov @XMM[5], @t[5]
++ @ vmov @XMM[6], @t[6]
++ @ vmov @XMM[7], @t[7]
++___
++}
++
++sub InvMixColumns {
++my @x=@_[0..7];
++my @t=@_[8..15];
++
++# Thanks to Jussi Kivilinna for providing pointer to
++#
++# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
++# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
++# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
++# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
++
++$code.=<<___;
++ @ multiplication by 0x05-0x00-0x04-0x00
++ vext.8 @t[0], @x[0], @x[0], #8
++ vext.8 @t[6], @x[6], @x[6], #8
++ vext.8 @t[7], @x[7], @x[7], #8
++ veor @t[0], @t[0], @x[0]
++ vext.8 @t[1], @x[1], @x[1], #8
++ veor @t[6], @t[6], @x[6]
++ vext.8 @t[2], @x[2], @x[2], #8
++ veor @t[7], @t[7], @x[7]
++ vext.8 @t[3], @x[3], @x[3], #8
++ veor @t[1], @t[1], @x[1]
++ vext.8 @t[4], @x[4], @x[4], #8
++ veor @t[2], @t[2], @x[2]
++ vext.8 @t[5], @x[5], @x[5], #8
++ veor @t[3], @t[3], @x[3]
++ veor @t[4], @t[4], @x[4]
++ veor @t[5], @t[5], @x[5]
++
++ veor @x[0], @x[0], @t[6]
++ veor @x[1], @x[1], @t[6]
++ veor @x[2], @x[2], @t[0]
++ veor @x[4], @x[4], @t[2]
++ veor @x[3], @x[3], @t[1]
++ veor @x[1], @x[1], @t[7]
++ veor @x[2], @x[2], @t[7]
++ veor @x[4], @x[4], @t[6]
++ veor @x[5], @x[5], @t[3]
++ veor @x[3], @x[3], @t[6]
++ veor @x[6], @x[6], @t[4]
++ veor @x[4], @x[4], @t[7]
++ veor @x[5], @x[5], @t[7]
++ veor @x[7], @x[7], @t[5]
++___
++ &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
++}
++
++sub swapmove {
++my ($a,$b,$n,$mask,$t)=@_;
++$code.=<<___;
++ vshr.u64 $t, $b, #$n
++ veor $t, $t, $a
++ vand $t, $t, $mask
++ veor $a, $a, $t
++ vshl.u64 $t, $t, #$n
++ veor $b, $b, $t
++___
++}
++sub swapmove2x {
++my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
++$code.=<<___;
++ vshr.u64 $t0, $b0, #$n
++ vshr.u64 $t1, $b1, #$n
++ veor $t0, $t0, $a0
++ veor $t1, $t1, $a1
++ vand $t0, $t0, $mask
++ vand $t1, $t1, $mask
++ veor $a0, $a0, $t0
++ vshl.u64 $t0, $t0, #$n
++ veor $a1, $a1, $t1
++ vshl.u64 $t1, $t1, #$n
++ veor $b0, $b0, $t0
++ veor $b1, $b1, $t1
++___
++}
++
++sub bitslice {
++my @x=reverse(@_[0..7]);
++my ($t0,$t1,$t2,$t3)=@_[8..11];
++$code.=<<___;
++ vmov.i8 $t0,#0x55 @ compose .LBS0
++ vmov.i8 $t1,#0x33 @ compose .LBS1
++___
++ &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
++ &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
++$code.=<<___;
++ vmov.i8 $t0,#0x0f @ compose .LBS2
++___
++ &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
++ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
++
++ &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
++ &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
++}
++
++$code.=<<___;
++#ifndef __KERNEL__
++# include "arm_arch.h"
++
++# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
++# define VFP_ABI_POP vldmia sp!,{d8-d15}
++# define VFP_ABI_FRAME 0x40
++#else
++# define VFP_ABI_PUSH
++# define VFP_ABI_POP
++# define VFP_ABI_FRAME 0
++# define BSAES_ASM_EXTENDED_KEY
++# define XTS_CHAIN_TWEAK
++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
++#endif
++
++#ifdef __thumb__
++# define adrl adr
++#endif
++
++#if __ARM_ARCH__>=7
++.text
++.syntax unified @ ARMv7-capable assembler is expected to handle this
++#ifdef __thumb2__
++.thumb
++#else
++.code 32
++#endif
++
++.fpu neon
++
++.type _bsaes_decrypt8,%function
++.align 4
++_bsaes_decrypt8:
++ adr $const,_bsaes_decrypt8
++ vldmia $key!, {@XMM[9]} @ round 0 key
++ add $const,$const,#.LM0ISR-_bsaes_decrypt8
++
++ vldmia $const!, {@XMM[8]} @ .LM0ISR
++ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
++ veor @XMM[11], @XMM[1], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
++ veor @XMM[12], @XMM[2], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
++ veor @XMM[13], @XMM[3], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
++ veor @XMM[14], @XMM[4], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
++ veor @XMM[15], @XMM[5], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
++ veor @XMM[10], @XMM[6], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
++ veor @XMM[11], @XMM[7], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
++ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
++___
++ &bitslice (@XMM[0..7, 8..11]);
++$code.=<<___;
++ sub $rounds,$rounds,#1
++ b .Ldec_sbox
++.align 4
++.Ldec_loop:
++___
++ &ShiftRows (@XMM[0..7, 8..12]);
++$code.=".Ldec_sbox:\n";
++ &InvSbox (@XMM[0..7, 8..15]);
++$code.=<<___;
++ subs $rounds,$rounds,#1
++ bcc .Ldec_done
++___
++ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
++$code.=<<___;
++ vldmia $const, {@XMM[12]} @ .LISR
++ ite eq @ Thumb2 thing, sanity check in ARM
++ addeq $const,$const,#0x10
++ bne .Ldec_loop
++ vldmia $const, {@XMM[12]} @ .LISRM0
++ b .Ldec_loop
++.align 4
++.Ldec_done:
++___
++ &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
++$code.=<<___;
++ vldmia $key, {@XMM[8]} @ last round key
++ veor @XMM[6], @XMM[6], @XMM[8]
++ veor @XMM[4], @XMM[4], @XMM[8]
++ veor @XMM[2], @XMM[2], @XMM[8]
++ veor @XMM[7], @XMM[7], @XMM[8]
++ veor @XMM[3], @XMM[3], @XMM[8]
++ veor @XMM[5], @XMM[5], @XMM[8]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ veor @XMM[1], @XMM[1], @XMM[8]
++ bx lr
++.size _bsaes_decrypt8,.-_bsaes_decrypt8
++
++.type _bsaes_const,%object
++.align 6
++_bsaes_const:
++.LM0ISR: @ InvShiftRows constants
++ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
++.LISR:
++ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
++.LISRM0:
++ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
++.LM0SR: @ ShiftRows constants
++ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
++.LSR:
++ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
++.LSRM0:
++ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
++.LM0:
++ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
++.LREVM0SR:
++ .quad 0x090d01050c000408, 0x03070b0f060a0e02
++.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
++.align 6
++.size _bsaes_const,.-_bsaes_const
++
++.type _bsaes_encrypt8,%function
++.align 4
++_bsaes_encrypt8:
++ adr $const,_bsaes_encrypt8
++ vldmia $key!, {@XMM[9]} @ round 0 key
++ sub $const,$const,#_bsaes_encrypt8-.LM0SR
++
++ vldmia $const!, {@XMM[8]} @ .LM0SR
++_bsaes_encrypt8_alt:
++ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
++ veor @XMM[11], @XMM[1], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
++ veor @XMM[12], @XMM[2], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
++ veor @XMM[13], @XMM[3], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
++ veor @XMM[14], @XMM[4], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
++ veor @XMM[15], @XMM[5], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
++ veor @XMM[10], @XMM[6], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
++ veor @XMM[11], @XMM[7], @XMM[9]
++ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
++ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
++ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
++_bsaes_encrypt8_bitslice:
++___
++ &bitslice (@XMM[0..7, 8..11]);
++$code.=<<___;
++ sub $rounds,$rounds,#1
++ b .Lenc_sbox
++.align 4
++.Lenc_loop:
++___
++ &ShiftRows (@XMM[0..7, 8..12]);
++$code.=".Lenc_sbox:\n";
++ &Sbox (@XMM[0..7, 8..15]);
++$code.=<<___;
++ subs $rounds,$rounds,#1
++ bcc .Lenc_done
++___
++ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
++$code.=<<___;
++ vldmia $const, {@XMM[12]} @ .LSR
++ ite eq @ Thumb2 thing, samity check in ARM
++ addeq $const,$const,#0x10
++ bne .Lenc_loop
++ vldmia $const, {@XMM[12]} @ .LSRM0
++ b .Lenc_loop
++.align 4
++.Lenc_done:
++___
++ # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
++ &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
++$code.=<<___;
++ vldmia $key, {@XMM[8]} @ last round key
++ veor @XMM[4], @XMM[4], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[8]
++ veor @XMM[3], @XMM[3], @XMM[8]
++ veor @XMM[7], @XMM[7], @XMM[8]
++ veor @XMM[2], @XMM[2], @XMM[8]
++ veor @XMM[5], @XMM[5], @XMM[8]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ veor @XMM[1], @XMM[1], @XMM[8]
++ bx lr
++.size _bsaes_encrypt8,.-_bsaes_encrypt8
++___
++}
++{
++my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
++
++sub bitslice_key {
++my @x=reverse(@_[0..7]);
++my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
++
++ &swapmove (@x[0,1],1,$bs0,$t2,$t3);
++$code.=<<___;
++ @ &swapmove(@x[2,3],1,$t0,$t2,$t3);
++ vmov @x[2], @x[0]
++ vmov @x[3], @x[1]
++___
++ #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
++
++ &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
++$code.=<<___;
++ @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
++ vmov @x[4], @x[0]
++ vmov @x[6], @x[2]
++ vmov @x[5], @x[1]
++ vmov @x[7], @x[3]
++___
++ &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
++ &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
++}
++
++$code.=<<___;
++.type _bsaes_key_convert,%function
++.align 4
++_bsaes_key_convert:
++ adr $const,_bsaes_key_convert
++ vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key
++ sub $const,$const,#_bsaes_key_convert-.LM0
++ vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key
++
++ vmov.i8 @XMM[8], #0x01 @ bit masks
++ vmov.i8 @XMM[9], #0x02
++ vmov.i8 @XMM[10], #0x04
++ vmov.i8 @XMM[11], #0x08
++ vmov.i8 @XMM[12], #0x10
++ vmov.i8 @XMM[13], #0x20
++ vldmia $const, {@XMM[14]} @ .LM0
++
++#ifdef __ARMEL__
++ vrev32.8 @XMM[7], @XMM[7]
++ vrev32.8 @XMM[15], @XMM[15]
++#endif
++ sub $rounds,$rounds,#1
++ vstmia $out!, {@XMM[7]} @ save round 0 key
++ b .Lkey_loop
++
++.align 4
++.Lkey_loop:
++ vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
++ vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
++ vmov.i8 @XMM[6], #0x40
++ vmov.i8 @XMM[15], #0x80
++
++ vtst.8 @XMM[0], @XMM[7], @XMM[8]
++ vtst.8 @XMM[1], @XMM[7], @XMM[9]
++ vtst.8 @XMM[2], @XMM[7], @XMM[10]
++ vtst.8 @XMM[3], @XMM[7], @XMM[11]
++ vtst.8 @XMM[4], @XMM[7], @XMM[12]
++ vtst.8 @XMM[5], @XMM[7], @XMM[13]
++ vtst.8 @XMM[6], @XMM[7], @XMM[6]
++ vtst.8 @XMM[7], @XMM[7], @XMM[15]
++ vld1.8 {@XMM[15]}, [$inp]! @ load next round key
++ vmvn @XMM[0], @XMM[0] @ "pnot"
++ vmvn @XMM[1], @XMM[1]
++ vmvn @XMM[5], @XMM[5]
++ vmvn @XMM[6], @XMM[6]
++#ifdef __ARMEL__
++ vrev32.8 @XMM[15], @XMM[15]
++#endif
++ subs $rounds,$rounds,#1
++ vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key
++ bne .Lkey_loop
++
++ vmov.i8 @XMM[7],#0x63 @ compose .L63
++ @ don't save last round key
++ bx lr
++.size _bsaes_key_convert,.-_bsaes_key_convert
++___
++}
++
++if (0) { # following four functions are unsupported interface
++ # used for benchmarking...
++$code.=<<___;
++.globl bsaes_enc_key_convert
++.type bsaes_enc_key_convert,%function
++.align 4
++bsaes_enc_key_convert:
++ stmdb sp!,{r4-r6,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++
++ ldr r5,[$inp,#240] @ pass rounds
++ mov r4,$inp @ pass key
++ mov r12,$out @ pass key schedule
++ bl _bsaes_key_convert
++ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
++ vstmia r12, {@XMM[7]} @ save last round key
++
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r6,pc}
++.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
++
++.globl bsaes_encrypt_128
++.type bsaes_encrypt_128,%function
++.align 4
++bsaes_encrypt_128:
++ stmdb sp!,{r4-r6,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++.Lenc128_loop:
++ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
++ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
++ mov r4,$key @ pass the key
++ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
++ mov r5,#10 @ pass rounds
++ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
++
++ bl _bsaes_encrypt8
++
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[3]}, [$out]!
++ vst1.8 {@XMM[7]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ subs $len,$len,#0x80
++ vst1.8 {@XMM[5]}, [$out]!
++ bhi .Lenc128_loop
++
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r6,pc}
++.size bsaes_encrypt_128,.-bsaes_encrypt_128
++
++.globl bsaes_dec_key_convert
++.type bsaes_dec_key_convert,%function
++.align 4
++bsaes_dec_key_convert:
++ stmdb sp!,{r4-r6,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++
++ ldr r5,[$inp,#240] @ pass rounds
++ mov r4,$inp @ pass key
++ mov r12,$out @ pass key schedule
++ bl _bsaes_key_convert
++ vldmia $out, {@XMM[6]}
++ vstmia r12, {@XMM[15]} @ save last round key
++ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
++ vstmia $out, {@XMM[7]}
++
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r6,pc}
++.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
++
++.globl bsaes_decrypt_128
++.type bsaes_decrypt_128,%function
++.align 4
++bsaes_decrypt_128:
++ stmdb sp!,{r4-r6,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++.Ldec128_loop:
++ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
++ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
++ mov r4,$key @ pass the key
++ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
++ mov r5,#10 @ pass rounds
++ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
++
++ bl _bsaes_decrypt8
++
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ vst1.8 {@XMM[7]}, [$out]!
++ vst1.8 {@XMM[3]}, [$out]!
++ subs $len,$len,#0x80
++ vst1.8 {@XMM[5]}, [$out]!
++ bhi .Ldec128_loop
++
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r6,pc}
++.size bsaes_decrypt_128,.-bsaes_decrypt_128
++___
++}
++{
++my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
++my ($keysched)=("sp");
++
++$code.=<<___;
++.extern AES_cbc_encrypt
++.extern AES_decrypt
++
++.global bsaes_cbc_encrypt
++.type bsaes_cbc_encrypt,%function
++.align 5
++bsaes_cbc_encrypt:
++#ifndef __KERNEL__
++ cmp $len, #128
++#ifndef __thumb__
++ blo AES_cbc_encrypt
++#else
++ bhs 1f
++ b AES_cbc_encrypt
++1:
++#endif
++#endif
++
++ @ it is up to the caller to make sure we are called with enc == 0
++
++ mov ip, sp
++ stmdb sp!, {r4-r10, lr}
++ VFP_ABI_PUSH
++ ldr $ivp, [ip] @ IV is 1st arg on the stack
++ mov $len, $len, lsr#4 @ len in 16 byte blocks
++ sub sp, #0x10 @ scratch space to carry over the IV
++ mov $fp, sp @ save sp
++
++ ldr $rounds, [$key, #240] @ get # of rounds
++#ifndef BSAES_ASM_EXTENDED_KEY
++ @ allocate the key schedule on the stack
++ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
++ add r12, #`128-32` @ sifze of bit-slices key schedule
++
++ @ populate the key schedule
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ mov sp, r12 @ sp is $keysched
++ bl _bsaes_key_convert
++ vldmia $keysched, {@XMM[6]}
++ vstmia r12, {@XMM[15]} @ save last round key
++ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
++ vstmia $keysched, {@XMM[7]}
++#else
++ ldr r12, [$key, #244]
++ eors r12, #1
++ beq 0f
++
++ @ populate the key schedule
++ str r12, [$key, #244]
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ add r12, $key, #248 @ pass key schedule
++ bl _bsaes_key_convert
++ add r4, $key, #248
++ vldmia r4, {@XMM[6]}
++ vstmia r12, {@XMM[15]} @ save last round key
++ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
++ vstmia r4, {@XMM[7]}
++
++.align 2
++0:
++#endif
++
++ vld1.8 {@XMM[15]}, [$ivp] @ load IV
++ b .Lcbc_dec_loop
++
++.align 4
++.Lcbc_dec_loop:
++ subs $len, $len, #0x8
++ bmi .Lcbc_dec_loop_finish
++
++ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
++ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
++#ifndef BSAES_ASM_EXTENDED_KEY
++ mov r4, $keysched @ pass the key
++#else
++ add r4, $key, #248
++#endif
++ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
++ mov r5, $rounds
++ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]
++ sub $inp, $inp, #0x60
++ vstmia $fp, {@XMM[15]} @ put aside IV
++
++ bl _bsaes_decrypt8
++
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
++ veor @XMM[4], @XMM[4], @XMM[10]
++ veor @XMM[2], @XMM[2], @XMM[11]
++ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
++ veor @XMM[7], @XMM[7], @XMM[12]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ veor @XMM[3], @XMM[3], @XMM[13]
++ vst1.8 {@XMM[6]}, [$out]!
++ veor @XMM[5], @XMM[5], @XMM[14]
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ vst1.8 {@XMM[7]}, [$out]!
++ vst1.8 {@XMM[3]}, [$out]!
++ vst1.8 {@XMM[5]}, [$out]!
++
++ b .Lcbc_dec_loop
++
++.Lcbc_dec_loop_finish:
++ adds $len, $len, #8
++ beq .Lcbc_dec_done
++
++ vld1.8 {@XMM[0]}, [$inp]! @ load input
++ cmp $len, #2
++ blo .Lcbc_dec_one
++ vld1.8 {@XMM[1]}, [$inp]!
++#ifndef BSAES_ASM_EXTENDED_KEY
++ mov r4, $keysched @ pass the key
++#else
++ add r4, $key, #248
++#endif
++ mov r5, $rounds
++ vstmia $fp, {@XMM[15]} @ put aside IV
++ beq .Lcbc_dec_two
++ vld1.8 {@XMM[2]}, [$inp]!
++ cmp $len, #4
++ blo .Lcbc_dec_three
++ vld1.8 {@XMM[3]}, [$inp]!
++ beq .Lcbc_dec_four
++ vld1.8 {@XMM[4]}, [$inp]!
++ cmp $len, #6
++ blo .Lcbc_dec_five
++ vld1.8 {@XMM[5]}, [$inp]!
++ beq .Lcbc_dec_six
++ vld1.8 {@XMM[6]}, [$inp]!
++ sub $inp, $inp, #0x70
++
++ bl _bsaes_decrypt8
++
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
++ veor @XMM[4], @XMM[4], @XMM[10]
++ veor @XMM[2], @XMM[2], @XMM[11]
++ vld1.8 {@XMM[15]}, [$inp]!
++ veor @XMM[7], @XMM[7], @XMM[12]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ veor @XMM[3], @XMM[3], @XMM[13]
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ vst1.8 {@XMM[7]}, [$out]!
++ vst1.8 {@XMM[3]}, [$out]!
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_six:
++ sub $inp, $inp, #0x60
++ bl _bsaes_decrypt8
++ vldmia $fp,{@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vld1.8 {@XMM[12]}, [$inp]!
++ veor @XMM[4], @XMM[4], @XMM[10]
++ veor @XMM[2], @XMM[2], @XMM[11]
++ vld1.8 {@XMM[15]}, [$inp]!
++ veor @XMM[7], @XMM[7], @XMM[12]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ vst1.8 {@XMM[7]}, [$out]!
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_five:
++ sub $inp, $inp, #0x50
++ bl _bsaes_decrypt8
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vld1.8 {@XMM[15]}, [$inp]!
++ veor @XMM[4], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ veor @XMM[2], @XMM[2], @XMM[11]
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[4]}, [$out]!
++ vst1.8 {@XMM[2]}, [$out]!
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_four:
++ sub $inp, $inp, #0x40
++ bl _bsaes_decrypt8
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[10]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vld1.8 {@XMM[15]}, [$inp]!
++ veor @XMM[4], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ vst1.8 {@XMM[6]}, [$out]!
++ vst1.8 {@XMM[4]}, [$out]!
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_three:
++ sub $inp, $inp, #0x30
++ bl _bsaes_decrypt8
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[15]}, [$inp]!
++ veor @XMM[1], @XMM[1], @XMM[8]
++ veor @XMM[6], @XMM[6], @XMM[9]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ vst1.8 {@XMM[6]}, [$out]!
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_two:
++ sub $inp, $inp, #0x20
++ bl _bsaes_decrypt8
++ vldmia $fp, {@XMM[14]} @ reload IV
++ vld1.8 {@XMM[8]}, [$inp]! @ reload input
++ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
++ vld1.8 {@XMM[15]}, [$inp]! @ reload input
++ veor @XMM[1], @XMM[1], @XMM[8]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ b .Lcbc_dec_done
++.align 4
++.Lcbc_dec_one:
++ sub $inp, $inp, #0x10
++ mov $rounds, $out @ save original out pointer
++ mov $out, $fp @ use the iv scratch space as out buffer
++ mov r2, $key
++ vmov @XMM[4],@XMM[15] @ just in case ensure that IV
++ vmov @XMM[5],@XMM[0] @ and input are preserved
++ bl AES_decrypt
++ vld1.8 {@XMM[0]}, [$fp,:64] @ load result
++ veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
++ vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
++ vst1.8 {@XMM[0]}, [$rounds] @ write output
++
++.Lcbc_dec_done:
++#ifndef BSAES_ASM_EXTENDED_KEY
++ vmov.i32 q0, #0
++ vmov.i32 q1, #0
++.Lcbc_dec_bzero: @ wipe key schedule [if any]
++ vstmia $keysched!, {q0-q1}
++ cmp $keysched, $fp
++ bne .Lcbc_dec_bzero
++#endif
++
++ mov sp, $fp
++ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
++ vst1.8 {@XMM[15]}, [$ivp] @ return IV
++ VFP_ABI_POP
++ ldmia sp!, {r4-r10, pc}
++.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
++___
++}
++{
++my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
++my $const = "r6"; # shared with _bsaes_encrypt8_alt
++my $keysched = "sp";
++
++$code.=<<___;
++.extern AES_encrypt
++.global bsaes_ctr32_encrypt_blocks
++.type bsaes_ctr32_encrypt_blocks,%function
++.align 5
++bsaes_ctr32_encrypt_blocks:
++ cmp $len, #8 @ use plain AES for
++ blo .Lctr_enc_short @ small sizes
++
++ mov ip, sp
++ stmdb sp!, {r4-r10, lr}
++ VFP_ABI_PUSH
++ ldr $ctr, [ip] @ ctr is 1st arg on the stack
++ sub sp, sp, #0x10 @ scratch space to carry over the ctr
++ mov $fp, sp @ save sp
++
++ ldr $rounds, [$key, #240] @ get # of rounds
++#ifndef BSAES_ASM_EXTENDED_KEY
++ @ allocate the key schedule on the stack
++ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
++ add r12, #`128-32` @ size of bit-sliced key schedule
++
++ @ populate the key schedule
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ mov sp, r12 @ sp is $keysched
++ bl _bsaes_key_convert
++ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
++ vstmia r12, {@XMM[7]} @ save last round key
++
++ vld1.8 {@XMM[0]}, [$ctr] @ load counter
++ add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
++ vldmia $keysched, {@XMM[4]} @ load round0 key
++#else
++ ldr r12, [$key, #244]
++ eors r12, #1
++ beq 0f
++
++ @ populate the key schedule
++ str r12, [$key, #244]
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ add r12, $key, #248 @ pass key schedule
++ bl _bsaes_key_convert
++ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
++ vstmia r12, {@XMM[7]} @ save last round key
++
++.align 2
++0: add r12, $key, #248
++ vld1.8 {@XMM[0]}, [$ctr] @ load counter
++ adrl $ctr, .LREVM0SR @ borrow $ctr
++ vldmia r12, {@XMM[4]} @ load round0 key
++ sub sp, #0x10 @ place for adjusted round0 key
++#endif
++
++ vmov.i32 @XMM[8],#1 @ compose 1<<96
++ veor @XMM[9],@XMM[9],@XMM[9]
++ vrev32.8 @XMM[0],@XMM[0]
++ vext.8 @XMM[8],@XMM[9],@XMM[8],#4
++ vrev32.8 @XMM[4],@XMM[4]
++ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
++ vstmia $keysched, {@XMM[4]} @ save adjusted round0 key
++ b .Lctr_enc_loop
++
++.align 4
++.Lctr_enc_loop:
++ vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96
++ vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1
++ vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2
++ vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3
++ vadd.u32 @XMM[4], @XMM[1], @XMM[10]
++ vadd.u32 @XMM[5], @XMM[2], @XMM[10]
++ vadd.u32 @XMM[6], @XMM[3], @XMM[10]
++ vadd.u32 @XMM[7], @XMM[4], @XMM[10]
++ vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter
++
++ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
++ @ to flip byte order in 32-bit counter
++
++ vldmia $keysched, {@XMM[9]} @ load round0 key
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, $keysched, #0x10 @ pass next round key
++#else
++ add r4, $key, #`248+16`
++#endif
++ vldmia $ctr, {@XMM[8]} @ .LREVM0SR
++ mov r5, $rounds @ pass rounds
++ vstmia $fp, {@XMM[10]} @ save next counter
++ sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
++
++ bl _bsaes_encrypt8_alt
++
++ subs $len, $len, #8
++ blo .Lctr_enc_loop_done
++
++ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
++ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++ veor @XMM[0], @XMM[8]
++ veor @XMM[1], @XMM[9]
++ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
++ veor @XMM[4], @XMM[10]
++ veor @XMM[6], @XMM[11]
++ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
++ veor @XMM[3], @XMM[12]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
++ veor @XMM[7], @XMM[13]
++ veor @XMM[2], @XMM[14]
++ vst1.8 {@XMM[4]}, [$out]!
++ veor @XMM[5], @XMM[15]
++ vst1.8 {@XMM[6]}, [$out]!
++ vmov.i32 @XMM[8], #1 @ compose 1<<96
++ vst1.8 {@XMM[3]}, [$out]!
++ veor @XMM[9], @XMM[9], @XMM[9]
++ vst1.8 {@XMM[7]}, [$out]!
++ vext.8 @XMM[8], @XMM[9], @XMM[8], #4
++ vst1.8 {@XMM[2]}, [$out]!
++ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
++ vst1.8 {@XMM[5]}, [$out]!
++ vldmia $fp, {@XMM[0]} @ load counter
++
++ bne .Lctr_enc_loop
++ b .Lctr_enc_done
++
++.align 4
++.Lctr_enc_loop_done:
++ add $len, $len, #8
++ vld1.8 {@XMM[8]}, [$inp]! @ load input
++ veor @XMM[0], @XMM[8]
++ vst1.8 {@XMM[0]}, [$out]! @ write output
++ cmp $len, #2
++ blo .Lctr_enc_done
++ vld1.8 {@XMM[9]}, [$inp]!
++ veor @XMM[1], @XMM[9]
++ vst1.8 {@XMM[1]}, [$out]!
++ beq .Lctr_enc_done
++ vld1.8 {@XMM[10]}, [$inp]!
++ veor @XMM[4], @XMM[10]
++ vst1.8 {@XMM[4]}, [$out]!
++ cmp $len, #4
++ blo .Lctr_enc_done
++ vld1.8 {@XMM[11]}, [$inp]!
++ veor @XMM[6], @XMM[11]
++ vst1.8 {@XMM[6]}, [$out]!
++ beq .Lctr_enc_done
++ vld1.8 {@XMM[12]}, [$inp]!
++ veor @XMM[3], @XMM[12]
++ vst1.8 {@XMM[3]}, [$out]!
++ cmp $len, #6
++ blo .Lctr_enc_done
++ vld1.8 {@XMM[13]}, [$inp]!
++ veor @XMM[7], @XMM[13]
++ vst1.8 {@XMM[7]}, [$out]!
++ beq .Lctr_enc_done
++ vld1.8 {@XMM[14]}, [$inp]
++ veor @XMM[2], @XMM[14]
++ vst1.8 {@XMM[2]}, [$out]!
++
++.Lctr_enc_done:
++ vmov.i32 q0, #0
++ vmov.i32 q1, #0
++#ifndef BSAES_ASM_EXTENDED_KEY
++.Lctr_enc_bzero: @ wipe key schedule [if any]
++ vstmia $keysched!, {q0-q1}
++ cmp $keysched, $fp
++ bne .Lctr_enc_bzero
++#else
++ vstmia $keysched, {q0-q1}
++#endif
++
++ mov sp, $fp
++ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
++ VFP_ABI_POP
++ ldmia sp!, {r4-r10, pc} @ return
++
++.align 4
++.Lctr_enc_short:
++ ldr ip, [sp] @ ctr pointer is passed on stack
++ stmdb sp!, {r4-r8, lr}
++
++ mov r4, $inp @ copy arguments
++ mov r5, $out
++ mov r6, $len
++ mov r7, $key
++ ldr r8, [ip, #12] @ load counter LSW
++ vld1.8 {@XMM[1]}, [ip] @ load whole counter value
++#ifdef __ARMEL__
++ rev r8, r8
++#endif
++ sub sp, sp, #0x10
++ vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
++ sub sp, sp, #0x10
++
++.Lctr_enc_short_loop:
++ add r0, sp, #0x10 @ input counter value
++ mov r1, sp @ output on the stack
++ mov r2, r7 @ key
++
++ bl AES_encrypt
++
++ vld1.8 {@XMM[0]}, [r4]! @ load input
++ vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
++ add r8, r8, #1
++#ifdef __ARMEL__
++ rev r0, r8
++ str r0, [sp, #0x1c] @ next counter value
++#else
++ str r8, [sp, #0x1c] @ next counter value
++#endif
++ veor @XMM[0],@XMM[0],@XMM[1]
++ vst1.8 {@XMM[0]}, [r5]! @ store output
++ subs r6, r6, #1
++ bne .Lctr_enc_short_loop
++
++ vmov.i32 q0, #0
++ vmov.i32 q1, #0
++ vstmia sp!, {q0-q1}
++
++ ldmia sp!, {r4-r8, pc}
++.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
++___
++}
++{
++######################################################################
++# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
++# const AES_KEY *key1, const AES_KEY *key2,
++# const unsigned char iv[16]);
++#
++my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
++my $const="r6"; # returned by _bsaes_key_convert
++my $twmask=@XMM[5];
++my @T=@XMM[6..7];
++
++$code.=<<___;
++.globl bsaes_xts_encrypt
++.type bsaes_xts_encrypt,%function
++.align 4
++bsaes_xts_encrypt:
++ mov ip, sp
++ stmdb sp!, {r4-r10, lr} @ 0x20
++ VFP_ABI_PUSH
++ mov r6, sp @ future $fp
++
++ mov $inp, r0
++ mov $out, r1
++ mov $len, r2
++ mov $key, r3
++
++ sub r0, sp, #0x10 @ 0x10
++ bic r0, #0xf @ align at 16 bytes
++ mov sp, r0
++
++#ifdef XTS_CHAIN_TWEAK
++ ldr r0, [ip] @ pointer to input tweak
++#else
++ @ generate initial tweak
++ ldr r0, [ip, #4] @ iv[]
++ mov r1, sp
++ ldr r2, [ip, #0] @ key2
++ bl AES_encrypt
++ mov r0,sp @ pointer to initial tweak
++#endif
++
++ ldr $rounds, [$key, #240] @ get # of rounds
++ mov $fp, r6
++#ifndef BSAES_ASM_EXTENDED_KEY
++ @ allocate the key schedule on the stack
++ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
++ @ add r12, #`128-32` @ size of bit-sliced key schedule
++ sub r12, #`32+16` @ place for tweak[9]
++
++ @ populate the key schedule
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ mov sp, r12
++ add r12, #0x90 @ pass key schedule
++ bl _bsaes_key_convert
++ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
++ vstmia r12, {@XMM[7]} @ save last round key
++#else
++ ldr r12, [$key, #244]
++ eors r12, #1
++ beq 0f
++
++ str r12, [$key, #244]
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ add r12, $key, #248 @ pass key schedule
++ bl _bsaes_key_convert
++ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
++ vstmia r12, {@XMM[7]}
++
++.align 2
++0: sub sp, #0x90 @ place for tweak[9]
++#endif
++
++ vld1.8 {@XMM[8]}, [r0] @ initial tweak
++ adr $magic, .Lxts_magic
++
++ subs $len, #0x80
++ blo .Lxts_enc_short
++ b .Lxts_enc_loop
++
++.align 4
++.Lxts_enc_loop:
++ vldmia $magic, {$twmask} @ load XTS magic
++ vshr.s64 @T[0], @XMM[8], #63
++ mov r0, sp
++ vand @T[0], @T[0], $twmask
++___
++for($i=9;$i<16;$i++) {
++$code.=<<___;
++ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
++ vst1.64 {@XMM[$i-1]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ vshr.s64 @T[1], @XMM[$i], #63
++ veor @XMM[$i], @XMM[$i], @T[0]
++ vand @T[1], @T[1], $twmask
++___
++ @T=reverse(@T);
++
++$code.=<<___ if ($i>=10);
++ vld1.8 {@XMM[$i-10]}, [$inp]!
++___
++$code.=<<___ if ($i>=11);
++ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
++___
++}
++$code.=<<___;
++ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
++ vst1.64 {@XMM[15]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ veor @XMM[8], @XMM[8], @T[0]
++ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++
++ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
++ veor @XMM[5], @XMM[5], @XMM[13]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[6], @XMM[6], @XMM[14]
++ mov r5, $rounds @ pass rounds
++ veor @XMM[7], @XMM[7], @XMM[15]
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[6], @XMM[11]
++ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
++ veor @XMM[10], @XMM[3], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ veor @XMM[12], @XMM[2], @XMM[14]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++ veor @XMM[13], @XMM[5], @XMM[15]
++ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++
++ subs $len, #0x80
++ bpl .Lxts_enc_loop
++
++.Lxts_enc_short:
++ adds $len, #0x70
++ bmi .Lxts_enc_done
++
++ vldmia $magic, {$twmask} @ load XTS magic
++ vshr.s64 @T[0], @XMM[8], #63
++ mov r0, sp
++ vand @T[0], @T[0], $twmask
++___
++for($i=9;$i<16;$i++) {
++$code.=<<___;
++ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
++ vst1.64 {@XMM[$i-1]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ vshr.s64 @T[1], @XMM[$i], #63
++ veor @XMM[$i], @XMM[$i], @T[0]
++ vand @T[1], @T[1], $twmask
++___
++ @T=reverse(@T);
++
++$code.=<<___ if ($i>=10);
++ vld1.8 {@XMM[$i-10]}, [$inp]!
++ subs $len, #0x10
++ bmi .Lxts_enc_`$i-9`
++___
++$code.=<<___ if ($i>=11);
++ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
++___
++}
++$code.=<<___;
++ sub $len, #0x10
++ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
++
++ vld1.8 {@XMM[6]}, [$inp]!
++ veor @XMM[5], @XMM[5], @XMM[13]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[6], @XMM[6], @XMM[14]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[6], @XMM[11]
++ vld1.64 {@XMM[14]}, [r0,:128]!
++ veor @XMM[10], @XMM[3], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ veor @XMM[12], @XMM[2], @XMM[14]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++ vst1.8 {@XMM[12]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++.align 4
++.Lxts_enc_6:
++ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
++
++ veor @XMM[4], @XMM[4], @XMM[12]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[5], @XMM[5], @XMM[13]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[6], @XMM[11]
++ veor @XMM[10], @XMM[3], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++
++@ put this in range for both ARM and Thumb mode adr instructions
++.align 5
++.Lxts_magic:
++ .quad 1, 0x87
++
++.align 5
++.Lxts_enc_5:
++ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
++
++ veor @XMM[3], @XMM[3], @XMM[11]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[4], @XMM[4], @XMM[12]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[6], @XMM[11]
++ veor @XMM[10], @XMM[3], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ vst1.8 {@XMM[10]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++.align 4
++.Lxts_enc_4:
++ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
++
++ veor @XMM[2], @XMM[2], @XMM[10]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[3], @XMM[3], @XMM[11]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[6], @XMM[11]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++.align 4
++.Lxts_enc_3:
++ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
++
++ veor @XMM[1], @XMM[1], @XMM[9]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[2], @XMM[2], @XMM[10]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
++ vld1.64 {@XMM[10]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[4], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ vst1.8 {@XMM[8]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++.align 4
++.Lxts_enc_2:
++ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
++
++ veor @XMM[0], @XMM[0], @XMM[8]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[1], @XMM[1], @XMM[9]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_encrypt8
++
++ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_enc_done
++.align 4
++.Lxts_enc_1:
++ mov r0, sp
++ veor @XMM[0], @XMM[8]
++ mov r1, sp
++ vst1.8 {@XMM[0]}, [sp,:128]
++ mov r2, $key
++ mov r4, $fp @ preserve fp
++
++ bl AES_encrypt
++
++ vld1.8 {@XMM[0]}, [sp,:128]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ vst1.8 {@XMM[0]}, [$out]!
++ mov $fp, r4
++
++ vmov @XMM[8], @XMM[9] @ next round tweak
++
++.Lxts_enc_done:
++#ifndef XTS_CHAIN_TWEAK
++ adds $len, #0x10
++ beq .Lxts_enc_ret
++ sub r6, $out, #0x10
++
++.Lxts_enc_steal:
++ ldrb r0, [$inp], #1
++ ldrb r1, [$out, #-0x10]
++ strb r0, [$out, #-0x10]
++ strb r1, [$out], #1
++
++ subs $len, #1
++ bhi .Lxts_enc_steal
++
++ vld1.8 {@XMM[0]}, [r6]
++ mov r0, sp
++ veor @XMM[0], @XMM[0], @XMM[8]
++ mov r1, sp
++ vst1.8 {@XMM[0]}, [sp,:128]
++ mov r2, $key
++ mov r4, $fp @ preserve fp
++
++ bl AES_encrypt
++
++ vld1.8 {@XMM[0]}, [sp,:128]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ vst1.8 {@XMM[0]}, [r6]
++ mov $fp, r4
++#endif
++
++.Lxts_enc_ret:
++ bic r0, $fp, #0xf
++ vmov.i32 q0, #0
++ vmov.i32 q1, #0
++#ifdef XTS_CHAIN_TWEAK
++ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
++#endif
++.Lxts_enc_bzero: @ wipe key schedule [if any]
++ vstmia sp!, {q0-q1}
++ cmp sp, r0
++ bne .Lxts_enc_bzero
++
++ mov sp, $fp
++#ifdef XTS_CHAIN_TWEAK
++ vst1.8 {@XMM[8]}, [r1]
++#endif
++ VFP_ABI_POP
++ ldmia sp!, {r4-r10, pc} @ return
++
++.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
++
++.globl bsaes_xts_decrypt
++.type bsaes_xts_decrypt,%function
++.align 4
++bsaes_xts_decrypt:
++ mov ip, sp
++ stmdb sp!, {r4-r10, lr} @ 0x20
++ VFP_ABI_PUSH
++ mov r6, sp @ future $fp
++
++ mov $inp, r0
++ mov $out, r1
++ mov $len, r2
++ mov $key, r3
++
++ sub r0, sp, #0x10 @ 0x10
++ bic r0, #0xf @ align at 16 bytes
++ mov sp, r0
++
++#ifdef XTS_CHAIN_TWEAK
++ ldr r0, [ip] @ pointer to input tweak
++#else
++ @ generate initial tweak
++ ldr r0, [ip, #4] @ iv[]
++ mov r1, sp
++ ldr r2, [ip, #0] @ key2
++ bl AES_encrypt
++ mov r0, sp @ pointer to initial tweak
++#endif
++
++ ldr $rounds, [$key, #240] @ get # of rounds
++ mov $fp, r6
++#ifndef BSAES_ASM_EXTENDED_KEY
++ @ allocate the key schedule on the stack
++ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
++ @ add r12, #`128-32` @ size of bit-sliced key schedule
++ sub r12, #`32+16` @ place for tweak[9]
++
++ @ populate the key schedule
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ mov sp, r12
++ add r12, #0x90 @ pass key schedule
++ bl _bsaes_key_convert
++ add r4, sp, #0x90
++ vldmia r4, {@XMM[6]}
++ vstmia r12, {@XMM[15]} @ save last round key
++ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
++ vstmia r4, {@XMM[7]}
++#else
++ ldr r12, [$key, #244]
++ eors r12, #1
++ beq 0f
++
++ str r12, [$key, #244]
++ mov r4, $key @ pass key
++ mov r5, $rounds @ pass # of rounds
++ add r12, $key, #248 @ pass key schedule
++ bl _bsaes_key_convert
++ add r4, $key, #248
++ vldmia r4, {@XMM[6]}
++ vstmia r12, {@XMM[15]} @ save last round key
++ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
++ vstmia r4, {@XMM[7]}
++
++.align 2
++0: sub sp, #0x90 @ place for tweak[9]
++#endif
++ vld1.8 {@XMM[8]}, [r0] @ initial tweak
++ adr $magic, .Lxts_magic
++
++ tst $len, #0xf @ if not multiple of 16
++ it ne @ Thumb2 thing, sanity check in ARM
++ subne $len, #0x10 @ subtract another 16 bytes
++ subs $len, #0x80
++
++ blo .Lxts_dec_short
++ b .Lxts_dec_loop
++
++.align 4
++.Lxts_dec_loop:
++ vldmia $magic, {$twmask} @ load XTS magic
++ vshr.s64 @T[0], @XMM[8], #63
++ mov r0, sp
++ vand @T[0], @T[0], $twmask
++___
++for($i=9;$i<16;$i++) {
++$code.=<<___;
++ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
++ vst1.64 {@XMM[$i-1]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ vshr.s64 @T[1], @XMM[$i], #63
++ veor @XMM[$i], @XMM[$i], @T[0]
++ vand @T[1], @T[1], $twmask
++___
++ @T=reverse(@T);
++
++$code.=<<___ if ($i>=10);
++ vld1.8 {@XMM[$i-10]}, [$inp]!
++___
++$code.=<<___ if ($i>=11);
++ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
++___
++}
++$code.=<<___;
++ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
++ vst1.64 {@XMM[15]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ veor @XMM[8], @XMM[8], @T[0]
++ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++
++ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
++ veor @XMM[5], @XMM[5], @XMM[13]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[6], @XMM[6], @XMM[14]
++ mov r5, $rounds @ pass rounds
++ veor @XMM[7], @XMM[7], @XMM[15]
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[4], @XMM[11]
++ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
++ veor @XMM[10], @XMM[2], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ veor @XMM[12], @XMM[3], @XMM[14]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++ veor @XMM[13], @XMM[5], @XMM[15]
++ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++
++ subs $len, #0x80
++ bpl .Lxts_dec_loop
++
++.Lxts_dec_short:
++ adds $len, #0x70
++ bmi .Lxts_dec_done
++
++ vldmia $magic, {$twmask} @ load XTS magic
++ vshr.s64 @T[0], @XMM[8], #63
++ mov r0, sp
++ vand @T[0], @T[0], $twmask
++___
++for($i=9;$i<16;$i++) {
++$code.=<<___;
++ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
++ vst1.64 {@XMM[$i-1]}, [r0,:128]!
++ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
++ vshr.s64 @T[1], @XMM[$i], #63
++ veor @XMM[$i], @XMM[$i], @T[0]
++ vand @T[1], @T[1], $twmask
++___
++ @T=reverse(@T);
++
++$code.=<<___ if ($i>=10);
++ vld1.8 {@XMM[$i-10]}, [$inp]!
++ subs $len, #0x10
++ bmi .Lxts_dec_`$i-9`
++___
++$code.=<<___ if ($i>=11);
++ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
++___
++}
++$code.=<<___;
++ sub $len, #0x10
++ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
++
++ vld1.8 {@XMM[6]}, [$inp]!
++ veor @XMM[5], @XMM[5], @XMM[13]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[6], @XMM[6], @XMM[14]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[4], @XMM[11]
++ vld1.64 {@XMM[14]}, [r0,:128]!
++ veor @XMM[10], @XMM[2], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ veor @XMM[12], @XMM[3], @XMM[14]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++ vst1.8 {@XMM[12]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_6:
++ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
++
++ veor @XMM[4], @XMM[4], @XMM[12]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[5], @XMM[5], @XMM[13]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[4], @XMM[11]
++ veor @XMM[10], @XMM[2], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ veor @XMM[11], @XMM[7], @XMM[13]
++ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_5:
++ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
++
++ veor @XMM[3], @XMM[3], @XMM[11]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[4], @XMM[4], @XMM[12]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ vld1.64 {@XMM[12]}, [r0,:128]!
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[4], @XMM[11]
++ veor @XMM[10], @XMM[2], @XMM[12]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++ vst1.8 {@XMM[10]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_4:
++ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
++
++ veor @XMM[2], @XMM[2], @XMM[10]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[3], @XMM[3], @XMM[11]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
++ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ veor @XMM[9], @XMM[4], @XMM[11]
++ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_3:
++ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
++
++ veor @XMM[1], @XMM[1], @XMM[9]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[2], @XMM[2], @XMM[10]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
++ vld1.64 {@XMM[10]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ veor @XMM[8], @XMM[6], @XMM[10]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++ vst1.8 {@XMM[8]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_2:
++ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
++
++ veor @XMM[0], @XMM[0], @XMM[8]
++#ifndef BSAES_ASM_EXTENDED_KEY
++ add r4, sp, #0x90 @ pass key schedule
++#else
++ add r4, $key, #248 @ pass key schedule
++#endif
++ veor @XMM[1], @XMM[1], @XMM[9]
++ mov r5, $rounds @ pass rounds
++ mov r0, sp
++
++ bl _bsaes_decrypt8
++
++ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
++ veor @XMM[0], @XMM[0], @XMM[ 8]
++ veor @XMM[1], @XMM[1], @XMM[ 9]
++ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
++
++ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
++ b .Lxts_dec_done
++.align 4
++.Lxts_dec_1:
++ mov r0, sp
++ veor @XMM[0], @XMM[8]
++ mov r1, sp
++ vst1.8 {@XMM[0]}, [sp,:128]
++ mov r2, $key
++ mov r4, $fp @ preserve fp
++ mov r5, $magic @ preserve magic
++
++ bl AES_decrypt
++
++ vld1.8 {@XMM[0]}, [sp,:128]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ vst1.8 {@XMM[0]}, [$out]!
++ mov $fp, r4
++ mov $magic, r5
++
++ vmov @XMM[8], @XMM[9] @ next round tweak
++
++.Lxts_dec_done:
++#ifndef XTS_CHAIN_TWEAK
++ adds $len, #0x10
++ beq .Lxts_dec_ret
++
++ @ calculate one round of extra tweak for the stolen ciphertext
++ vldmia $magic, {$twmask}
++ vshr.s64 @XMM[6], @XMM[8], #63
++ vand @XMM[6], @XMM[6], $twmask
++ vadd.u64 @XMM[9], @XMM[8], @XMM[8]
++ vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
++ veor @XMM[9], @XMM[9], @XMM[6]
++
++ @ perform the final decryption with the last tweak value
++ vld1.8 {@XMM[0]}, [$inp]!
++ mov r0, sp
++ veor @XMM[0], @XMM[0], @XMM[9]
++ mov r1, sp
++ vst1.8 {@XMM[0]}, [sp,:128]
++ mov r2, $key
++ mov r4, $fp @ preserve fp
++
++ bl AES_decrypt
++
++ vld1.8 {@XMM[0]}, [sp,:128]
++ veor @XMM[0], @XMM[0], @XMM[9]
++ vst1.8 {@XMM[0]}, [$out]
++
++ mov r6, $out
++.Lxts_dec_steal:
++ ldrb r1, [$out]
++ ldrb r0, [$inp], #1
++ strb r1, [$out, #0x10]
++ strb r0, [$out], #1
++
++ subs $len, #1
++ bhi .Lxts_dec_steal
++
++ vld1.8 {@XMM[0]}, [r6]
++ mov r0, sp
++ veor @XMM[0], @XMM[8]
++ mov r1, sp
++ vst1.8 {@XMM[0]}, [sp,:128]
++ mov r2, $key
++
++ bl AES_decrypt
++
++ vld1.8 {@XMM[0]}, [sp,:128]
++ veor @XMM[0], @XMM[0], @XMM[8]
++ vst1.8 {@XMM[0]}, [r6]
++ mov $fp, r4
++#endif
++
++.Lxts_dec_ret:
++ bic r0, $fp, #0xf
++ vmov.i32 q0, #0
++ vmov.i32 q1, #0
++#ifdef XTS_CHAIN_TWEAK
++ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
++#endif
++.Lxts_dec_bzero: @ wipe key schedule [if any]
++ vstmia sp!, {q0-q1}
++ cmp sp, r0
++ bne .Lxts_dec_bzero
++
++ mov sp, $fp
++#ifdef XTS_CHAIN_TWEAK
++ vst1.8 {@XMM[8]}, [r1]
++#endif
++ VFP_ABI_POP
++ ldmia sp!, {r4-r10, pc} @ return
++
++.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
++___
++}
++$code.=<<___;
++#endif
++___
++
++$code =~ s/\`([^\`]*)\`/eval($1)/gem;
++
++open SELF,$0;
++while(<SELF>) {
++ next if (/^#!/);
++ last if (!s/^#/@/ and !/^$/);
++ print;
++}
++close SELF;
++
++print $code;
++
++close STDOUT;
+diff --git a/crypto/arm64cpuid.S b/crypto/arm64cpuid.S
+new file mode 100644
+index 0000000..4778ac1
+--- /dev/null
++++ b/crypto/arm64cpuid.S
+@@ -0,0 +1,46 @@
++#include "arm_arch.h"
++
++.text
++.arch armv8-a+crypto
++
++.align 5
++.global _armv7_neon_probe
++.type _armv7_neon_probe,%function
++_armv7_neon_probe:
++ orr v15.16b, v15.16b, v15.16b
++ ret
++.size _armv7_neon_probe,.-_armv7_neon_probe
++
++.global _armv7_tick
++.type _armv7_tick,%function
++_armv7_tick:
++ mrs x0, CNTVCT_EL0
++ ret
++.size _armv7_tick,.-_armv7_tick
++
++.global _armv8_aes_probe
++.type _armv8_aes_probe,%function
++_armv8_aes_probe:
++ aese v0.16b, v0.16b
++ ret
++.size _armv8_aes_probe,.-_armv8_aes_probe
++
++.global _armv8_sha1_probe
++.type _armv8_sha1_probe,%function
++_armv8_sha1_probe:
++ sha1h s0, s0
++ ret
++.size _armv8_sha1_probe,.-_armv8_sha1_probe
++
++.global _armv8_sha256_probe
++.type _armv8_sha256_probe,%function
++_armv8_sha256_probe:
++ sha256su0 v0.4s, v0.4s
++ ret
++.size _armv8_sha256_probe,.-_armv8_sha256_probe
++.global _armv8_pmull_probe
++.type _armv8_pmull_probe,%function
++_armv8_pmull_probe:
++ pmull v0.1q, v0.1d, v0.1d
++ ret
++.size _armv8_pmull_probe,.-_armv8_pmull_probe
+diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
+index 5a83107..6fa8724 100644
+--- a/crypto/arm_arch.h
++++ b/crypto/arm_arch.h
+@@ -10,13 +10,24 @@
+ # define __ARMEL__
+ # endif
+ # elif defined(__GNUC__)
++# if defined(__aarch64__)
++# define __ARM_ARCH__ 8
++# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
++# define __ARMEB__
++# else
++# define __ARMEL__
++# endif
+ /*
+ * Why doesn't gcc define __ARM_ARCH__? Instead it defines
+ * bunch of below macros. See all_architectires[] table in
+ * gcc/config/arm/arm.c. On a side note it defines
+ * __ARMEL__/__ARMEB__ for little-/big-endian.
+ */
+-# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
++# elif defined(__ARM_ARCH)
++# define __ARM_ARCH__ __ARM_ARCH
++# elif defined(__ARM_ARCH_8A__)
++# define __ARM_ARCH__ 8
++# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
+ defined(__ARM_ARCH_7EM__)
+ # define __ARM_ARCH__ 7
+@@ -43,9 +54,13 @@
+
+ #if !__ASSEMBLER__
+ extern unsigned int OPENSSL_armcap_P;
++#endif
+
+ #define ARMV7_NEON (1<<0)
+ #define ARMV7_TICK (1<<1)
+-#endif
++#define ARMV8_AES (1<<2)
++#define ARMV8_SHA1 (1<<3)
++#define ARMV8_SHA256 (1<<4)
++#define ARMV8_PMULL (1<<5)
+
+ #endif
+diff --git a/crypto/armcap.c b/crypto/armcap.c
+index 9abaf39..7e46d07 100644
+--- a/crypto/armcap.c
++++ b/crypto/armcap.c
+@@ -19,9 +19,13 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
+ * ARM compilers support inline assembler...
+ */
+ void _armv7_neon_probe(void);
+-unsigned int _armv7_tick(void);
++void _armv8_aes_probe(void);
++void _armv8_sha1_probe(void);
++void _armv8_sha256_probe(void);
++void _armv8_pmull_probe(void);
++unsigned long _armv7_tick(void);
+
+-unsigned int OPENSSL_rdtsc(void)
++unsigned long OPENSSL_rdtsc(void)
+ {
+ if (OPENSSL_armcap_P & ARMV7_TICK)
+ return _armv7_tick();
+@@ -29,9 +33,41 @@ unsigned int OPENSSL_rdtsc(void)
+ return 0;
+ }
+
++/*
++ * Use a weak reference to getauxval() so we can use it if it is available but
++ * don't break the build if it is not.
++ */
+ #if defined(__GNUC__) && __GNUC__>=2
+ void OPENSSL_cpuid_setup(void) __attribute__((constructor));
++extern unsigned long getauxval(unsigned long type) __attribute__((weak));
++#else
++static unsigned long (*getauxval)(unsigned long) = NULL;
+ #endif
++
++/*
++ * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas
++ * AArch64 used AT_HWCAP.
++ */
++#if defined(__arm__) || defined (__arm)
++# define HWCAP 16 /* AT_HWCAP */
++# define HWCAP_NEON (1 << 12)
++
++# define HWCAP_CE 26 /* AT_HWCAP2 */
++# define HWCAP_CE_AES (1 << 0)
++# define HWCAP_CE_PMULL (1 << 1)
++# define HWCAP_CE_SHA1 (1 << 2)
++# define HWCAP_CE_SHA256 (1 << 3)
++#elif defined(__aarch64__)
++# define HWCAP 16 /* AT_HWCAP */
++# define HWCAP_NEON (1 << 1)
++
++# define HWCAP_CE HWCAP
++# define HWCAP_CE_AES (1 << 3)
++# define HWCAP_CE_PMULL (1 << 4)
++# define HWCAP_CE_SHA1 (1 << 5)
++# define HWCAP_CE_SHA256 (1 << 6)
++#endif
++
+ void OPENSSL_cpuid_setup(void)
+ {
+ char *e;
+@@ -44,7 +80,7 @@ void OPENSSL_cpuid_setup(void)
+
+ if ((e=getenv("OPENSSL_armcap")))
+ {
+- OPENSSL_armcap_P=strtoul(e,NULL,0);
++ OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0);
+ return;
+ }
+
+@@ -64,10 +100,51 @@ void OPENSSL_cpuid_setup(void)
+ sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
+ sigaction(SIGILL,&ill_act,&ill_oact);
+
+- if (sigsetjmp(ill_jmp,1) == 0)
++ if (getauxval != NULL)
++ {
++ if (getauxval(HWCAP) & HWCAP_NEON)
++ {
++ unsigned long hwcap = getauxval(HWCAP_CE);
++
++ OPENSSL_armcap_P |= ARMV7_NEON;
++
++ if (hwcap & HWCAP_CE_AES)
++ OPENSSL_armcap_P |= ARMV8_AES;
++
++ if (hwcap & HWCAP_CE_PMULL)
++ OPENSSL_armcap_P |= ARMV8_PMULL;
++
++ if (hwcap & HWCAP_CE_SHA1)
++ OPENSSL_armcap_P |= ARMV8_SHA1;
++
++ if (hwcap & HWCAP_CE_SHA256)
++ OPENSSL_armcap_P |= ARMV8_SHA256;
++ }
++ }
++ else if (sigsetjmp(ill_jmp,1) == 0)
+ {
+ _armv7_neon_probe();
+ OPENSSL_armcap_P |= ARMV7_NEON;
++ if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ _armv8_pmull_probe();
++ OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES;
++ }
++ else if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ _armv8_aes_probe();
++ OPENSSL_armcap_P |= ARMV8_AES;
++ }
++ if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ _armv8_sha1_probe();
++ OPENSSL_armcap_P |= ARMV8_SHA1;
++ }
++ if (sigsetjmp(ill_jmp,1) == 0)
++ {
++ _armv8_sha256_probe();
++ OPENSSL_armcap_P |= ARMV8_SHA256;
++ }
+ }
+ if (sigsetjmp(ill_jmp,1) == 0)
+ {
+diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S
+index 2d618de..add11d4 100644
+--- a/crypto/armv4cpuid.S
++++ b/crypto/armv4cpuid.S
+@@ -7,17 +7,49 @@
+ .global _armv7_neon_probe
+ .type _armv7_neon_probe,%function
+ _armv7_neon_probe:
+- .word 0xf26ee1fe @ vorr q15,q15,q15
+- .word 0xe12fff1e @ bx lr
++ .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
++ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
+ .size _armv7_neon_probe,.-_armv7_neon_probe
+
+ .global _armv7_tick
+ .type _armv7_tick,%function
+ _armv7_tick:
+- mrc p15,0,r0,c9,c13,0
+- .word 0xe12fff1e @ bx lr
++ mrrc p15,1,r0,r1,c14 @ CNTVCT
++#if __ARM_ARCH__>=5
++ bx lr
++#else
++ .word 0xe12fff1e @ bx lr
++#endif
+ .size _armv7_tick,.-_armv7_tick
+
++.global _armv8_aes_probe
++.type _armv8_aes_probe,%function
++_armv8_aes_probe:
++ .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
++ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
++.size _armv8_aes_probe,.-_armv8_aes_probe
++
++.global _armv8_sha1_probe
++.type _armv8_sha1_probe,%function
++_armv8_sha1_probe:
++ .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
++ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
++.size _armv8_sha1_probe,.-_armv8_sha1_probe
++
++.global _armv8_sha256_probe
++.type _armv8_sha256_probe,%function
++_armv8_sha256_probe:
++ .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
++ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
++.size _armv8_sha256_probe,.-_armv8_sha256_probe
++.global _armv8_pmull_probe
++.type _armv8_pmull_probe,%function
++_armv8_pmull_probe:
++ .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
++ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
++.size _armv8_pmull_probe,.-_armv8_pmull_probe
++
++.align 5
+ .global OPENSSL_atomic_add
+ .type OPENSSL_atomic_add,%function
+ OPENSSL_atomic_add:
+@@ -28,7 +60,7 @@ OPENSSL_atomic_add:
+ cmp r2,#0
+ bne .Ladd
+ mov r0,r3
+- .word 0xe12fff1e @ bx lr
++ bx lr
+ #else
+ stmdb sp!,{r4-r6,lr}
+ ldr r2,.Lspinlock
+@@ -81,9 +113,13 @@ OPENSSL_cleanse:
+ adds r1,r1,#4
+ bne .Little
+ .Lcleanse_done:
++#if __ARM_ARCH__>=5
++ bx lr
++#else
+ tst lr,#1
+ moveq pc,lr
+ .word 0xe12fff1e @ bx lr
++#endif
+ .size OPENSSL_cleanse,.-OPENSSL_cleanse
+
+ .global OPENSSL_wipe_cpu
+@@ -97,41 +133,53 @@ OPENSSL_wipe_cpu:
+ eor ip,ip,ip
+ tst r0,#1
+ beq .Lwipe_done
+- .word 0xf3000150 @ veor q0, q0, q0
+- .word 0xf3022152 @ veor q1, q1, q1
+- .word 0xf3044154 @ veor q2, q2, q2
+- .word 0xf3066156 @ veor q3, q3, q3
+- .word 0xf34001f0 @ veor q8, q8, q8
+- .word 0xf34221f2 @ veor q9, q9, q9
+- .word 0xf34441f4 @ veor q10, q10, q10
+- .word 0xf34661f6 @ veor q11, q11, q11
+- .word 0xf34881f8 @ veor q12, q12, q12
+- .word 0xf34aa1fa @ veor q13, q13, q13
+- .word 0xf34cc1fc @ veor q14, q14, q14
+- .word 0xf34ee1fe @ veor q15, q15, q15
++ .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
++ .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
++ .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
++ .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
++ .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
++ .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
++ .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
++ .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
++ .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
++ .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
++ .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
++ .byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14
+ .Lwipe_done:
+ mov r0,sp
++#if __ARM_ARCH__>=5
++ bx lr
++#else
+ tst lr,#1
+ moveq pc,lr
+ .word 0xe12fff1e @ bx lr
++#endif
+ .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
+
+ .global OPENSSL_instrument_bus
+ .type OPENSSL_instrument_bus,%function
+ OPENSSL_instrument_bus:
+ eor r0,r0,r0
++#if __ARM_ARCH__>=5
++ bx lr
++#else
+ tst lr,#1
+ moveq pc,lr
+ .word 0xe12fff1e @ bx lr
++#endif
+ .size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
+
+ .global OPENSSL_instrument_bus2
+ .type OPENSSL_instrument_bus2,%function
+ OPENSSL_instrument_bus2:
+ eor r0,r0,r0
++#if __ARM_ARCH__>=5
++ bx lr
++#else
+ tst lr,#1
+ moveq pc,lr
+ .word 0xe12fff1e @ bx lr
++#endif
+ .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
+
+ .align 5
+diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile
+index 6dd136b..effc409 100644
+--- a/crypto/bn/Makefile
++++ b/crypto/bn/Makefile
+@@ -130,9 +130,10 @@ alpha-mont.s: asm/alpha-mont.pl
+ $(CC) -E $$preproc > $@ && rm $$preproc)
+
+ # GNU make "catch all"
+-%-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@
++%-mont.S: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+ %-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+
++armv4-mont.o: armv4-mont.S
+ armv4-gf2m.o: armv4-gf2m.S
+
+ files:
+diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
+index c52e0b7..b781afb 100644
+--- a/crypto/bn/asm/armv4-gf2m.pl
++++ b/crypto/bn/asm/armv4-gf2m.pl
+@@ -20,14 +20,21 @@
+ # length, more for longer keys. Even though NEON 1x1 multiplication
+ # runs in even less cycles, ~30, improvement is measurable only on
+ # longer keys. One has to optimize code elsewhere to get NEON glow...
++#
++# April 2014
++#
++# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
++# referred below, which improves ECDH and ECDSA verify benchmarks
++# by 18-40%.
++#
++# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
++# Polynomial Multiplication on ARM Processors using the NEON Engine.
++#
++# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
+
+ while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+ open STDOUT,">$output";
+
+-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+-sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
+-
+ $code=<<___;
+ #include "arm_arch.h"
+
+@@ -36,31 +43,6 @@ $code=<<___;
+
+ #if __ARM_ARCH__>=7
+ .fpu neon
+-
+-.type mul_1x1_neon,%function
+-.align 5
+-mul_1x1_neon:
+- vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a
+- vmull.p8 `&Q("d0")`,d16,d17 @ a·bb
+- vshl.u64 `&Dlo("q2")`,d16,#16
+- vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb
+- vshl.u64 `&Dlo("q3")`,d16,#24
+- vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb
+- vshr.u64 `&Dlo("q1")`,#8
+- vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb
+- vshl.u64 `&Dhi("q1")`,#24
+- veor d0,`&Dlo("q1")`
+- vshr.u64 `&Dlo("q2")`,#16
+- veor d0,`&Dhi("q1")`
+- vshl.u64 `&Dhi("q2")`,#16
+- veor d0,`&Dlo("q2")`
+- vshr.u64 `&Dlo("q3")`,#24
+- veor d0,`&Dhi("q2")`
+- vshl.u64 `&Dhi("q3")`,#8
+- veor d0,`&Dlo("q3")`
+- veor d0,`&Dhi("q3")`
+- bx lr
+-.size mul_1x1_neon,.-mul_1x1_neon
+ #endif
+ ___
+ ################
+@@ -159,8 +141,9 @@ ___
+ # void bn_GF2m_mul_2x2(BN_ULONG *r,
+ # BN_ULONG a1,BN_ULONG a0,
+ # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
+-
+-($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));
++{
++my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
++my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+ $code.=<<___;
+ .global bn_GF2m_mul_2x2
+@@ -173,44 +156,58 @@ bn_GF2m_mul_2x2:
+ tst r12,#1
+ beq .Lialu
+
+- veor $A1,$A1
+- vmov.32 $B1,r3,r3 @ two copies of b1
+- vmov.32 ${A1}[0],r1 @ a1
+-
+- veor $A0,$A0
+- vld1.32 ${B0}[],[sp,:32] @ two copies of b0
+- vmov.32 ${A0}[0],r2 @ a0
+- mov r12,lr
+-
+- vmov d16,$A1
+- vmov d17,$B1
+- bl mul_1x1_neon @ a1·b1
+- vmov $A1B1,d0
+-
+- vmov d16,$A0
+- vmov d17,$B0
+- bl mul_1x1_neon @ a0·b0
+- vmov $A0B0,d0
+-
+- veor d16,$A0,$A1
+- veor d17,$B0,$B1
+- veor $A0,$A0B0,$A1B1
+- bl mul_1x1_neon @ (a0+a1)·(b0+b1)
+-
+- veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1
+- vshl.u64 d1,d0,#32
+- vshr.u64 d0,d0,#32
+- veor $A0B0,d1
+- veor $A1B1,d0
+- vst1.32 {${A0B0}[0]},[r0,:32]!
+- vst1.32 {${A0B0}[1]},[r0,:32]!
+- vst1.32 {${A1B1}[0]},[r0,:32]!
+- vst1.32 {${A1B1}[1]},[r0,:32]
+- bx r12
++ ldr r12, [sp] @ 5th argument
++ vmov.32 $a, r2, r1
++ vmov.32 $b, r12, r3
++ vmov.i64 $k48, #0x0000ffffffffffff
++ vmov.i64 $k32, #0x00000000ffffffff
++ vmov.i64 $k16, #0x000000000000ffff
++
++ vext.8 $t0#lo, $a, $a, #1 @ A1
++ vmull.p8 $t0, $t0#lo, $b @ F = A1*B
++ vext.8 $r#lo, $b, $b, #1 @ B1
++ vmull.p8 $r, $a, $r#lo @ E = A*B1
++ vext.8 $t1#lo, $a, $a, #2 @ A2
++ vmull.p8 $t1, $t1#lo, $b @ H = A2*B
++ vext.8 $t3#lo, $b, $b, #2 @ B2
++ vmull.p8 $t3, $a, $t3#lo @ G = A*B2
++ vext.8 $t2#lo, $a, $a, #3 @ A3
++ veor $t0, $t0, $r @ L = E + F
++ vmull.p8 $t2, $t2#lo, $b @ J = A3*B
++ vext.8 $r#lo, $b, $b, #3 @ B3
++ veor $t1, $t1, $t3 @ M = G + H
++ vmull.p8 $r, $a, $r#lo @ I = A*B3
++ veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
++ vand $t0#hi, $t0#hi, $k48
++ vext.8 $t3#lo, $b, $b, #4 @ B4
++ veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
++ vand $t1#hi, $t1#hi, $k32
++ vmull.p8 $t3, $a, $t3#lo @ K = A*B4
++ veor $t2, $t2, $r @ N = I + J
++ veor $t0#lo, $t0#lo, $t0#hi
++ veor $t1#lo, $t1#lo, $t1#hi
++ veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
++ vand $t2#hi, $t2#hi, $k16
++ vext.8 $t0, $t0, $t0, #15
++ veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
++ vmov.i64 $t3#hi, #0
++ vext.8 $t1, $t1, $t1, #14
++ veor $t2#lo, $t2#lo, $t2#hi
++ vmull.p8 $r, $a, $b @ D = A*B
++ vext.8 $t3, $t3, $t3, #12
++ vext.8 $t2, $t2, $t2, #13
++ veor $t0, $t0, $t1
++ veor $t2, $t2, $t3
++ veor $r, $r, $t0
++ veor $r, $r, $t2
++
++ vst1.32 {$r}, [r0]
++ ret @ bx lr
+ .align 4
+ .Lialu:
+ #endif
+ ___
++}
+ $ret="r10"; # reassigned 1st argument
+ $code.=<<___;
+ stmdb sp!,{r4-r10,lr}
+@@ -272,7 +269,13 @@ $code.=<<___;
+ .comm OPENSSL_armcap_P,4,4
+ ___
+
+-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+-print $code;
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/geo;
++
++ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
++ s/\bret\b/bx lr/go or
++ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
++
++ print $_,"\n";
++}
+ close STDOUT; # enforce flush
+diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
+index f78a8b5..72bad8e 100644
+--- a/crypto/bn/asm/armv4-mont.pl
++++ b/crypto/bn/asm/armv4-mont.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -23,6 +23,21 @@
+ # than 1/2KB. Windows CE port would be trivial, as it's exclusively
+ # about decorations, ABI and instruction syntax are identical.
+
++# November 2013
++#
++# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
++# performance improvement on Cortex-A8 is ~45-100% depending on key
++# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
++# On Snapdragon S4 improvement was measured to vary from ~70% to
++# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
++# rather because original integer-only code seems to perform
++# suboptimally on S4. Situation on Cortex-A9 is unfortunately
++# different. It's being looked into, but the trouble is that
++# performance for vectors longer than 256 bits is actually couple
++# of percent worse than for integer-only code. The code is chosen
++# for execution on all NEON-capable processors, because gain on
++# others outweighs the marginal loss on Cortex-A9.
++
+ while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+ open STDOUT,">$output";
+
+@@ -52,16 +67,40 @@ $_n0="$num,#14*4";
+ $_num="$num,#15*4"; $_bpend=$_num;
+
+ $code=<<___;
++#include "arm_arch.h"
++
+ .text
++.code 32
++
++#if __ARM_ARCH__>=7
++.align 5
++.LOPENSSL_armcap:
++.word OPENSSL_armcap_P-bn_mul_mont
++#endif
+
+ .global bn_mul_mont
+ .type bn_mul_mont,%function
+
+-.align 2
++.align 5
+ bn_mul_mont:
++ ldr ip,[sp,#4] @ load num
+ stmdb sp!,{r0,r2} @ sp points at argument block
+- ldr $num,[sp,#3*4] @ load num
+- cmp $num,#2
++#if __ARM_ARCH__>=7
++ tst ip,#7
++ bne .Lialu
++ adr r0,bn_mul_mont
++ ldr r2,.LOPENSSL_armcap
++ ldr r0,[r0,r2]
++ tst r0,#1 @ NEON available?
++ ldmia sp, {r0,r2}
++ beq .Lialu
++ add sp,sp,#8
++ b bn_mul8x_mont_neon
++.align 4
++.Lialu:
++#endif
++ cmp ip,#2
++ mov $num,ip @ load num
+ movlt r0,#0
+ addlt sp,sp,#2*4
+ blt .Labrt
+@@ -191,14 +230,446 @@ bn_mul_mont:
+ ldmia sp!,{r4-r12,lr} @ restore registers
+ add sp,sp,#2*4 @ skip over {r0,r2}
+ mov r0,#1
+-.Labrt: tst lr,#1
++.Labrt:
++#if __ARM_ARCH__>=5
++ ret @ bx lr
++#else
++ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
++#endif
+ .size bn_mul_mont,.-bn_mul_mont
+-.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
++___
++{
++sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
++sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
++
++my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
++my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
++my ($Z,$Temp)=("q4","q5");
++my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
++my ($Bi,$Ni,$M0)=map("d$_",(28..31));
++my $zero=&Dlo($Z);
++my $temp=&Dlo($Temp);
++
++my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
++my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
++
++$code.=<<___;
++#if __ARM_ARCH__>=7
++.fpu neon
++
++.type bn_mul8x_mont_neon,%function
++.align 5
++bn_mul8x_mont_neon:
++ mov ip,sp
++ stmdb sp!,{r4-r11}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++ ldmia ip,{r4-r5} @ load rest of parameter block
++
++ sub $toutptr,sp,#16
++ vld1.32 {${Bi}[0]}, [$bptr,:32]!
++ sub $toutptr,$toutptr,$num,lsl#4
++ vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-(
++ and $toutptr,$toutptr,#-64
++ vld1.32 {${M0}[0]}, [$n0,:32]
++ mov sp,$toutptr @ alloca
++ veor $zero,$zero,$zero
++ subs $inner,$num,#8
++ vzip.16 $Bi,$zero
++
++ vmull.u32 $A0xB,$Bi,${A0}[0]
++ vmull.u32 $A1xB,$Bi,${A0}[1]
++ vmull.u32 $A2xB,$Bi,${A1}[0]
++ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
++ vmull.u32 $A3xB,$Bi,${A1}[1]
++
++ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
++ veor $zero,$zero,$zero
++ vmul.u32 $Ni,$temp,$M0
++
++ vmull.u32 $A4xB,$Bi,${A2}[0]
++ vld1.32 {$N0-$N3}, [$nptr]!
++ vmull.u32 $A5xB,$Bi,${A2}[1]
++ vmull.u32 $A6xB,$Bi,${A3}[0]
++ vzip.16 $Ni,$zero
++ vmull.u32 $A7xB,$Bi,${A3}[1]
++
++ bne .LNEON_1st
++
++ @ special case for num=8, everything is in register bank...
++
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ sub $outer,$num,#1
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vmov $Temp,$A0xB
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vmov $A0xB,$A1xB
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vmov $A1xB,$A2xB
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++ vmov $A2xB,$A3xB
++ vmov $A3xB,$A4xB
++ vshr.u64 $temp,$temp,#16
++ vmov $A4xB,$A5xB
++ vmov $A5xB,$A6xB
++ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
++ vmov $A6xB,$A7xB
++ veor $A7xB,$A7xB
++ vshr.u64 $temp,$temp,#16
++
++ b .LNEON_outer8
++
++.align 4
++.LNEON_outer8:
++ vld1.32 {${Bi}[0]}, [$bptr,:32]!
++ veor $zero,$zero,$zero
++ vzip.16 $Bi,$zero
++ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
++
++ vmlal.u32 $A0xB,$Bi,${A0}[0]
++ vmlal.u32 $A1xB,$Bi,${A0}[1]
++ vmlal.u32 $A2xB,$Bi,${A1}[0]
++ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
++ vmlal.u32 $A3xB,$Bi,${A1}[1]
++
++ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
++ veor $zero,$zero,$zero
++ subs $outer,$outer,#1
++ vmul.u32 $Ni,$temp,$M0
++
++ vmlal.u32 $A4xB,$Bi,${A2}[0]
++ vmlal.u32 $A5xB,$Bi,${A2}[1]
++ vmlal.u32 $A6xB,$Bi,${A3}[0]
++ vzip.16 $Ni,$zero
++ vmlal.u32 $A7xB,$Bi,${A3}[1]
++
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vmov $Temp,$A0xB
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vmov $A0xB,$A1xB
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vmov $A1xB,$A2xB
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++ vmov $A2xB,$A3xB
++ vmov $A3xB,$A4xB
++ vshr.u64 $temp,$temp,#16
++ vmov $A4xB,$A5xB
++ vmov $A5xB,$A6xB
++ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
++ vmov $A6xB,$A7xB
++ veor $A7xB,$A7xB
++ vshr.u64 $temp,$temp,#16
++
++ bne .LNEON_outer8
++
++ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
++ mov $toutptr,sp
++ vshr.u64 $temp,`&Dlo("$A0xB")`,#16
++ mov $inner,$num
++ vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
++ add $tinptr,sp,#16
++ vshr.u64 $temp,`&Dhi("$A0xB")`,#16
++ vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
++
++ b .LNEON_tail2
++
++.align 4
++.LNEON_1st:
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ vld1.32 {$A0-$A3}, [$aptr]!
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ subs $inner,$inner,#8
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vld1.32 {$N0-$N1}, [$nptr]!
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
++
++ vmull.u32 $A0xB,$Bi,${A0}[0]
++ vld1.32 {$N2-$N3}, [$nptr]!
++ vmull.u32 $A1xB,$Bi,${A0}[1]
++ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
++ vmull.u32 $A2xB,$Bi,${A1}[0]
++ vmull.u32 $A3xB,$Bi,${A1}[1]
++ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
++
++ vmull.u32 $A4xB,$Bi,${A2}[0]
++ vmull.u32 $A5xB,$Bi,${A2}[1]
++ vmull.u32 $A6xB,$Bi,${A3}[0]
++ vmull.u32 $A7xB,$Bi,${A3}[1]
++
++ bne .LNEON_1st
++
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ add $tinptr,sp,#16
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vld1.64 {$Temp}, [sp,:128]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++ sub $outer,$num,#1
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vshr.u64 $temp,$temp,#16
++ vld1.64 {$A0xB}, [$tinptr, :128]!
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++
++ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
++ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
++ veor $Z,$Z,$Z
++ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
++ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
++ vst1.64 {$Z}, [$toutptr,:128]
++ vshr.u64 $temp,$temp,#16
++
++ b .LNEON_outer
++
++.align 4
++.LNEON_outer:
++ vld1.32 {${Bi}[0]}, [$bptr,:32]!
++ sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
++ vld1.32 {$A0-$A3}, [$aptr]!
++ veor $zero,$zero,$zero
++ mov $toutptr,sp
++ vzip.16 $Bi,$zero
++ sub $inner,$num,#8
++ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
++
++ vmlal.u32 $A0xB,$Bi,${A0}[0]
++ vld1.64 {$A3xB-$A4xB},[$tinptr,:256]!
++ vmlal.u32 $A1xB,$Bi,${A0}[1]
++ vmlal.u32 $A2xB,$Bi,${A1}[0]
++ vld1.64 {$A5xB-$A6xB},[$tinptr,:256]!
++ vmlal.u32 $A3xB,$Bi,${A1}[1]
++
++ vshl.i64 $temp,`&Dhi("$A0xB")`,#16
++ veor $zero,$zero,$zero
++ vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
++ vld1.64 {$A7xB},[$tinptr,:128]!
++ vmul.u32 $Ni,$temp,$M0
++
++ vmlal.u32 $A4xB,$Bi,${A2}[0]
++ vld1.32 {$N0-$N3}, [$nptr]!
++ vmlal.u32 $A5xB,$Bi,${A2}[1]
++ vmlal.u32 $A6xB,$Bi,${A3}[0]
++ vzip.16 $Ni,$zero
++ vmlal.u32 $A7xB,$Bi,${A3}[1]
++
++.LNEON_inner:
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ vld1.32 {$A0-$A3}, [$aptr]!
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ subs $inner,$inner,#8
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vld1.64 {$A0xB}, [$tinptr, :128]!
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
++
++ vmlal.u32 $A0xB,$Bi,${A0}[0]
++ vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
++ vmlal.u32 $A1xB,$Bi,${A0}[1]
++ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
++ vmlal.u32 $A2xB,$Bi,${A1}[0]
++ vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
++ vmlal.u32 $A3xB,$Bi,${A1}[1]
++ vld1.32 {$N0-$N3}, [$nptr]!
++
++ vmlal.u32 $A4xB,$Bi,${A2}[0]
++ vld1.64 {$A7xB}, [$tinptr, :128]!
++ vmlal.u32 $A5xB,$Bi,${A2}[1]
++ vmlal.u32 $A6xB,$Bi,${A3}[0]
++ vmlal.u32 $A7xB,$Bi,${A3}[1]
++
++ bne .LNEON_inner
++
++ vmlal.u32 $A0xB,$Ni,${N0}[0]
++ add $tinptr,sp,#16
++ vmlal.u32 $A1xB,$Ni,${N0}[1]
++ sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
++ vmlal.u32 $A2xB,$Ni,${N1}[0]
++ vld1.64 {$Temp}, [sp,:128]
++ vmlal.u32 $A3xB,$Ni,${N1}[1]
++ subs $outer,$outer,#1
++
++ vmlal.u32 $A4xB,$Ni,${N2}[0]
++ vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
++ vmlal.u32 $A5xB,$Ni,${N2}[1]
++ vld1.64 {$A0xB}, [$tinptr, :128]!
++ vshr.u64 $temp,$temp,#16
++ vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
++ vmlal.u32 $A6xB,$Ni,${N3}[0]
++ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
++ vmlal.u32 $A7xB,$Ni,${N3}[1]
++
++ vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
++ vadd.u64 $temp,$temp,`&Dhi("$Temp")`
++ vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
++ vshr.u64 $temp,$temp,#16
++
++ bne .LNEON_outer
++
++ mov $toutptr,sp
++ mov $inner,$num
++
++.LNEON_tail:
++ vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
++ vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
++ vshr.u64 $temp,`&Dlo("$A0xB")`,#16
++ vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
++ vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
++ vshr.u64 $temp,`&Dhi("$A0xB")`,#16
++ vld1.64 {$A7xB}, [$tinptr, :128]!
++ vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
++
++.LNEON_tail2:
++ vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp
++ vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A1xB")`,#16
++ vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp
++ vshr.u64 $temp,`&Dhi("$A1xB")`,#16
++ vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")`
++
++ vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp
++ vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A2xB")`,#16
++ vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp
++ vshr.u64 $temp,`&Dhi("$A2xB")`,#16
++ vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")`
++
++ vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp
++ vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A3xB")`,#16
++ vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp
++ vshr.u64 $temp,`&Dhi("$A3xB")`,#16
++ vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")`
++
++ vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp
++ vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A4xB")`,#16
++ vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp
++ vshr.u64 $temp,`&Dhi("$A4xB")`,#16
++ vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")`
++
++ vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp
++ vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A5xB")`,#16
++ vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp
++ vshr.u64 $temp,`&Dhi("$A5xB")`,#16
++ vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")`
++
++ vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp
++ vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A6xB")`,#16
++ vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp
++ vld1.64 {$A0xB}, [$tinptr, :128]!
++ vshr.u64 $temp,`&Dhi("$A6xB")`,#16
++ vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")`
++
++ vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp
++ vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]!
++ vshr.u64 $temp,`&Dlo("$A7xB")`,#16
++ vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp
++ vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
++ vshr.u64 $temp,`&Dhi("$A7xB")`,#16
++ vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")`
++ subs $inner,$inner,#8
++ vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]!
++
++ bne .LNEON_tail
++
++ vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit
++ sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
++ subs $aptr,sp,#0 @ clear carry flag
++ add $bptr,sp,$num,lsl#2
++
++.LNEON_sub:
++ ldmia $aptr!, {r4-r7}
++ ldmia $nptr!, {r8-r11}
++ sbcs r8, r4,r8
++ sbcs r9, r5,r9
++ sbcs r10,r6,r10
++ sbcs r11,r7,r11
++ teq $aptr,$bptr @ preserves carry
++ stmia $rptr!, {r8-r11}
++ bne .LNEON_sub
++
++ ldr r10, [$aptr] @ load top-most bit
++ veor q0,q0,q0
++ sub r11,$bptr,sp @ this is num*4
++ veor q1,q1,q1
++ mov $aptr,sp
++ sub $rptr,$rptr,r11 @ rewind $rptr
++ mov $nptr,$bptr @ second 3/4th of frame
++ sbcs r10,r10,#0 @ result is carry flag
++
++.LNEON_copy_n_zap:
++ ldmia $aptr!, {r4-r7}
++ ldmia $rptr, {r8-r11}
++ movcc r8, r4
++ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
++ movcc r9, r5
++ movcc r10,r6
++ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
++ movcc r11,r7
++ ldmia $aptr, {r4-r7}
++ stmia $rptr!, {r8-r11}
++ sub $aptr,$aptr,#16
++ ldmia $rptr, {r8-r11}
++ movcc r8, r4
++ vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
++ movcc r9, r5
++ movcc r10,r6
++ vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
++ movcc r11,r7
++ teq $aptr,$bptr @ preserves carry
++ stmia $rptr!, {r8-r11}
++ bne .LNEON_copy_n_zap
++
++ sub sp,ip,#96
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r11}
++ ret @ bx lr
++.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
++#endif
++___
++}
++$code.=<<___;
++.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 2
++#if __ARM_ARCH__>=7
++.comm OPENSSL_armcap_P,4,4
++#endif
+ ___
+
++$code =~ s/\`([^\`]*)\`/eval $1/gem;
+ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
++$code =~ s/\bret\b/bx lr/gm;
+ print $code;
+ close STDOUT;
+diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
+index c7869b6..ad0f7a4 100644
+--- a/crypto/evp/e_aes.c
++++ b/crypto/evp/e_aes.c
+@@ -62,7 +62,7 @@
+
+ typedef struct
+ {
+- AES_KEY ks;
++ union { double align; AES_KEY ks; } ks;
+ block128_f block;
+ union {
+ cbc128_f cbc;
+@@ -72,7 +72,7 @@ typedef struct
+
+ typedef struct
+ {
+- AES_KEY ks; /* AES key schedule to use */
++ union { double align; AES_KEY ks; } ks; /* AES key schedule to use */
+ int key_set; /* Set if key initialised */
+ int iv_set; /* Set if an iv is set */
+ GCM128_CONTEXT gcm;
+@@ -86,7 +86,7 @@ typedef struct
+
+ typedef struct
+ {
+- AES_KEY ks1, ks2; /* AES key schedules to use */
++ union { double align; AES_KEY ks; } ks1, ks2; /* AES key schedules to use */
+ XTS128_CONTEXT xts;
+ void (*stream)(const unsigned char *in,
+ unsigned char *out, size_t length,
+@@ -96,7 +96,7 @@ typedef struct
+
+ typedef struct
+ {
+- AES_KEY ks; /* AES key schedule to use */
++ union { double align; AES_KEY ks; } ks; /* AES key schedule to use */
+ int key_set; /* Set if key initialised */
+ int iv_set; /* Set if an iv is set */
+ int tag_set; /* Set if tag is valid */
+@@ -160,7 +160,7 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len,
+ defined(_M_AMD64) || defined(_M_X64) || \
+ defined(__INTEL__) )
+
+-extern unsigned int OPENSSL_ia32cap_P[2];
++extern unsigned int OPENSSL_ia32cap_P[];
+
+ #ifdef VPAES_ASM
+ #define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
+@@ -310,7 +310,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ return 1;
+ if (key)
+ {
+- aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
++ aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
+ CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
+ (block128_f)aesni_encrypt);
+ gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks;
+@@ -355,19 +355,19 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ /* key_len is two AES keys */
+ if (enc)
+ {
+- aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)aesni_encrypt;
+ xctx->stream = aesni_xts_encrypt;
+ }
+ else
+ {
+- aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)aesni_decrypt;
+ xctx->stream = aesni_xts_decrypt;
+ }
+
+ aesni_set_encrypt_key(key + ctx->key_len/2,
+- ctx->key_len * 4, &xctx->ks2);
++ ctx->key_len * 4, &xctx->ks2.ks);
+ xctx->xts.block2 = (block128_f)aesni_encrypt;
+
+ xctx->xts.key1 = &xctx->ks1;
+@@ -394,7 +394,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ return 1;
+ if (key)
+ {
+- aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
++ aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
+ CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+ &cctx->ks, (block128_f)aesni_encrypt);
+ cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks :
+@@ -484,6 +484,38 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
+ { return &aes_##keylen##_##mode; }
+ #endif
+
++#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
++#include "arm_arch.h"
++#if __ARM_ARCH__>=7
++# if defined(BSAES_ASM)
++# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
++# endif
++# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
++# define HWAES_set_encrypt_key aes_v8_set_encrypt_key
++# define HWAES_set_decrypt_key aes_v8_set_decrypt_key
++# define HWAES_encrypt aes_v8_encrypt
++# define HWAES_decrypt aes_v8_decrypt
++# define HWAES_cbc_encrypt aes_v8_cbc_encrypt
++# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
++#endif
++#endif
++
++#if defined(HWAES_CAPABLE)
++int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits,
++ AES_KEY *key);
++int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits,
++ AES_KEY *key);
++void HWAES_encrypt(const unsigned char *in, unsigned char *out,
++ const AES_KEY *key);
++void HWAES_decrypt(const unsigned char *in, unsigned char *out,
++ const AES_KEY *key);
++void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
++ size_t length, const AES_KEY *key,
++ unsigned char *ivec, const int enc);
++void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
++ size_t len, const AES_KEY *key, const unsigned char ivec[16]);
++#endif
++
+ #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \
+ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
+ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
+@@ -502,10 +534,23 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ mode = ctx->cipher->flags & EVP_CIPH_MODE;
+ if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
+ && !enc)
++#ifdef HWAES_CAPABLE
++ if (HWAES_CAPABLE)
++ {
++ ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
++ dat->block = (block128_f)HWAES_decrypt;
++ dat->stream.cbc = NULL;
++#ifdef HWAES_cbc_encrypt
++ if (mode==EVP_CIPH_CBC_MODE)
++ dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
++#endif
++ }
++ else
++#endif
+ #ifdef BSAES_CAPABLE
+ if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)
+ {
+- ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)AES_decrypt;
+ dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt;
+ }
+@@ -514,7 +559,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ #ifdef VPAES_CAPABLE
+ if (VPAES_CAPABLE)
+ {
+- ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)vpaes_decrypt;
+ dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
+ (cbc128_f)vpaes_cbc_encrypt :
+@@ -523,17 +568,37 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ else
+ #endif
+ {
+- ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)AES_decrypt;
+ dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
+ (cbc128_f)AES_cbc_encrypt :
+ NULL;
+ }
+ else
++#ifdef HWAES_CAPABLE
++ if (HWAES_CAPABLE)
++ {
++ ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
++ dat->block = (block128_f)HWAES_encrypt;
++ dat->stream.cbc = NULL;
++#ifdef HWAES_cbc_encrypt
++ if (mode==EVP_CIPH_CBC_MODE)
++ dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
++ else
++#endif
++#ifdef HWAES_ctr32_encrypt_blocks
++ if (mode==EVP_CIPH_CTR_MODE)
++ dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
++ else
++#endif
++ (void)0; /* terminate potentially open 'else' */
++ }
++ else
++#endif
+ #ifdef BSAES_CAPABLE
+ if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)
+ {
+- ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)AES_encrypt;
+ dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks;
+ }
+@@ -542,7 +607,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ #ifdef VPAES_CAPABLE
+ if (VPAES_CAPABLE)
+ {
+- ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)vpaes_encrypt;
+ dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
+ (cbc128_f)vpaes_cbc_encrypt :
+@@ -551,7 +616,7 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ else
+ #endif
+ {
+- ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
++ ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
+ dat->block = (block128_f)AES_encrypt;
+ dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ?
+ (cbc128_f)AES_cbc_encrypt :
+@@ -822,10 +887,25 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ return 1;
+ if (key)
+ { do {
++#ifdef HWAES_CAPABLE
++ if (HWAES_CAPABLE)
++ {
++ HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
++ CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
++ (block128_f)HWAES_encrypt);
++#ifdef HWAES_ctr32_encrypt_blocks
++ gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
++#else
++ gctx->ctr = NULL;
++#endif
++ break;
++ }
++ else
++#endif
+ #ifdef BSAES_CAPABLE
+ if (BSAES_CAPABLE)
+ {
+- AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
++ AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
+ CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
+ (block128_f)AES_encrypt);
+ gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks;
+@@ -836,7 +916,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ #ifdef VPAES_CAPABLE
+ if (VPAES_CAPABLE)
+ {
+- vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
++ vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
+ CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
+ (block128_f)vpaes_encrypt);
+ gctx->ctr = NULL;
+@@ -846,7 +926,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ #endif
+ (void)0; /* terminate potentially open 'else' */
+
+- AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
++ AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
+ CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);
+ #ifdef AES_CTR_ASM
+ gctx->ctr = (ctr128_f)AES_ctr32_encrypt;
+@@ -1067,6 +1147,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ xctx->stream = NULL;
+ #endif
+ /* key_len is two AES keys */
++#ifdef HWAES_CAPABLE
++ if (HWAES_CAPABLE)
++ {
++ if (enc)
++ {
++ HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
++ xctx->xts.block1 = (block128_f)HWAES_encrypt;
++ }
++ else
++ {
++ HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
++ xctx->xts.block1 = (block128_f)HWAES_decrypt;
++ }
++
++ HWAES_set_encrypt_key(key + ctx->key_len/2,
++ ctx->key_len * 4, &xctx->ks2.ks);
++ xctx->xts.block2 = (block128_f)HWAES_encrypt;
++
++ xctx->xts.key1 = &xctx->ks1;
++ break;
++ }
++ else
++#endif
+ #ifdef BSAES_CAPABLE
+ if (BSAES_CAPABLE)
+ xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt;
+@@ -1077,17 +1180,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ {
+ if (enc)
+ {
+- vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)vpaes_encrypt;
+ }
+ else
+ {
+- vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)vpaes_decrypt;
+ }
+
+ vpaes_set_encrypt_key(key + ctx->key_len/2,
+- ctx->key_len * 4, &xctx->ks2);
++ ctx->key_len * 4, &xctx->ks2.ks);
+ xctx->xts.block2 = (block128_f)vpaes_encrypt;
+
+ xctx->xts.key1 = &xctx->ks1;
+@@ -1099,17 +1202,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+
+ if (enc)
+ {
+- AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)AES_encrypt;
+ }
+ else
+ {
+- AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
++ AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f)AES_decrypt;
+ }
+
+ AES_set_encrypt_key(key + ctx->key_len/2,
+- ctx->key_len * 4, &xctx->ks2);
++ ctx->key_len * 4, &xctx->ks2.ks);
+ xctx->xts.block2 = (block128_f)AES_encrypt;
+
+ xctx->xts.key1 = &xctx->ks1;
+@@ -1217,10 +1320,23 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ return 1;
+ if (key) do
+ {
++#ifdef HWAES_CAPABLE
++ if (HWAES_CAPABLE)
++ {
++ HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks.ks);
++
++ CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
++ &cctx->ks, (block128_f)HWAES_encrypt);
++ cctx->str = NULL;
++ cctx->key_set = 1;
++ break;
++ }
++ else
++#endif
+ #ifdef VPAES_CAPABLE
+ if (VPAES_CAPABLE)
+ {
+- vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks);
++ vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks);
+ CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+ &cctx->ks, (block128_f)vpaes_encrypt);
+ cctx->str = NULL;
+@@ -1228,7 +1344,7 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+ break;
+ }
+ #endif
+- AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
++ AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
+ CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+ &cctx->ks, (block128_f)AES_encrypt);
+ cctx->str = NULL;
+diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile
+index 3d8bafd..9bcfa0e 100644
+--- a/crypto/modes/Makefile
++++ b/crypto/modes/Makefile
+@@ -56,14 +56,16 @@ ghash-alpha.s: asm/ghash-alpha.pl
+ (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+ $(PERL) asm/ghash-alpha.pl > $$preproc && \
+ $(CC) -E $$preproc > $@ && rm $$preproc)
+-
+ ghash-parisc.s: asm/ghash-parisc.pl
+ $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
++ghashv8-armx.S: asm/ghashv8-armx.pl
++ $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
+
+ # GNU make "catch all"
+ ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+
+ ghash-armv4.o: ghash-armv4.S
++ghashv8-armx.o: ghashv8-armx.S
+
+ files:
+ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
+index d91586e..0023bf9 100644
+--- a/crypto/modes/asm/ghash-armv4.pl
++++ b/crypto/modes/asm/ghash-armv4.pl
+@@ -35,6 +35,20 @@
+ # Add NEON implementation featuring polynomial multiplication, i.e. no
+ # lookup tables involved. On Cortex A8 it was measured to process one
+ # byte in 15 cycles or 55% faster than integer-only code.
++#
++# April 2014
++#
++# Switch to multiplication algorithm suggested in paper referred
++# below and combine it with reduction algorithm from x86 module.
++# Performance improvement over previous version varies from 65% on
++# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
++# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
++# in 9.33.
++#
++# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
++# Polynomial Multiplication on ARM Processors using the NEON Engine.
++#
++# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
+
+ # ====================================================================
+ # Note about "528B" variant. In ARM case it makes lesser sense to
+@@ -303,117 +317,160 @@ $code.=<<___;
+ .size gcm_gmult_4bit,.-gcm_gmult_4bit
+ ___
+ {
+-my $cnt=$Htbl; # $Htbl is used once in the very beginning
+-
+-my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7));
+-my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15));
+-
+-# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit
+-# in Zo. Or should I say "top bit", because GHASH is specified in
+-# reverse bit order? Otherwise straightforward 128-bt H by one input
+-# byte multiplication and modulo-reduction, times 16.
++my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
++my ($t0,$t1,$t2,$t3)=map("q$_",(8..12));
++my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31));
+
+-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+-sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
++sub clmul64x64 {
++my ($r,$a,$b)=@_;
++$code.=<<___;
++ vext.8 $t0#lo, $a, $a, #1 @ A1
++ vmull.p8 $t0, $t0#lo, $b @ F = A1*B
++ vext.8 $r#lo, $b, $b, #1 @ B1
++ vmull.p8 $r, $a, $r#lo @ E = A*B1
++ vext.8 $t1#lo, $a, $a, #2 @ A2
++ vmull.p8 $t1, $t1#lo, $b @ H = A2*B
++ vext.8 $t3#lo, $b, $b, #2 @ B2
++ vmull.p8 $t3, $a, $t3#lo @ G = A*B2
++ vext.8 $t2#lo, $a, $a, #3 @ A3
++ veor $t0, $t0, $r @ L = E + F
++ vmull.p8 $t2, $t2#lo, $b @ J = A3*B
++ vext.8 $r#lo, $b, $b, #3 @ B3
++ veor $t1, $t1, $t3 @ M = G + H
++ vmull.p8 $r, $a, $r#lo @ I = A*B3
++ veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
++ vand $t0#hi, $t0#hi, $k48
++ vext.8 $t3#lo, $b, $b, #4 @ B4
++ veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
++ vand $t1#hi, $t1#hi, $k32
++ vmull.p8 $t3, $a, $t3#lo @ K = A*B4
++ veor $t2, $t2, $r @ N = I + J
++ veor $t0#lo, $t0#lo, $t0#hi
++ veor $t1#lo, $t1#lo, $t1#hi
++ veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
++ vand $t2#hi, $t2#hi, $k16
++ vext.8 $t0, $t0, $t0, #15
++ veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
++ vmov.i64 $t3#hi, #0
++ vext.8 $t1, $t1, $t1, #14
++ veor $t2#lo, $t2#lo, $t2#hi
++ vmull.p8 $r, $a, $b @ D = A*B
++ vext.8 $t3, $t3, $t3, #12
++ vext.8 $t2, $t2, $t2, #13
++ veor $t0, $t0, $t1
++ veor $t2, $t2, $t3
++ veor $r, $r, $t0
++ veor $r, $r, $t2
++___
++}
+
+ $code.=<<___;
+ #if __ARM_ARCH__>=7
+ .fpu neon
+
++.global gcm_init_neon
++.type gcm_init_neon,%function
++.align 4
++gcm_init_neon:
++ vld1.64 $IN#hi,[r1,:64]! @ load H
++ vmov.i8 $t0,#0xe1
++ vld1.64 $IN#lo,[r1,:64]
++ vshl.i64 $t0#hi,#57
++ vshr.u64 $t0#lo,#63 @ t0=0xc2....01
++ vdup.8 $t1,$IN#hi[7]
++ vshr.u64 $Hlo,$IN#lo,#63
++ vshr.s8 $t1,#7 @ broadcast carry bit
++ vshl.i64 $IN,$IN,#1
++ vand $t0,$t0,$t1
++ vorr $IN#hi,$Hlo @ H<<<=1
++ veor $IN,$IN,$t0 @ twisted H
++ vstmia r0,{$IN}
++
++ ret @ bx lr
++.size gcm_init_neon,.-gcm_init_neon
++
+ .global gcm_gmult_neon
+ .type gcm_gmult_neon,%function
+ .align 4
+ gcm_gmult_neon:
+- sub $Htbl,#16 @ point at H in GCM128_CTX
+- vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi
+- vmov.i32 $mod,#0xe1 @ our irreducible polynomial
+- vld1.64 `&Dlo("$IN")`,[$Xi,:64]!
+- vshr.u64 $mod,#32
+- vldmia $Htbl,{$Hhi-$Hlo} @ load H
+- veor $zero,$zero
++ vld1.64 $IN#hi,[$Xi,:64]! @ load Xi
++ vld1.64 $IN#lo,[$Xi,:64]!
++ vmov.i64 $k48,#0x0000ffffffffffff
++ vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
++ vmov.i64 $k32,#0x00000000ffffffff
+ #ifdef __ARMEL__
+ vrev64.8 $IN,$IN
+ #endif
+- veor $Qpost,$Qpost
+- veor $R,$R
+- mov $cnt,#16
+- veor $Z,$Z
++ vmov.i64 $k16,#0x000000000000ffff
++ veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
+ mov $len,#16
+- veor $Zo,$Zo
+- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
+- b .Linner_neon
++ b .Lgmult_neon
+ .size gcm_gmult_neon,.-gcm_gmult_neon
+
+ .global gcm_ghash_neon
+ .type gcm_ghash_neon,%function
+ .align 4
+ gcm_ghash_neon:
+- vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi
+- vmov.i32 $mod,#0xe1 @ our irreducible polynomial
+- vld1.64 `&Dlo("$Z")`,[$Xi,:64]!
+- vshr.u64 $mod,#32
+- vldmia $Xi,{$Hhi-$Hlo} @ load H
+- veor $zero,$zero
+- nop
++ vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi
++ vld1.64 $Xl#lo,[$Xi,:64]!
++ vmov.i64 $k48,#0x0000ffffffffffff
++ vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
++ vmov.i64 $k32,#0x00000000ffffffff
+ #ifdef __ARMEL__
+- vrev64.8 $Z,$Z
++ vrev64.8 $Xl,$Xl
+ #endif
+-.Louter_neon:
+- vld1.64 `&Dhi($IN)`,[$inp]! @ load inp
+- veor $Qpost,$Qpost
+- vld1.64 `&Dlo($IN)`,[$inp]!
+- veor $R,$R
+- mov $cnt,#16
++ vmov.i64 $k16,#0x000000000000ffff
++ veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
++
++.Loop_neon:
++ vld1.64 $IN#hi,[$inp]! @ load inp
++ vld1.64 $IN#lo,[$inp]!
+ #ifdef __ARMEL__
+ vrev64.8 $IN,$IN
+ #endif
+- veor $Zo,$Zo
+- veor $IN,$Z @ inp^=Xi
+- veor $Z,$Z
+- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
+-.Linner_neon:
+- subs $cnt,$cnt,#1
+- vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i]
+- vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i]
+- vext.8 $IN,$zero,#1 @ IN>>=8
+-
+- veor $Z,$Qpost @ modulo-scheduled part
+- vshl.i64 `&Dlo("$R")`,#48
+- vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
+- veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")`
+-
+- veor `&Dhi("$Z")`,`&Dlo("$R")`
+- vuzp.8 $Qlo,$Qhi
+- vsli.8 $Zo,$T,#1 @ compose the "carry" byte
+- vext.8 $Z,$zero,#1 @ Z>>=8
+-
+- vmull.p8 $R,$Zo,$mod @ "carry"·0xe1
+- vshr.u8 $Zo,$T,#7 @ save Z's bottom bit
+- vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8
+- veor $Z,$Qhi
+- bne .Linner_neon
+-
+- veor $Z,$Qpost @ modulo-scheduled artefact
+- vshl.i64 `&Dlo("$R")`,#48
+- veor `&Dhi("$Z")`,`&Dlo("$R")`
+-
+- @ finalization, normalize Z:Zo
+- vand $Zo,$mod @ suffices to mask the bit
+- vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63
+- vshl.i64 $Z,#1
++ veor $IN,$Xl @ inp^=Xi
++.Lgmult_neon:
++___
++ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
++$code.=<<___;
++ veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
++___
++ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
++ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
++$code.=<<___;
++ veor $Xm,$Xm,$Xl @ Karatsuba post-processing
++ veor $Xm,$Xm,$Xh
++ veor $Xl#hi,$Xl#hi,$Xm#lo
++ veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result
++
++ @ equivalent of reduction_avx from ghash-x86_64.pl
++ vshl.i64 $t1,$Xl,#57 @ 1st phase
++ vshl.i64 $t2,$Xl,#62
++ veor $t2,$t2,$t1 @
++ vshl.i64 $t1,$Xl,#63
++ veor $t2, $t2, $t1 @
++ veor $Xl#hi,$Xl#hi,$t2#lo @
++ veor $Xh#lo,$Xh#lo,$t2#hi
++
++ vshr.u64 $t2,$Xl,#1 @ 2nd phase
++ veor $Xh,$Xh,$Xl
++ veor $Xl,$Xl,$t2 @
++ vshr.u64 $t2,$t2,#6
++ vshr.u64 $Xl,$Xl,#1 @
++ veor $Xl,$Xl,$Xh @
++ veor $Xl,$Xl,$t2 @
++
+ subs $len,#16
+- vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1
+- bne .Louter_neon
++ bne .Loop_neon
+
+ #ifdef __ARMEL__
+- vrev64.8 $Z,$Z
++ vrev64.8 $Xl,$Xl
+ #endif
+ sub $Xi,#16
+- vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi
+- vst1.64 `&Dlo("$Z")`,[$Xi,:64]
++ vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
++ vst1.64 $Xl#lo,[$Xi,:64]
+
+- bx lr
++ ret @ bx lr
+ .size gcm_ghash_neon,.-gcm_ghash_neon
+ #endif
+ ___
+@@ -423,7 +480,13 @@ $code.=<<___;
+ .align 2
+ ___
+
+-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+-print $code;
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/geo;
++
++ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
++ s/\bret\b/bx lr/go or
++ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
++
++ print $_,"\n";
++}
+ close STDOUT; # enforce flush
+diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
+new file mode 100644
+index 0000000..b24f3d7
+--- /dev/null
++++ b/crypto/modes/asm/ghashv8-armx.pl
+@@ -0,0 +1,240 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
++#
++# June 2014
++#
++# Initial version was developed in tight cooperation with Ard
++# Biesheuvel <[email protected]> from bits-n-pieces from
++# other assembly modules. Just like aesv8-armx.pl this module
++# supports both AArch32 and AArch64 execution modes.
++#
++# Current performance in cycles per processed byte:
++#
++# PMULL[2] 32-bit NEON(*)
++# Apple A7 1.76 5.62
++# Cortex-A5x n/a n/a
++#
++# (*) presented for reference/comparison purposes;
++
++$flavour = shift;
++open STDOUT,">".shift;
++
++$Xi="x0"; # argument block
++$Htbl="x1";
++$inp="x2";
++$len="x3";
++
++$inc="x12";
++
++{
++my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
++my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14));
++
++$code=<<___;
++#include "arm_arch.h"
++
++.text
++___
++$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
++$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
++
++$code.=<<___;
++.global gcm_init_v8
++.type gcm_init_v8,%function
++.align 4
++gcm_init_v8:
++ vld1.64 {$t1},[x1] @ load H
++ vmov.i8 $t0,#0xe1
++ vext.8 $IN,$t1,$t1,#8
++ vshl.i64 $t0,$t0,#57
++ vshr.u64 $t2,$t0,#63
++ vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01
++ vdup.32 $t1,${t1}[1]
++ vshr.u64 $t3,$IN,#63
++ vshr.s32 $t1,$t1,#31 @ broadcast carry bit
++ vand $t3,$t3,$t0
++ vshl.i64 $IN,$IN,#1
++ vext.8 $t3,$t3,$t3,#8
++ vand $t0,$t0,$t1
++ vorr $IN,$IN,$t3 @ H<<<=1
++ veor $IN,$IN,$t0 @ twisted H
++ vst1.64 {$IN},[x0]
++
++ ret
++.size gcm_init_v8,.-gcm_init_v8
++
++.global gcm_gmult_v8
++.type gcm_gmult_v8,%function
++.align 4
++gcm_gmult_v8:
++ vld1.64 {$t1},[$Xi] @ load Xi
++ vmov.i8 $t3,#0xe1
++ vld1.64 {$H},[$Htbl] @ load twisted H
++ vshl.u64 $t3,$t3,#57
++#ifndef __ARMEB__
++ vrev64.8 $t1,$t1
++#endif
++ vext.8 $Hhl,$H,$H,#8
++ mov $len,#0
++ vext.8 $IN,$t1,$t1,#8
++ mov $inc,#0
++ veor $Hhl,$Hhl,$H @ Karatsuba pre-processing
++ mov $inp,$Xi
++ b .Lgmult_v8
++.size gcm_gmult_v8,.-gcm_gmult_v8
++
++.global gcm_ghash_v8
++.type gcm_ghash_v8,%function
++.align 4
++gcm_ghash_v8:
++ vld1.64 {$Xl},[$Xi] @ load [rotated] Xi
++ subs $len,$len,#16
++ vmov.i8 $t3,#0xe1
++ mov $inc,#16
++ vld1.64 {$H},[$Htbl] @ load twisted H
++ cclr $inc,eq
++ vext.8 $Xl,$Xl,$Xl,#8
++ vshl.u64 $t3,$t3,#57
++ vld1.64 {$t1},[$inp],$inc @ load [rotated] inp
++ vext.8 $Hhl,$H,$H,#8
++#ifndef __ARMEB__
++ vrev64.8 $Xl,$Xl
++ vrev64.8 $t1,$t1
++#endif
++ veor $Hhl,$Hhl,$H @ Karatsuba pre-processing
++ vext.8 $IN,$t1,$t1,#8
++ b .Loop_v8
++
++.align 4
++.Loop_v8:
++ vext.8 $t2,$Xl,$Xl,#8
++ veor $IN,$IN,$Xl @ inp^=Xi
++ veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi
++
++.Lgmult_v8:
++ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
++ veor $t1,$t1,$IN @ Karatsuba pre-processing
++ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
++ subs $len,$len,#16
++ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
++ cclr $inc,eq
++
++ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
++ veor $t2,$Xl,$Xh
++ veor $Xm,$Xm,$t1
++ vld1.64 {$t1},[$inp],$inc @ load [rotated] inp
++ veor $Xm,$Xm,$t2
++ vpmull.p64 $t2,$Xl,$t3 @ 1st phase
++
++ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
++ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
++#ifndef __ARMEB__
++ vrev64.8 $t1,$t1
++#endif
++ veor $Xl,$Xm,$t2
++ vext.8 $IN,$t1,$t1,#8
++
++ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase
++ vpmull.p64 $Xl,$Xl,$t3
++ veor $t2,$t2,$Xh
++ veor $Xl,$Xl,$t2
++ b.hs .Loop_v8
++
++#ifndef __ARMEB__
++ vrev64.8 $Xl,$Xl
++#endif
++ vext.8 $Xl,$Xl,$Xl,#8
++ vst1.64 {$Xl},[$Xi] @ write out Xi
++
++ ret
++.size gcm_ghash_v8,.-gcm_ghash_v8
++___
++}
++$code.=<<___;
++.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++if ($flavour =~ /64/) { ######## 64-bit code
++ sub unvmov {
++ my $arg=shift;
++
++ $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
++ sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
++ }
++ foreach(split("\n",$code)) {
++ s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or
++ s/vmov\.i8/movi/o or # fix up legacy mnemonics
++ s/vmov\s+(.*)/unvmov($1)/geo or
++ s/vext\.8/ext/o or
++ s/vshr\.s/sshr\.s/o or
++ s/vshr/ushr/o or
++ s/^(\s+)v/$1/o or # strip off v prefix
++ s/\bbx\s+lr\b/ret/o;
++
++ s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
++ s/@\s/\/\//o; # old->new style commentary
++
++ # fix up remainig legacy suffixes
++ s/\.[ui]?8(\s)/$1/o;
++ s/\.[uis]?32//o and s/\.16b/\.4s/go;
++ m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument
++ m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments
++ s/\.[uisp]?64//o and s/\.16b/\.2d/go;
++ s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
++
++ print $_,"\n";
++ }
++} else { ######## 32-bit code
++ sub unvdup32 {
++ my $arg=shift;
++
++ $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
++ sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
++ }
++ sub unvpmullp64 {
++ my ($mnemonic,$arg)=@_;
++
++ if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {
++ my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19)
++ |(($2&7)<<17)|(($2&8)<<4)
++ |(($3&7)<<1) |(($3&8)<<2);
++ $word |= 0x00010001 if ($mnemonic =~ "2");
++ # since ARMv7 instructions are always encoded little-endian.
++ # correct solution is to use .inst directive, but older
++ # assemblers don't implement it:-(
++ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
++ $word&0xff,($word>>8)&0xff,
++ ($word>>16)&0xff,($word>>24)&0xff,
++ $mnemonic,$arg;
++ }
++ }
++
++ foreach(split("\n",$code)) {
++ s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers
++ s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers
++ s/\/\/\s?/@ /o; # new->old style commentary
++
++ # fix up remainig new-style suffixes
++ s/\],#[0-9]+/]!/o;
++
++ s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
++ s/vdup\.32\s+(.*)/unvdup32($1)/geo or
++ s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or
++ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
++ s/^(\s+)b\./$1b/o or
++ s/^(\s+)ret/$1bx\tlr/o;
++
++ print $_,"\n";
++ }
++}
++
++close STDOUT; # enforce flush
+diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
+index e1dc2b0..79ebb66 100644
+--- a/crypto/modes/gcm128.c
++++ b/crypto/modes/gcm128.c
+@@ -642,7 +642,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
+
+ #endif
+
+-#if TABLE_BITS==4 && defined(GHASH_ASM)
++#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
+ # if !defined(I386_ONLY) && \
+ (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+@@ -663,13 +663,21 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
+ void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+-# elif defined(__arm__) || defined(__arm)
++# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
+ # include "arm_arch.h"
+ # if __ARM_ARCH__>=7
+ # define GHASH_ASM_ARM
+ # define GCM_FUNCREF_4BIT
++# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
++# if defined(__arm__) || defined(__arm)
++# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
++# endif
++void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
+ void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
++void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+ # endif
+ #endif
+@@ -739,10 +747,21 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
+ ctx->ghash = gcm_ghash_4bit;
+ # endif
+ # elif defined(GHASH_ASM_ARM)
+- if (OPENSSL_armcap_P & ARMV7_NEON) {
++# ifdef PMULL_CAPABLE
++ if (PMULL_CAPABLE) {
++ gcm_init_v8(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_v8;
++ ctx->ghash = gcm_ghash_v8;
++ } else
++# endif
++# ifdef NEON_CAPABLE
++ if (NEON_CAPABLE) {
++ gcm_init_neon(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_neon;
+ ctx->ghash = gcm_ghash_neon;
+- } else {
++ } else
++# endif
++ {
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile
+index 2eb2b7a..6ef027d 100644
+--- a/crypto/sha/Makefile
++++ b/crypto/sha/Makefile
+@@ -92,6 +92,9 @@ sha512-%.S: asm/sha512-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+ sha1-armv4-large.o: sha1-armv4-large.S
+ sha256-armv4.o: sha256-armv4.S
+ sha512-armv4.o: sha512-armv4.S
++sha1-armv8.o: sha1-armv8.S
++sha256-armv8.o: sha256-armv8.S
++sha512-armv8.o: sha512-armv8.S
+
+ files:
+ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
+index 33da3e0..50bd07b 100644
+--- a/crypto/sha/asm/sha1-armv4-large.pl
++++ b/crypto/sha/asm/sha1-armv4-large.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -52,6 +52,20 @@
+ # Profiler-assisted and platform-specific optimization resulted in 10%
+ # improvement on Cortex A8 core and 12.2 cycles per byte.
+
++# September 2013.
++#
++# Add NEON implementation (see sha1-586.pl for background info). On
++# Cortex A8 it was measured to process one byte in 6.7 cycles or >80%
++# faster than integer-only code. Because [fully unrolled] NEON code
++# is ~2.5x larger and there are some redundant instructions executed
++# when processing last block, improvement is not as big for smallest
++# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per
++# byte, which is also >80% faster than integer-only code.
++
++# May 2014.
++#
++# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
++
+ while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+ open STDOUT,">$output";
+
+@@ -153,12 +167,22 @@ $code=<<___;
+ #include "arm_arch.h"
+
+ .text
++.code 32
+
+ .global sha1_block_data_order
+ .type sha1_block_data_order,%function
+
+-.align 2
++.align 5
+ sha1_block_data_order:
++#if __ARM_ARCH__>=7
++ sub r3,pc,#8 @ sha1_block_data_order
++ ldr r12,.LOPENSSL_armcap
++ ldr r12,[r3,r12] @ OPENSSL_armcap_P
++ tst r12,#ARMV8_SHA1
++ bne .LARMv8
++ tst r12,#ARMV7_NEON
++ bne .LNEON
++#endif
+ stmdb sp!,{r4-r12,lr}
+ add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
+ ldmia $ctx,{$a,$b,$c,$d,$e}
+@@ -233,16 +257,422 @@ $code.=<<___;
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+ #endif
+-.align 2
++.size sha1_block_data_order,.-sha1_block_data_order
++
++.align 5
+ .LK_00_19: .word 0x5a827999
+ .LK_20_39: .word 0x6ed9eba1
+ .LK_40_59: .word 0x8f1bbcdc
+ .LK_60_79: .word 0xca62c1d6
+-.size sha1_block_data_order,.-sha1_block_data_order
+-.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+-.align 2
++.LOPENSSL_armcap:
++.word OPENSSL_armcap_P-sha1_block_data_order
++.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
++.align 5
++___
++#####################################################################
++# NEON stuff
++#
++{{{
++my @V=($a,$b,$c,$d,$e);
++my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14));
++my $Xi=4;
++my @X=map("q$_",(8..11,0..3));
++my @Tx=("q12","q13");
++my ($K,$zero)=("q14","q15");
++my $j=0;
++
++sub AUTOLOAD() # thunk [simplified] x86-style perlasm
++{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
++ my $arg = pop;
++ $arg = "#$arg" if ($arg*1 eq $arg);
++ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
++}
++
++sub body_00_19 () {
++ (
++ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
++ '&bic ($t0,$d,$b)',
++ '&add ($e,$e,$Ki)', # e+=X[i]+K
++ '&and ($t1,$c,$b)',
++ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
++ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
++ '&eor ($t1,$t1,$t0)', # F_00_19
++ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
++ '&add ($e,$e,$t1);'. # e+=F_00_19
++ '$j++; unshift(@V,pop(@V));'
++ )
++}
++sub body_20_39 () {
++ (
++ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
++ '&eor ($t0,$b,$d)',
++ '&add ($e,$e,$Ki)', # e+=X[i]+K
++ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)',
++ '&eor ($t1,$t0,$c)', # F_20_39
++ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
++ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
++ '&add ($e,$e,$t1);'. # e+=F_20_39
++ '$j++; unshift(@V,pop(@V));'
++ )
++}
++sub body_40_59 () {
++ (
++ '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'.
++ '&add ($e,$e,$Ki)', # e+=X[i]+K
++ '&and ($t0,$c,$d)',
++ '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',
++ '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27)
++ '&eor ($t1,$c,$d)',
++ '&add ($e,$e,$t0)',
++ '&and ($t1,$t1,$b)',
++ '&mov ($b,$b,"ror#2")', # b=ROR(b,2)
++ '&add ($e,$e,$t1);'. # e+=F_40_59
++ '$j++; unshift(@V,pop(@V));'
++ )
++}
++
++sub Xupdate_16_31 ()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e);
++
++ &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@Tx[1],@X[-1&7],$K);
++ eval(shift(@insns));
++ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
++ eval(shift(@insns));
++ &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@Tx[0],@Tx[0],@X[0]); # "X[0]"^="X[-3]"^"X[-8]
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
++ &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@X[0],@Tx[0],@Tx[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 (@Tx[0],@Tx[1],30);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshl_u32 (@Tx[1],@Tx[1],2);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@X[0],@X[0],@Tx[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
++
++ foreach (@insns) { eval; } # remaining instructions [if any]
++
++ $Xi++; push(@X,shift(@X)); # "rotate" X[]
++}
++
++sub Xupdate_32_79 ()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e);
++
++ &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@Tx[1],@X[-1&7],$K);
++ eval(shift(@insns));
++ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0);
++ eval(shift(@insns));
++ &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]"
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 (@X[0],@Tx[0],30);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer
++ &sub ($Xfer,$Xfer,64) if ($Xi%4==0);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2
++
++ foreach (@insns) { eval; } # remaining instructions [if any]
++
++ $Xi++; push(@X,shift(@X)); # "rotate" X[]
++}
++
++sub Xuplast_80 ()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e);
++
++ &vadd_i32 (@Tx[1],@X[-1&7],$K);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!");
++ &sub ($Xfer,$Xfer,64);
++
++ &teq ($inp,$len);
++ &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
++ &subeq ($inp,$inp,64); # reload last block to avoid SEGV
++ &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!");
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vrev32_8 (@X[-4&7],@X[-4&7]);
++
++ foreach (@insns) { eval; } # remaining instructions
++
++ $Xi=0;
++}
++
++sub Xloop()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e);
++
++ &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU
++
++ foreach (@insns) { eval; }
++
++ $Xi++;
++}
++
++$code.=<<___;
++#if __ARM_ARCH__>=7
++.fpu neon
++
++.type sha1_block_data_order_neon,%function
++.align 4
++sha1_block_data_order_neon:
++.LNEON:
++ stmdb sp!,{r4-r12,lr}
++ add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
++ @ dmb @ errata #451034 on early Cortex A8
++ @ vstmdb sp!,{d8-d15} @ ABI specification says so
++ mov $saved_sp,sp
++ sub sp,sp,#64 @ alloca
++ adr $K_XX_XX,.LK_00_19
++ bic sp,sp,#15 @ align for 128-bit stores
++
++ ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
++ mov $Xfer,sp
++
++ vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
++ veor $zero,$zero,$zero
++ vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]!
++ vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19
++ vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on
++ vrev32.8 @X[-3&7],@X[-3&7] @ big-endian...
++ vrev32.8 @X[-2&7],@X[-2&7]
++ vadd.i32 @X[0],@X[-4&7],$K
++ vrev32.8 @X[-1&7],@X[-1&7]
++ vadd.i32 @X[1],@X[-3&7],$K
++ vst1.32 {@X[0]},[$Xfer,:128]!
++ vadd.i32 @X[2],@X[-2&7],$K
++ vst1.32 {@X[1]},[$Xfer,:128]!
++ vst1.32 {@X[2]},[$Xfer,:128]!
++ ldr $Ki,[sp] @ big RAW stall
++
++.Loop_neon:
++___
++ &Xupdate_16_31(\&body_00_19);
++ &Xupdate_16_31(\&body_00_19);
++ &Xupdate_16_31(\&body_00_19);
++ &Xupdate_16_31(\&body_00_19);
++ &Xupdate_32_79(\&body_00_19);
++ &Xupdate_32_79(\&body_20_39);
++ &Xupdate_32_79(\&body_20_39);
++ &Xupdate_32_79(\&body_20_39);
++ &Xupdate_32_79(\&body_20_39);
++ &Xupdate_32_79(\&body_20_39);
++ &Xupdate_32_79(\&body_40_59);
++ &Xupdate_32_79(\&body_40_59);
++ &Xupdate_32_79(\&body_40_59);
++ &Xupdate_32_79(\&body_40_59);
++ &Xupdate_32_79(\&body_40_59);
++ &Xupdate_32_79(\&body_20_39);
++ &Xuplast_80(\&body_20_39);
++ &Xloop(\&body_20_39);
++ &Xloop(\&body_20_39);
++ &Xloop(\&body_20_39);
++$code.=<<___;
++ ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context
++ add $a,$a,$Ki
++ ldr $Ki,[$ctx,#16]
++ add $b,$b,$t0
++ add $c,$c,$t1
++ add $d,$d,$Xfer
++ moveq sp,$saved_sp
++ add $e,$e,$Ki
++ ldrne $Ki,[sp]
++ stmia $ctx,{$a,$b,$c,$d,$e}
++ addne $Xfer,sp,#3*16
++ bne .Loop_neon
++
++ @ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r12,pc}
++.size sha1_block_data_order_neon,.-sha1_block_data_order_neon
++#endif
++___
++}}}
++#####################################################################
++# ARMv8 stuff
++#
++{{{
++my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3));
++my @MSG=map("q$_",(4..7));
++my @Kxx=map("q$_",(8..11));
++my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
++
++$code.=<<___;
++#if __ARM_ARCH__>=7
++.type sha1_block_data_order_armv8,%function
++.align 5
++sha1_block_data_order_armv8:
++.LARMv8:
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++
++ veor $E,$E,$E
++ adr r3,.LK_00_19
++ vld1.32 {$ABCD},[$ctx]!
++ vld1.32 {$E\[0]},[$ctx]
++ sub $ctx,$ctx,#16
++ vld1.32 {@Kxx[0]\[]},[r3,:32]!
++ vld1.32 {@Kxx[1]\[]},[r3,:32]!
++ vld1.32 {@Kxx[2]\[]},[r3,:32]!
++ vld1.32 {@Kxx[3]\[]},[r3,:32]
++
++.Loop_v8:
++ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
++ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
++ vrev32.8 @MSG[0],@MSG[0]
++ vrev32.8 @MSG[1],@MSG[1]
++
++ vadd.i32 $W0,@Kxx[0],@MSG[0]
++ vrev32.8 @MSG[2],@MSG[2]
++ vmov $ABCD_SAVE,$ABCD @ offload
++ subs $len,$len,#1
++
++ vadd.i32 $W1,@Kxx[0],@MSG[1]
++ vrev32.8 @MSG[3],@MSG[3]
++ sha1h $E1,$ABCD @ 0
++ sha1c $ABCD,$E,$W0
++ vadd.i32 $W0,@Kxx[$j],@MSG[2]
++ sha1su0 @MSG[0],@MSG[1],@MSG[2]
++___
++for ($j=0,$i=1;$i<20-3;$i++) {
++my $f=("c","p","m","p")[$i/5];
++$code.=<<___;
++ sha1h $E0,$ABCD @ $i
++ sha1$f $ABCD,$E1,$W1
++ vadd.i32 $W1,@Kxx[$j],@MSG[3]
++ sha1su1 @MSG[0],@MSG[3]
++___
++$code.=<<___ if ($i<20-4);
++ sha1su0 @MSG[1],@MSG[2],@MSG[3]
+ ___
++ ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
++ push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
++}
++$code.=<<___;
++ sha1h $E0,$ABCD @ $i
++ sha1p $ABCD,$E1,$W1
++ vadd.i32 $W1,@Kxx[$j],@MSG[3]
++
++ sha1h $E1,$ABCD @ 18
++ sha1p $ABCD,$E0,$W0
++
++ sha1h $E0,$ABCD @ 19
++ sha1p $ABCD,$E1,$W1
++
++ vadd.i32 $E,$E,$E0
++ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
++ bne .Loop_v8
++
++ vst1.32 {$ABCD},[$ctx]!
++ vst1.32 {$E\[0]},[$ctx]
++
++ vldmia sp!,{d8-d15}
++ ret @ bx lr
++.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
++#endif
++___
++}}}
++$code.=<<___;
++.comm OPENSSL_armcap_P,4,4
++___
++
++{ my %opcode = (
++ "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40,
++ "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40,
++ "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 );
++
++ sub unsha1 {
++ my ($mnemonic,$arg)=@_;
++
++ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
++ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
++ |(($2&7)<<17)|(($2&8)<<4)
++ |(($3&7)<<1) |(($3&8)<<2);
++ # since ARMv7 instructions are always encoded little-endian.
++ # correct solution is to use .inst directive, but older
++ # assemblers don't implement it:-(
++ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
++ $word&0xff,($word>>8)&0xff,
++ ($word>>16)&0xff,($word>>24)&0xff,
++ $mnemonic,$arg;
++ }
++ }
++}
++
++foreach (split($/,$code)) {
++ s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or
++ s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo;
++
++ s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;
++
++ s/\bret\b/bx lr/o or
++ s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4
++
++ print $_,$/;
++}
+
+-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+-print $code;
+ close STDOUT; # enforce flush
+diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
+new file mode 100644
+index 0000000..c1f552b
+--- /dev/null
++++ b/crypto/sha/asm/sha1-armv8.pl
+@@ -0,0 +1,333 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# SHA1 for ARMv8.
++#
++# Performance in cycles per processed byte and improvement coefficient
++# over code generated with "default" compiler:
++#
++# hardware-assisted software(*)
++# Apple A7 2.31 4.13 (+14%)
++# Cortex-A5x n/a n/a
++#
++# (*) Software results are presented mostly for reference purposes.
++
++$flavour = shift;
++open STDOUT,">".shift;
++
++($ctx,$inp,$num)=("x0","x1","x2");
++@Xw=map("w$_",(3..17,19));
++@Xx=map("x$_",(3..17,19));
++@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));
++($t0,$t1,$t2,$K)=map("w$_",(25..28));
++
++
++sub BODY_00_19 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=($i+2)&15;
++
++$code.=<<___ if ($i<15 && !($i&1));
++ lsr @Xx[$i+1],@Xx[$i],#32
++___
++$code.=<<___ if ($i<14 && !($i&1));
++ ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`]
++___
++$code.=<<___ if ($i<14 && ($i&1));
++#ifdef __ARMEB__
++ ror @Xx[$i+1],@Xx[$i+1],#32
++#else
++ rev32 @Xx[$i+1],@Xx[$i+1]
++#endif
++___
++$code.=<<___ if ($i<14);
++ bic $t0,$d,$b
++ and $t1,$c,$b
++ ror $t2,$a,#27
++ add $d,$d,$K // future e+=K
++ orr $t0,$t0,$t1
++ add $e,$e,$t2 // e+=rot(a,5)
++ ror $b,$b,#2
++ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
++ add $e,$e,$t0 // e+=F(b,c,d)
++___
++$code.=<<___ if ($i==19);
++ movz $K,#0xeba1
++ movk $K,#0x6ed9,lsl#16
++___
++$code.=<<___ if ($i>=14);
++ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
++ bic $t0,$d,$b
++ and $t1,$c,$b
++ ror $t2,$a,#27
++ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
++ add $d,$d,$K // future e+=K
++ orr $t0,$t0,$t1
++ add $e,$e,$t2 // e+=rot(a,5)
++ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
++ ror $b,$b,#2
++ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
++ add $e,$e,$t0 // e+=F(b,c,d)
++ ror @Xw[$j],@Xw[$j],#31
++___
++}
++
++sub BODY_40_59 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=($i+2)&15;
++
++$code.=<<___ if ($i==59);
++ movz $K,#0xc1d6
++ movk $K,#0xca62,lsl#16
++___
++$code.=<<___;
++ orr $t0,$b,$c
++ and $t1,$b,$c
++ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
++ ror $t2,$a,#27
++ and $t0,$t0,$d
++ add $d,$d,$K // future e+=K
++ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
++ add $e,$e,$t2 // e+=rot(a,5)
++ orr $t0,$t0,$t1
++ ror $b,$b,#2
++ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
++ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
++ add $e,$e,$t0 // e+=F(b,c,d)
++ ror @Xw[$j],@Xw[$j],#31
++___
++}
++
++sub BODY_20_39 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=($i+2)&15;
++
++$code.=<<___ if ($i==39);
++ movz $K,#0xbcdc
++ movk $K,#0x8f1b,lsl#16
++___
++$code.=<<___ if ($i<78);
++ eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
++ eor $t0,$d,$b
++ ror $t2,$a,#27
++ add $d,$d,$K // future e+=K
++ eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
++ eor $t0,$t0,$c
++ add $e,$e,$t2 // e+=rot(a,5)
++ ror $b,$b,#2
++ eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
++ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
++ add $e,$e,$t0 // e+=F(b,c,d)
++ ror @Xw[$j],@Xw[$j],#31
++___
++$code.=<<___ if ($i==78);
++ ldp @Xw[1],@Xw[2],[$ctx]
++ eor $t0,$d,$b
++ ror $t2,$a,#27
++ add $d,$d,$K // future e+=K
++ eor $t0,$t0,$c
++ add $e,$e,$t2 // e+=rot(a,5)
++ ror $b,$b,#2
++ add $d,$d,@Xw[($i+1)&15] // future e+=X[i]
++ add $e,$e,$t0 // e+=F(b,c,d)
++___
++$code.=<<___ if ($i==79);
++ ldp @Xw[3],@Xw[4],[$ctx,#8]
++ eor $t0,$d,$b
++ ror $t2,$a,#27
++ eor $t0,$t0,$c
++ add $e,$e,$t2 // e+=rot(a,5)
++ ror $b,$b,#2
++ ldr @Xw[5],[$ctx,#16]
++ add $e,$e,$t0 // e+=F(b,c,d)
++___
++}
++
++$code.=<<___;
++#include "arm_arch.h"
++
++.text
++
++.globl sha1_block_data_order
++.type sha1_block_data_order,%function
++.align 6
++sha1_block_data_order:
++ ldr x16,.LOPENSSL_armcap_P
++ adr x17,.LOPENSSL_armcap_P
++ add x16,x16,x17
++ ldr w16,[x16]
++ tst w16,#ARMV8_SHA1
++ b.ne .Lv8_entry
++
++ stp x29,x30,[sp,#-96]!
++ add x29,sp,#0
++ stp x19,x20,[sp,#16]
++ stp x21,x22,[sp,#32]
++ stp x23,x24,[sp,#48]
++ stp x25,x26,[sp,#64]
++ stp x27,x28,[sp,#80]
++
++ ldp $A,$B,[$ctx]
++ ldp $C,$D,[$ctx,#8]
++ ldr $E,[$ctx,#16]
++
++.Loop:
++ ldr @Xx[0],[$inp],#64
++ movz $K,#0x7999
++ sub $num,$num,#1
++ movk $K,#0x5a82,lsl#16
++#ifdef __ARMEB__
++ ror $Xx[0],@Xx[0],#32
++#else
++ rev32 @Xx[0],@Xx[0]
++#endif
++ add $E,$E,$K // warm it up
++ add $E,$E,@Xw[0]
++___
++for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
++for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
++for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ add $B,$B,@Xw[2]
++ add $C,$C,@Xw[3]
++ add $A,$A,@Xw[1]
++ add $D,$D,@Xw[4]
++ add $E,$E,@Xw[5]
++ stp $A,$B,[$ctx]
++ stp $C,$D,[$ctx,#8]
++ str $E,[$ctx,#16]
++ cbnz $num,.Loop
++
++ ldp x19,x20,[sp,#16]
++ ldp x21,x22,[sp,#32]
++ ldp x23,x24,[sp,#48]
++ ldp x25,x26,[sp,#64]
++ ldp x27,x28,[sp,#80]
++ ldr x29,[sp],#96
++ ret
++.size sha1_block_data_order,.-sha1_block_data_order
++___
++{{{
++my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
++my @MSG=map("v$_.16b",(4..7));
++my @Kxx=map("v$_.4s",(16..19));
++my ($W0,$W1)=("v20.4s","v21.4s");
++my $ABCD_SAVE="v22.16b";
++
++$code.=<<___;
++.type sha1_block_armv8,%function
++.align 6
++sha1_block_armv8:
++.Lv8_entry:
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++
++ adr x4,.Lconst
++ eor $E,$E,$E
++ ld1.32 {$ABCD},[$ctx],#16
++ ld1.32 {$E}[0],[$ctx]
++ sub $ctx,$ctx,#16
++ ld1.32 {@Kxx[0]-@Kxx[3]},[x4]
++
++.Loop_hw:
++ ld1 {@MSG[0]-@MSG[3]},[$inp],#64
++ sub $num,$num,#1
++ rev32 @MSG[0],@MSG[0]
++ rev32 @MSG[1],@MSG[1]
++
++ add.i32 $W0,@Kxx[0],@MSG[0]
++ rev32 @MSG[2],@MSG[2]
++ orr $ABCD_SAVE,$ABCD,$ABCD // offload
++
++ add.i32 $W1,@Kxx[0],@MSG[1]
++ rev32 @MSG[3],@MSG[3]
++ sha1h $E1,$ABCD
++ sha1c $ABCD,$E,$W0 // 0
++ add.i32 $W0,@Kxx[$j],@MSG[2]
++ sha1su0 @MSG[0],@MSG[1],@MSG[2]
++___
++for ($j=0,$i=1;$i<20-3;$i++) {
++my $f=("c","p","m","p")[$i/5];
++$code.=<<___;
++ sha1h $E0,$ABCD // $i
++ sha1$f $ABCD,$E1,$W1
++ add.i32 $W1,@Kxx[$j],@MSG[3]
++ sha1su1 @MSG[0],@MSG[3]
++___
++$code.=<<___ if ($i<20-4);
++ sha1su0 @MSG[1],@MSG[2],@MSG[3]
++___
++ ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0);
++ push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
++}
++$code.=<<___;
++ sha1h $E0,$ABCD // $i
++ sha1p $ABCD,$E1,$W1
++ add.i32 $W1,@Kxx[$j],@MSG[3]
++
++ sha1h $E1,$ABCD // 18
++ sha1p $ABCD,$E0,$W0
++
++ sha1h $E0,$ABCD // 19
++ sha1p $ABCD,$E1,$W1
++
++ add.i32 $E,$E,$E0
++ add.i32 $ABCD,$ABCD,$ABCD_SAVE
++
++ cbnz $num,.Loop_hw
++
++ st1.32 {$ABCD},[$ctx],#16
++ st1.32 {$E}[0],[$ctx]
++
++ ldr x29,[sp],#16
++ ret
++.size sha1_block_armv8,.-sha1_block_armv8
++.align 6
++.Lconst:
++.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
++.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
++.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
++.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
++.LOPENSSL_armcap_P:
++.quad OPENSSL_armcap_P-.
++.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++.comm OPENSSL_armcap_P,4,4
++___
++}}}
++
++{ my %opcode = (
++ "sha1c" => 0x5e000000, "sha1p" => 0x5e001000,
++ "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000,
++ "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 );
++
++ sub unsha1 {
++ my ($mnemonic,$arg)=@_;
++
++ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
++ &&
++ sprintf ".inst\t0x%08x\t//%s %s",
++ $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
++ $mnemonic,$arg;
++ }
++}
++
++foreach(split("\n",$code)) {
++
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo;
++
++ s/\.\w?32\b//o and s/\.16b/\.4s/go;
++ m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
++
++ print $_,"\n";
++}
++
++close STDOUT;
+diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
+index 9c84e8d..505ca8f 100644
+--- a/crypto/sha/asm/sha256-armv4.pl
++++ b/crypto/sha/asm/sha256-armv4.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+-# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -21,15 +21,27 @@
+ # February 2011.
+ #
+ # Profiler-assisted and platform-specific optimization resulted in 16%
+-# improvement on Cortex A8 core and ~17 cycles per processed byte.
++# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
++
++# September 2013.
++#
++# Add NEON implementation. On Cortex A8 it was measured to process one
++# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
++# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
++# code (meaning that latter performs sub-optimally, nothing was done
++# about it).
++
++# May 2014.
++#
++# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+ while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+ open STDOUT,">$output";
+
+ $ctx="r0"; $t0="r0";
+-$inp="r1"; $t3="r1";
++$inp="r1"; $t4="r1";
+ $len="r2"; $t1="r2";
+-$T1="r3";
++$T1="r3"; $t3="r3";
+ $A="r4";
+ $B="r5";
+ $C="r6";
+@@ -52,71 +64,88 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+ $code.=<<___ if ($i<16);
+ #if __ARM_ARCH__>=7
+- ldr $T1,[$inp],#4
++ @ ldr $t1,[$inp],#4 @ $i
++# if $i==15
++ str $inp,[sp,#17*4] @ make room for $t4
++# endif
++ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
++ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
++ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
++ rev $t1,$t1
+ #else
+- ldrb $T1,[$inp,#3] @ $i
++ @ ldrb $t1,[$inp,#3] @ $i
++ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ ldrb $t2,[$inp,#2]
+- ldrb $t1,[$inp,#1]
+- ldrb $t0,[$inp],#4
+- orr $T1,$T1,$t2,lsl#8
+- orr $T1,$T1,$t1,lsl#16
+- orr $T1,$T1,$t0,lsl#24
++ ldrb $t0,[$inp,#1]
++ orr $t1,$t1,$t2,lsl#8
++ ldrb $t2,[$inp],#4
++ orr $t1,$t1,$t0,lsl#16
++# if $i==15
++ str $inp,[sp,#17*4] @ make room for $t4
++# endif
++ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
++ orr $t1,$t1,$t2,lsl#24
++ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ #endif
+ ___
+ $code.=<<___;
+- mov $t0,$e,ror#$Sigma1[0]
+ ldr $t2,[$Ktbl],#4 @ *K256++
+- eor $t0,$t0,$e,ror#$Sigma1[1]
++ add $h,$h,$t1 @ h+=X[i]
++ str $t1,[sp,#`$i%16`*4]
+ eor $t1,$f,$g
+-#if $i>=16
+- add $T1,$T1,$t3 @ from BODY_16_xx
+-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
+- rev $T1,$T1
+-#endif
+-#if $i==15
+- str $inp,[sp,#17*4] @ leave room for $t3
+-#endif
+- eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e)
++ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
+ and $t1,$t1,$e
+- str $T1,[sp,#`$i%16`*4]
+- add $T1,$T1,$t0
++ add $h,$h,$t2 @ h+=K256[i]
+ eor $t1,$t1,$g @ Ch(e,f,g)
+- add $T1,$T1,$h
+- mov $h,$a,ror#$Sigma0[0]
+- add $T1,$T1,$t1
+- eor $h,$h,$a,ror#$Sigma0[1]
+- add $T1,$T1,$t2
+- eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a)
+-#if $i>=15
+- ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx
++ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
++ add $h,$h,$t1 @ h+=Ch(e,f,g)
++#if $i==31
++ and $t2,$t2,#0xff
++ cmp $t2,#0xf2 @ done?
+ #endif
+- orr $t0,$a,$b
+- and $t1,$a,$b
+- and $t0,$t0,$c
+- add $h,$h,$T1
+- orr $t0,$t0,$t1 @ Maj(a,b,c)
+- add $d,$d,$T1
+- add $h,$h,$t0
++#if $i<15
++# if __ARM_ARCH__>=7
++ ldr $t1,[$inp],#4 @ prefetch
++# else
++ ldrb $t1,[$inp,#3]
++# endif
++ eor $t2,$a,$b @ a^b, b^c in next round
++#else
++ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
++ eor $t2,$a,$b @ a^b, b^c in next round
++ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
++#endif
++ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
++ and $t3,$t3,$t2 @ (b^c)&=(a^b)
++ add $d,$d,$h @ d+=h
++ eor $t3,$t3,$b @ Maj(a,b,c)
++ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
++ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
+ ___
++ ($t2,$t3)=($t3,$t2);
+ }
+
+ sub BODY_16_XX {
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+ $code.=<<___;
+- @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i
+- ldr $t2,[sp,#`($i+14)%16`*4]
+- mov $t0,$t3,ror#$sigma0[0]
+- ldr $T1,[sp,#`($i+0)%16`*4]
+- eor $t0,$t0,$t3,ror#$sigma0[1]
+- ldr $t1,[sp,#`($i+9)%16`*4]
+- eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1])
+- mov $t3,$t2,ror#$sigma1[0]
+- add $T1,$T1,$t0
+- eor $t3,$t3,$t2,ror#$sigma1[1]
+- add $T1,$T1,$t1
+- eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14])
+- @ add $T1,$T1,$t3
++ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
++ @ ldr $t4,[sp,#`($i+14)%16`*4]
++ mov $t0,$t1,ror#$sigma0[0]
++ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
++ mov $t2,$t4,ror#$sigma1[0]
++ eor $t0,$t0,$t1,ror#$sigma0[1]
++ eor $t2,$t2,$t4,ror#$sigma1[1]
++ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
++ ldr $t1,[sp,#`($i+0)%16`*4]
++ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
++ ldr $t4,[sp,#`($i+9)%16`*4]
++
++ add $t2,$t2,$t0
++ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
++ add $t1,$t1,$t2
++ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
++ add $t1,$t1,$t4 @ X[i]
+ ___
+ &BODY_00_15(@_);
+ }
+@@ -147,46 +176,64 @@ K256:
+ .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ .size K256,.-K256
++.word 0 @ terminator
++.LOPENSSL_armcap:
++.word OPENSSL_armcap_P-sha256_block_data_order
++.align 5
+
+ .global sha256_block_data_order
+ .type sha256_block_data_order,%function
+ sha256_block_data_order:
+ sub r3,pc,#8 @ sha256_block_data_order
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
++#if __ARM_ARCH__>=7
++ ldr r12,.LOPENSSL_armcap
++ ldr r12,[r3,r12] @ OPENSSL_armcap_P
++ tst r12,#ARMV8_SHA256
++ bne .LARMv8
++ tst r12,#ARMV7_NEON
++ bne .LNEON
++#endif
+ stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
+ ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+- sub $Ktbl,r3,#256 @ K256
++ sub $Ktbl,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+ .Loop:
++# if __ARM_ARCH__>=7
++ ldr $t1,[$inp],#4
++# else
++ ldrb $t1,[$inp,#3]
++# endif
++ eor $t3,$B,$C @ magic
++ eor $t2,$t2,$t2
+ ___
+ for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
+ $code.=".Lrounds_16_xx:\n";
+ for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
+ $code.=<<___;
+- and $t2,$t2,#0xff
+- cmp $t2,#0xf2
++ ldreq $t3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+- ldr $T1,[sp,#16*4] @ pull ctx
+- ldr $t0,[$T1,#0]
+- ldr $t1,[$T1,#4]
+- ldr $t2,[$T1,#8]
++ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
++ ldr $t0,[$t3,#0]
++ ldr $t1,[$t3,#4]
++ ldr $t2,[$t3,#8]
+ add $A,$A,$t0
+- ldr $t0,[$T1,#12]
++ ldr $t0,[$t3,#12]
+ add $B,$B,$t1
+- ldr $t1,[$T1,#16]
++ ldr $t1,[$t3,#16]
+ add $C,$C,$t2
+- ldr $t2,[$T1,#20]
++ ldr $t2,[$t3,#20]
+ add $D,$D,$t0
+- ldr $t0,[$T1,#24]
++ ldr $t0,[$t3,#24]
+ add $E,$E,$t1
+- ldr $t1,[$T1,#28]
++ ldr $t1,[$t3,#28]
+ add $F,$F,$t2
+ ldr $inp,[sp,#17*4] @ pull inp
+ ldr $t2,[sp,#18*4] @ pull inp+len
+ add $G,$G,$t0
+ add $H,$H,$t1
+- stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H}
++ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
+ cmp $inp,$t2
+ sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
+ bne .Loop
+@@ -200,12 +247,410 @@ $code.=<<___;
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+ #endif
+-.size sha256_block_data_order,.-sha256_block_data_order
+-.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
++.size sha256_block_data_order,.-sha256_block_data_order
++___
++######################################################################
++# NEON stuff
++#
++{{{
++my @X=map("q$_",(0..3));
++my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
++my $Xfer=$t4;
++my $j=0;
++
++sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
++sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
++
++sub AUTOLOAD() # thunk [simplified] x86-style perlasm
++{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
++ my $arg = pop;
++ $arg = "#$arg" if ($arg*1 eq $arg);
++ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
++}
++
++sub Xupdate()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e,$f,$g,$h);
++
++ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T2,$T0,$sigma0[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T1,$T0,$sigma0[2]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T2,$T0,32-$sigma0[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T3,$T0,$sigma0[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T1,$T1,$T2);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T3,$T0,32-$sigma0[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T5,$T5,$T4);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T5,$T5,$T4);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 ($T0,$T0,@X[0]);
++ while($#insns>=2) { eval(shift(@insns)); }
++ &vst1_32 ("{$T0}","[$Xfer,:128]!");
++ eval(shift(@insns));
++ eval(shift(@insns));
++
++ push(@X,shift(@X)); # "rotate" X[]
++}
++
++sub Xpreload()
++{ use integer;
++ my $body = shift;
++ my @insns = (&$body,&$body,&$body,&$body);
++ my ($a,$b,$c,$d,$e,$f,$g,$h);
++
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vrev32_8 (@X[0],@X[0]);
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ eval(shift(@insns));
++ &vadd_i32 ($T0,$T0,@X[0]);
++ foreach (@insns) { eval; } # remaining instructions
++ &vst1_32 ("{$T0}","[$Xfer,:128]!");
++
++ push(@X,shift(@X)); # "rotate" X[]
++}
++
++sub body_00_15 () {
++ (
++ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
++ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
++ '&eor ($t1,$f,$g)',
++ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
++ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
++ '&and ($t1,$t1,$e)',
++ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
++ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
++ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
++ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
++ '&eor ($t2,$a,$b)', # a^b, b^c in next round
++ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
++ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
++ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
++ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
++ '&ldr ($t1,"[sp,#64]") if ($j==31)',
++ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
++ '&add ($d,$d,$h)', # d+=h
++ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
++ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
++ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
++ )
++}
++
++$code.=<<___;
++#if __ARM_ARCH__>=7
++.fpu neon
++
++.type sha256_block_data_order_neon,%function
++.align 4
++sha256_block_data_order_neon:
++.LNEON:
++ stmdb sp!,{r4-r12,lr}
++
++ mov $t2,sp
++ sub sp,sp,#16*4+16 @ alloca
++ sub $Ktbl,r3,#256+32 @ K256
++ bic sp,sp,#15 @ align for 128-bit stores
++
++ vld1.8 {@X[0]},[$inp]!
++ vld1.8 {@X[1]},[$inp]!
++ vld1.8 {@X[2]},[$inp]!
++ vld1.8 {@X[3]},[$inp]!
++ vld1.32 {$T0},[$Ktbl,:128]!
++ vld1.32 {$T1},[$Ktbl,:128]!
++ vld1.32 {$T2},[$Ktbl,:128]!
++ vld1.32 {$T3},[$Ktbl,:128]!
++ vrev32.8 @X[0],@X[0] @ yes, even on
++ str $ctx,[sp,#64]
++ vrev32.8 @X[1],@X[1] @ big-endian
++ str $inp,[sp,#68]
++ mov $Xfer,sp
++ vrev32.8 @X[2],@X[2]
++ str $len,[sp,#72]
++ vrev32.8 @X[3],@X[3]
++ str $t2,[sp,#76] @ save original sp
++ vadd.i32 $T0,$T0,@X[0]
++ vadd.i32 $T1,$T1,@X[1]
++ vst1.32 {$T0},[$Xfer,:128]!
++ vadd.i32 $T2,$T2,@X[2]
++ vst1.32 {$T1},[$Xfer,:128]!
++ vadd.i32 $T3,$T3,@X[3]
++ vst1.32 {$T2},[$Xfer,:128]!
++ vst1.32 {$T3},[$Xfer,:128]!
++
++ ldmia $ctx,{$A-$H}
++ sub $Xfer,$Xfer,#64
++ ldr $t1,[sp,#0]
++ eor $t2,$t2,$t2
++ eor $t3,$B,$C
++ b .L_00_48
++
++.align 4
++.L_00_48:
++___
++ &Xupdate(\&body_00_15);
++ &Xupdate(\&body_00_15);
++ &Xupdate(\&body_00_15);
++ &Xupdate(\&body_00_15);
++$code.=<<___;
++ teq $t1,#0 @ check for K256 terminator
++ ldr $t1,[sp,#0]
++ sub $Xfer,$Xfer,#64
++ bne .L_00_48
++
++ ldr $inp,[sp,#68]
++ ldr $t0,[sp,#72]
++ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
++ teq $inp,$t0
++ subeq $inp,$inp,#64 @ avoid SEGV
++ vld1.8 {@X[0]},[$inp]! @ load next input block
++ vld1.8 {@X[1]},[$inp]!
++ vld1.8 {@X[2]},[$inp]!
++ vld1.8 {@X[3]},[$inp]!
++ strne $inp,[sp,#68]
++ mov $Xfer,sp
++___
++ &Xpreload(\&body_00_15);
++ &Xpreload(\&body_00_15);
++ &Xpreload(\&body_00_15);
++ &Xpreload(\&body_00_15);
++$code.=<<___;
++ ldr $t0,[$t1,#0]
++ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
++ ldr $t2,[$t1,#4]
++ ldr $t3,[$t1,#8]
++ ldr $t4,[$t1,#12]
++ add $A,$A,$t0 @ accumulate
++ ldr $t0,[$t1,#16]
++ add $B,$B,$t2
++ ldr $t2,[$t1,#20]
++ add $C,$C,$t3
++ ldr $t3,[$t1,#24]
++ add $D,$D,$t4
++ ldr $t4,[$t1,#28]
++ add $E,$E,$t0
++ str $A,[$t1],#4
++ add $F,$F,$t2
++ str $B,[$t1],#4
++ add $G,$G,$t3
++ str $C,[$t1],#4
++ add $H,$H,$t4
++ str $D,[$t1],#4
++ stmia $t1,{$E-$H}
++
++ movne $Xfer,sp
++ ldrne $t1,[sp,#0]
++ eorne $t2,$t2,$t2
++ ldreq sp,[sp,#76] @ restore original sp
++ eorne $t3,$B,$C
++ bne .L_00_48
++
++ ldmia sp!,{r4-r12,pc}
++.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
++#endif
++___
++}}}
++######################################################################
++# ARMv8 stuff
++#
++{{{
++my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
++my @MSG=map("q$_",(8..11));
++my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
++my $Ktbl="r3";
++
++$code.=<<___;
++#if __ARM_ARCH__>=7
++.type sha256_block_data_order_armv8,%function
++.align 5
++sha256_block_data_order_armv8:
++.LARMv8:
++ vld1.32 {$ABCD,$EFGH},[$ctx]
++ sub $Ktbl,r3,#sha256_block_data_order-K256
++
++.Loop_v8:
++ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
++ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
++ vld1.32 {$W0},[$Ktbl]!
++ vrev32.8 @MSG[0],@MSG[0]
++ vrev32.8 @MSG[1],@MSG[1]
++ vrev32.8 @MSG[2],@MSG[2]
++ vrev32.8 @MSG[3],@MSG[3]
++ vmov $ABCD_SAVE,$ABCD @ offload
++ vmov $EFGH_SAVE,$EFGH
++ teq $inp,$len
++___
++for($i=0;$i<12;$i++) {
++$code.=<<___;
++ vld1.32 {$W1},[$Ktbl]!
++ vadd.i32 $W0,$W0,@MSG[0]
++ sha256su0 @MSG[0],@MSG[1]
++ vmov $abcd,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++ sha256su1 @MSG[0],@MSG[2],@MSG[3]
++___
++ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
++}
++$code.=<<___;
++ vld1.32 {$W1},[$Ktbl]!
++ vadd.i32 $W0,$W0,@MSG[0]
++ vmov $abcd,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++
++ vld1.32 {$W0},[$Ktbl]!
++ vadd.i32 $W1,$W1,@MSG[1]
++ vmov $abcd,$ABCD
++ sha256h $ABCD,$EFGH,$W1
++ sha256h2 $EFGH,$abcd,$W1
++
++ vld1.32 {$W1},[$Ktbl]
++ vadd.i32 $W0,$W0,@MSG[2]
++ sub $Ktbl,$Ktbl,#256-16 @ rewind
++ vmov $abcd,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++
++ vadd.i32 $W1,$W1,@MSG[3]
++ vmov $abcd,$ABCD
++ sha256h $ABCD,$EFGH,$W1
++ sha256h2 $EFGH,$abcd,$W1
++
++ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
++ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
++ bne .Loop_v8
++
++ vst1.32 {$ABCD,$EFGH},[$ctx]
++
++ ret @ bx lr
++.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
++#endif
++___
++}}}
++$code.=<<___;
++.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 2
++.comm OPENSSL_armcap_P,4,4
+ ___
+
+-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+-print $code;
++{ my %opcode = (
++ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
++ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
++
++ sub unsha256 {
++ my ($mnemonic,$arg)=@_;
++
++ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
++ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
++ |(($2&7)<<17)|(($2&8)<<4)
++ |(($3&7)<<1) |(($3&8)<<2);
++ # since ARMv7 instructions are always encoded little-endian.
++ # correct solution is to use .inst directive, but older
++ # assemblers don't implement it:-(
++ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
++ $word&0xff,($word>>8)&0xff,
++ ($word>>16)&0xff,($word>>24)&0xff,
++ $mnemonic,$arg;
++ }
++ }
++}
++
++foreach (split($/,$code)) {
++
++ s/\`([^\`]*)\`/eval $1/geo;
++
++ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
++
++ s/\bret\b/bx lr/go or
++ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
++
++ print $_,"\n";
++}
++
+ close STDOUT; # enforce flush
+diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
+index 7faf37b..71aa935 100644
+--- a/crypto/sha/asm/sha512-armv4.pl
++++ b/crypto/sha/asm/sha512-armv4.pl
+@@ -565,7 +565,7 @@ $code.=<<___;
+ bne .Loop_neon
+
+ vldmia sp!,{d8-d15} @ epilogue
+- bx lr
++ ret @ bx lr
+ #endif
+ ___
+ }
+@@ -578,5 +578,6 @@ ___
+
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
++$code =~ s/\bret\b/bx lr/gm;
+ print $code;
+ close STDOUT; # enforce flush
+diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
+new file mode 100644
+index 0000000..6935ed6
+--- /dev/null
++++ b/crypto/sha/asm/sha512-armv8.pl
+@@ -0,0 +1,414 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <[email protected]> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# SHA256/512 for ARMv8.
++#
++# Performance in cycles per processed byte and improvement coefficient
++# over code generated with "default" compiler:
++#
++# SHA256-hw SHA256(*) SHA512
++# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
++# Cortex-A5x n/a n/a n/a
++#
++# (*) Software SHA256 results are of lesser relevance, presented
++# mostly for informational purposes.
++# (**) The result is a trade-off: it's possible to improve it by
++# 10%, but at the cost of 20% loss on Cortex-A5x.
++
++$flavour=shift;
++$output=shift;
++open STDOUT,">$output";
++
++if ($output =~ /512/) {
++ $BITS=512;
++ $SZ=8;
++ @Sigma0=(28,34,39);
++ @Sigma1=(14,18,41);
++ @sigma0=(1, 8, 7);
++ @sigma1=(19,61, 6);
++ $rounds=80;
++ $reg_t="x";
++} else {
++ $BITS=256;
++ $SZ=4;
++ @Sigma0=( 2,13,22);
++ @Sigma1=( 6,11,25);
++ @sigma0=( 7,18, 3);
++ @sigma1=(17,19,10);
++ $rounds=64;
++ $reg_t="w";
++}
++
++$func="sha${BITS}_block_data_order";
++
++($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
++
++@X=map("$reg_t$_",(3..15,0..2));
++@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
++($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
++
++sub BODY_00_xx {
++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
++my $j=($i+1)&15;
++my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
++ $T0=@X[$i+3] if ($i<11);
++
++$code.=<<___ if ($i<16);
++#ifndef __ARMEB__
++ rev @X[$i],@X[$i] // $i
++#endif
++___
++$code.=<<___ if ($i<13 && ($i&1));
++ ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ
++___
++$code.=<<___ if ($i==13);
++ ldp @X[14],@X[15],[$inp]
++___
++$code.=<<___ if ($i>=14);
++ ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
++___
++$code.=<<___ if ($i>0 && $i<16);
++ add $a,$a,$t1 // h+=Sigma0(a)
++___
++$code.=<<___ if ($i>=11);
++ str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
++___
++# While ARMv8 specifies merged rotate-n-logical operation such as
++# 'eor x,y,z,ror#n', it was found to negatively affect performance
++# on Apple A7. The reason seems to be that it requires even 'y' to
++# be available earlier. This means that such merged instruction is
++# not necessarily best choice on critical path... On the other hand
++# Cortex-A5x handles merged instructions much better than disjoint
++# rotate and logical... See (**) footnote above.
++$code.=<<___ if ($i<15);
++ ror $t0,$e,#$Sigma1[0]
++ add $h,$h,$t2 // h+=K[i]
++ eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
++ and $t1,$f,$e
++ bic $t2,$g,$e
++ add $h,$h,@X[$i&15] // h+=X[i]
++ orr $t1,$t1,$t2 // Ch(e,f,g)
++ eor $t2,$a,$b // a^b, b^c in next round
++ eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e)
++ ror $T0,$a,#$Sigma0[0]
++ add $h,$h,$t1 // h+=Ch(e,f,g)
++ eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
++ add $h,$h,$t0 // h+=Sigma1(e)
++ and $t3,$t3,$t2 // (b^c)&=(a^b)
++ add $d,$d,$h // d+=h
++ eor $t3,$t3,$b // Maj(a,b,c)
++ eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a)
++ add $h,$h,$t3 // h+=Maj(a,b,c)
++ ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
++ //add $h,$h,$t1 // h+=Sigma0(a)
++___
++$code.=<<___ if ($i>=15);
++ ror $t0,$e,#$Sigma1[0]
++ add $h,$h,$t2 // h+=K[i]
++ ror $T1,@X[($j+1)&15],#$sigma0[0]
++ and $t1,$f,$e
++ ror $T2,@X[($j+14)&15],#$sigma1[0]
++ bic $t2,$g,$e
++ ror $T0,$a,#$Sigma0[0]
++ add $h,$h,@X[$i&15] // h+=X[i]
++ eor $t0,$t0,$e,ror#$Sigma1[1]
++ eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
++ orr $t1,$t1,$t2 // Ch(e,f,g)
++ eor $t2,$a,$b // a^b, b^c in next round
++ eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e)
++ eor $T0,$T0,$a,ror#$Sigma0[1]
++ add $h,$h,$t1 // h+=Ch(e,f,g)
++ and $t3,$t3,$t2 // (b^c)&=(a^b)
++ eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
++ eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1])
++ add $h,$h,$t0 // h+=Sigma1(e)
++ eor $t3,$t3,$b // Maj(a,b,c)
++ eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a)
++ eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14])
++ add @X[$j],@X[$j],@X[($j+9)&15]
++ add $d,$d,$h // d+=h
++ add $h,$h,$t3 // h+=Maj(a,b,c)
++ ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round
++ add @X[$j],@X[$j],$T1
++ add $h,$h,$t1 // h+=Sigma0(a)
++ add @X[$j],@X[$j],$T2
++___
++ ($t2,$t3)=($t3,$t2);
++}
++
++$code.=<<___;
++#include "arm_arch.h"
++
++.text
++
++.globl $func
++.type $func,%function
++.align 6
++$func:
++___
++$code.=<<___ if ($SZ==4);
++ ldr x16,.LOPENSSL_armcap_P
++ adr x17,.LOPENSSL_armcap_P
++ add x16,x16,x17
++ ldr w16,[x16]
++ tst w16,#ARMV8_SHA256
++ b.ne .Lv8_entry
++___
++$code.=<<___;
++ stp x29,x30,[sp,#-128]!
++ add x29,sp,#0
++
++ stp x19,x20,[sp,#16]
++ stp x21,x22,[sp,#32]
++ stp x23,x24,[sp,#48]
++ stp x25,x26,[sp,#64]
++ stp x27,x28,[sp,#80]
++ sub sp,sp,#4*$SZ
++
++ ldp $A,$B,[$ctx] // load context
++ ldp $C,$D,[$ctx,#2*$SZ]
++ ldp $E,$F,[$ctx,#4*$SZ]
++ add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
++ ldp $G,$H,[$ctx,#6*$SZ]
++ adr $Ktbl,K$BITS
++ stp $ctx,$num,[x29,#96]
++
++.Loop:
++ ldp @X[0],@X[1],[$inp],#2*$SZ
++ ldr $t2,[$Ktbl],#$SZ // *K++
++ eor $t3,$B,$C // magic seed
++ str $inp,[x29,#112]
++___
++for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
++$code.=".Loop_16_xx:\n";
++for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
++$code.=<<___;
++ cbnz $t2,.Loop_16_xx
++
++ ldp $ctx,$num,[x29,#96]
++ ldr $inp,[x29,#112]
++ sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind
++
++ ldp @X[0],@X[1],[$ctx]
++ ldp @X[2],@X[3],[$ctx,#2*$SZ]
++ add $inp,$inp,#14*$SZ // advance input pointer
++ ldp @X[4],@X[5],[$ctx,#4*$SZ]
++ add $A,$A,@X[0]
++ ldp @X[6],@X[7],[$ctx,#6*$SZ]
++ add $B,$B,@X[1]
++ add $C,$C,@X[2]
++ add $D,$D,@X[3]
++ stp $A,$B,[$ctx]
++ add $E,$E,@X[4]
++ add $F,$F,@X[5]
++ stp $C,$D,[$ctx,#2*$SZ]
++ add $G,$G,@X[6]
++ add $H,$H,@X[7]
++ cmp $inp,$num
++ stp $E,$F,[$ctx,#4*$SZ]
++ stp $G,$H,[$ctx,#6*$SZ]
++ b.ne .Loop
++
++ ldp x19,x20,[x29,#16]
++ add sp,sp,#4*$SZ
++ ldp x21,x22,[x29,#32]
++ ldp x23,x24,[x29,#48]
++ ldp x25,x26,[x29,#64]
++ ldp x27,x28,[x29,#80]
++ ldp x29,x30,[sp],#128
++ ret
++.size $func,.-$func
++
++.align 6
++.type K$BITS,%object
++K$BITS:
++___
++$code.=<<___ if ($SZ==8);
++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
++ .quad 0x3956c25bf348b538,0x59f111f1b605d019
++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
++ .quad 0xd807aa98a3030242,0x12835b0145706fbe
++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
++ .quad 0x06ca6351e003826f,0x142929670a0e6e70
++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
++ .quad 0x81c2c92e47edaee6,0x92722c851482353b
++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
++ .quad 0xd192e819d6ef5218,0xd69906245565a910
++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
++ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
++ .quad 0x90befffa23631e28,0xa4506cebde82bde9
++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
++ .quad 0xca273eceea26619c,0xd186b8c721c0c207
++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
++ .quad 0x113f9804bef90dae,0x1b710b35131c471b
++ .quad 0x28db77f523047d84,0x32caab7b40c72493
++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
++ .quad 0 // terminator
++___
++$code.=<<___ if ($SZ==4);
++ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
++ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
++ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
++ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
++ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
++ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
++ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
++ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
++ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
++ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
++ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
++ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
++ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
++ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
++ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
++ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
++ .long 0 //terminator
++___
++$code.=<<___;
++.size K$BITS,.-K$BITS
++.align 3
++.LOPENSSL_armcap_P:
++ .quad OPENSSL_armcap_P-.
++.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++if ($SZ==4) {
++my $Ktbl="x3";
++
++my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
++my @MSG=map("v$_.16b",(4..7));
++my ($W0,$W1)=("v16.4s","v17.4s");
++my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
++
++$code.=<<___;
++.type sha256_block_armv8,%function
++.align 6
++sha256_block_armv8:
++.Lv8_entry:
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++
++ ld1.32 {$ABCD,$EFGH},[$ctx]
++ adr $Ktbl,K256
++
++.Loop_hw:
++ ld1 {@MSG[0]-@MSG[3]},[$inp],#64
++ sub $num,$num,#1
++ ld1.32 {$W0},[$Ktbl],#16
++ rev32 @MSG[0],@MSG[0]
++ rev32 @MSG[1],@MSG[1]
++ rev32 @MSG[2],@MSG[2]
++ rev32 @MSG[3],@MSG[3]
++ orr $ABCD_SAVE,$ABCD,$ABCD // offload
++ orr $EFGH_SAVE,$EFGH,$EFGH
++___
++for($i=0;$i<12;$i++) {
++$code.=<<___;
++ ld1.32 {$W1},[$Ktbl],#16
++ add.i32 $W0,$W0,@MSG[0]
++ sha256su0 @MSG[0],@MSG[1]
++ orr $abcd,$ABCD,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++ sha256su1 @MSG[0],@MSG[2],@MSG[3]
++___
++ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
++}
++$code.=<<___;
++ ld1.32 {$W1},[$Ktbl],#16
++ add.i32 $W0,$W0,@MSG[0]
++ orr $abcd,$ABCD,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++
++ ld1.32 {$W0},[$Ktbl],#16
++ add.i32 $W1,$W1,@MSG[1]
++ orr $abcd,$ABCD,$ABCD
++ sha256h $ABCD,$EFGH,$W1
++ sha256h2 $EFGH,$abcd,$W1
++
++ ld1.32 {$W1},[$Ktbl]
++ add.i32 $W0,$W0,@MSG[2]
++ sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind
++ orr $abcd,$ABCD,$ABCD
++ sha256h $ABCD,$EFGH,$W0
++ sha256h2 $EFGH,$abcd,$W0
++
++ add.i32 $W1,$W1,@MSG[3]
++ orr $abcd,$ABCD,$ABCD
++ sha256h $ABCD,$EFGH,$W1
++ sha256h2 $EFGH,$abcd,$W1
++
++ add.i32 $ABCD,$ABCD,$ABCD_SAVE
++ add.i32 $EFGH,$EFGH,$EFGH_SAVE
++
++ cbnz $num,.Loop_hw
++
++ st1.32 {$ABCD,$EFGH},[$ctx]
++
++ ldr x29,[sp],#16
++ ret
++.size sha256_block_armv8,.-sha256_block_armv8
++___
++}
++
++$code.=<<___;
++.comm OPENSSL_armcap_P,4,4
++___
++
++{ my %opcode = (
++ "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000,
++ "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 );
++
++ sub unsha256 {
++ my ($mnemonic,$arg)=@_;
++
++ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
++ &&
++ sprintf ".inst\t0x%08x\t//%s %s",
++ $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
++ $mnemonic,$arg;
++ }
++}
++
++foreach(split("\n",$code)) {
++
++ s/\`([^\`]*)\`/eval($1)/geo;
++
++ s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
++
++ s/\.\w?32\b//o and s/\.16b/\.4s/go;
++ m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
++
++ print $_,"\n";
++}
++
++close STDOUT;
diff --git a/patches/0015-psk_client_callback_128_byte_id_bug.patch b/patches/0015-psk_client_callback_128_byte_id_bug.patch
new file mode 100644
index 0000000..3b06e71
--- /dev/null
+++ b/patches/0015-psk_client_callback_128_byte_id_bug.patch
@@ -0,0 +1,81 @@
+From c8b1f7ec56704c1116795aec9ca61db654b433bf Mon Sep 17 00:00:00 2001
+From: Alex Klyubin <[email protected]>
+Date: Mon, 19 May 2014 11:27:33 -0700
+Subject: psk_client_callback, 128-byte id bug.
+
+Fix a bug in handling of 128 byte long PSK identity in
+psk_client_callback.
+
+OpenSSL supports PSK identities of up to (and including) 128 bytes in
+length. PSK identity is obtained via the psk_client_callback,
+implementors of which are expected to provide a NULL-terminated
+identity. However, the callback is invoked with only 128 bytes of
+storage thus making it impossible to return a 128 byte long identity and
+the required additional NULL byte.
+
+This CL fixes the issue by passing in a 129 byte long buffer into the
+psk_client_callback. As a safety precaution, this CL also zeroes out the
+buffer before passing it into the callback, uses strnlen for obtaining
+the length of the identity returned by the callback, and aborts the
+handshake if the identity (without the NULL terminator) is longer than
+128 bytes.
+---
+ ssl/s3_clnt.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
+index 03b96e8..0e22afc 100644
+--- a/ssl/s3_clnt.c
++++ b/ssl/s3_clnt.c
+@@ -2328,7 +2328,8 @@ int ssl3_send_client_key_exchange(SSL *s)
+ #ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+- char identity[PSK_MAX_IDENTITY_LEN];
++ char identity[PSK_MAX_IDENTITY_LEN + 1];
++ size_t identity_len;
+ unsigned char *t = NULL;
+ unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4];
+ unsigned int pre_ms_len = 0;
+@@ -2342,8 +2343,9 @@ int ssl3_send_client_key_exchange(SSL *s)
+ goto err;
+ }
+
++ memset(identity, 0, sizeof(identity));
+ psk_len = s->psk_client_callback(s, s->session->psk_identity_hint,
+- identity, PSK_MAX_IDENTITY_LEN, psk, sizeof(psk));
++ identity, sizeof(identity), psk, sizeof(psk));
+ if (psk_len > PSK_MAX_PSK_LEN)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+@@ -2356,6 +2358,13 @@ int ssl3_send_client_key_exchange(SSL *s)
+ SSL_R_PSK_IDENTITY_NOT_FOUND);
+ goto psk_err;
+ }
++ identity_len = strnlen(identity, sizeof(identity));
++ if (identity_len > PSK_MAX_IDENTITY_LEN)
++ {
++ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
++ ERR_R_INTERNAL_ERROR);
++ goto psk_err;
++ }
+
+ if (!(alg_k & SSL_kEECDH))
+ {
+@@ -2372,10 +2381,9 @@ int ssl3_send_client_key_exchange(SSL *s)
+ s->method->ssl3_enc->generate_master_secret(s,
+ s->session->master_key,
+ pre_ms, pre_ms_len);
+- n = strlen(identity);
+- s2n(n, p);
+- memcpy(p, identity, n);
+- n += 2;
++ s2n(identity_len, p);
++ memcpy(p, identity, identity_len);
++ n = 2 + identity_len;
+ }
+
+ if (s->session->psk_identity != NULL)
+--
+2.0.0.526.g5318336
+
diff --git a/patches/0016-ecdhe_psk_part2.patch b/patches/0016-ecdhe_psk_part2.patch
new file mode 100644
index 0000000..e87e00a
--- /dev/null
+++ b/patches/0016-ecdhe_psk_part2.patch
@@ -0,0 +1,100 @@
+From cf389e16d8dc49e97c0b13ea3a1c373c6f6f94bd Mon Sep 17 00:00:00 2001
+From: Adam Langley <[email protected]>
+Date: Wed, 4 Jun 2014 10:59:32 -0700
+Subject: ECDHE-PSK_AES-CBC-SHA_cipher_suites
+
+Add ECDHE-PSK AES-CBC-SHA cipher suites from RFC 5489.
+Remove ECDHE-PSK AES-CBC-SHA2 cipher suites from RFC 5489 because
+they cannot be used with SSLv3 and there's no way to express that in
+OpenSSL's configuration.
+---
+ ssl/s3_lib.c | 25 ++++++++++++-------------
+ ssl/tls1.h | 14 ++++++++------
+ 2 files changed, 20 insertions(+), 19 deletions(-)
+
+diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
+index f84da7f..e016fc8 100644
+--- a/ssl/s3_lib.c
++++ b/ssl/s3_lib.c
+@@ -2828,35 +2828,34 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
+
+ #ifndef OPENSSL_NO_PSK
+ /* ECDH PSK ciphersuites from RFC 5489 */
+-
+- /* Cipher C037 */
++ /* Cipher C035 */
+ {
+ 1,
+- TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256,
+- TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256,
++ TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA,
++ TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA,
+ SSL_kEECDH,
+ SSL_aPSK,
+ SSL_AES128,
+- SSL_SHA256,
++ SSL_SHA1,
+ SSL_TLSV1,
+- SSL_NOT_EXP|SSL_HIGH,
+- SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA256,
++ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
++ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF,
+ 128,
+ 128,
+ },
+
+- /* Cipher C038 */
++ /* Cipher C036 */
+ {
+ 1,
+- TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384,
+- TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384,
++ TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA,
++ TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA,
+ SSL_kEECDH,
+ SSL_aPSK,
+ SSL_AES256,
+- SSL_SHA384,
++ SSL_SHA1,
+ SSL_TLSV1,
+- SSL_NOT_EXP|SSL_HIGH,
+- SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF_SHA384,
++ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
++ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF,
+ 256,
+ 256,
+ },
+diff --git a/ssl/tls1.h b/ssl/tls1.h
+index ec8948d..51d073d 100644
+--- a/ssl/tls1.h
++++ b/ssl/tls1.h
+@@ -531,9 +531,11 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)
+ #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031
+ #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032
+
+-/* ECDHE PSK ciphersuites from RFC 5489 */
+-#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA256 0x0300C037
+-#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA384 0x0300C038
++/* ECDHE PSK ciphersuites from RFC5489
++ * SHA-2 cipher suites are omitted because they cannot be used safely with
++ * SSLv3. */
++#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035
++#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036
+
+ /* XXX
+ * Inconsistency alert:
+@@ -686,9 +688,9 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)
+ #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256"
+ #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384"
+
+-/* ECDHE PSK ciphersuites from RFC 5489 */
+-#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA256 "ECDHE-PSK-WITH-AES-128-CBC-SHA256"
+-#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA384 "ECDHE-PSK-WITH-AES-256-CBC-SHA384"
++/* ECDHE PSK ciphersuites from RFC5489 */
++#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA"
++#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA"
+
+ #define TLS_CT_RSA_SIGN 1
+ #define TLS_CT_DSS_SIGN 2
+--
+2.0.0.526.g5318336
diff --git a/patches/0017-x86_textrel.patch b/patches/0017-x86_textrel.patch
new file mode 100644
index 0000000..32931a8
--- /dev/null
+++ b/patches/0017-x86_textrel.patch
@@ -0,0 +1,48 @@
+From 6206682a35760eb6ddb25172df2ae9f03d12e594 Mon Sep 17 00:00:00 2001
+From: Andy Polyakov <[email protected]>
+Date: Wed, 29 Aug 2012 14:19:59 +0000
+Subject: [PATCH] x86cpuid.pl: hide symbols [backport from x86_64].
+
+---
+ crypto/perlasm/x86asm.pl | 2 ++
+ crypto/perlasm/x86gas.pl | 2 ++
+ crypto/x86cpuid.pl | 3 +++
+ 3 files changed, 7 insertions(+)
+
+diff --git a/crypto/perlasm/x86asm.pl b/crypto/perlasm/x86asm.pl
+index eb543db..3f190ae 100644
+--- a/crypto/perlasm/x86asm.pl
++++ b/crypto/perlasm/x86asm.pl
+@@ -257,4 +257,6 @@ sub ::asm_init
+ &file($filename);
+ }
+
++sub ::hidden {}
++
+ 1;
+diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl
+index 5972d06..e02ee84 100644
+--- a/crypto/perlasm/x86gas.pl
++++ b/crypto/perlasm/x86gas.pl
+@@ -251,4 +251,6 @@ sub ::initseg
+ sub ::dataseg
+ { push(@out,".data\n"); }
+
++*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
++
+ 1;
+diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
+index 808049a..597b7a5 100644
+--- a/crypto/x86cpuid.pl
++++ b/crypto/x86cpuid.pl
+@@ -455,4 +455,7 @@
+
+ &initseg("OPENSSL_cpuid_setup");
+
++&hidden("OPENSSL_cpuid_setup");
++&hidden("OPENSSL_ia32cap_P");
++
+ &asm_finish();
+--
+1.9.3
+
diff --git a/patches/README b/patches/README
index c355482..5344470 100644
--- a/patches/README
+++ b/patches/README
@@ -43,10 +43,38 @@
BEAST attack client-side mitigation. Removes 0/n record splitting, adds 1/n-1
record splitting. Record splitting is disabled by default.
-paddingext.patch
+dsa_nonce.patch
-ClientHello padding extension which is added, when needed, to work around bugs
-in F5 terminators.
+Adds an option to mix in hash of message and private key into (EC)DSA nonces to
+make (EC)DSA more resilient to weaknesses in RNGs used for nonces. The feature
+is disabled by default.
+
+ecdhe_psk.patch
+
+Adds support for ECDHE Pre-Shared Key (PSK) TLS cipher suites.
+
+ecdhe_psk_part2.patch
+
+Removes ECHDE-PSK cipher suites with SHA-2 because they cannot be used with
+SSLv3 (and there's no way to express that in OpenSSL's configuration). Adds
+SHA-1 based ECDHE-PSK AES-CBC cipher suites instead.
+
+arm_asm.patch
+
+Adds newer ARM assembly pack with BSAES for ARMv7 and acceleration for ARMv8
+Based on branch available at:
+https://git.linaro.org/people/ard.biesheuvel/openssl.git/shortlog/refs/heads/openssl-1.0.1f-with-arm-patches
+c7b582ef23eb6f4386664e841e6e406d984c38d3^..cb8b1ab03e5c179a719afe83f03fecb1c2c78730
+
+tls_psk_hint.patch
+
+Fixes issues with TLS-PSK identity hint implementation where
+per-connection/session and per-context hints were being mixed up.
+
+psk_client_callback_128_byte_id_bug.patch
+
+Fixes the issue where it was impossible to return a 128 byte long PSK identity
+(the maximum supported length) from psk_client_callback.
tls_fallback_scsv.patch
diff --git a/patches/channelid.patch b/patches/channelid.patch
deleted file mode 100644
index 4eb025a..0000000
--- a/patches/channelid.patch
+++ /dev/null
@@ -1,983 +0,0 @@
---- openssl-1.0.1e.orig/crypto/evp/evp.h 2013-03-05 18:49:33.183296743 +0000
-+++ openssl-1.0.1e/crypto/evp/evp.h 2013-03-05 18:49:33.373298798 +0000
-@@ -921,6 +921,7 @@ struct ec_key_st *EVP_PKEY_get1_EC_KEY(E
- #endif
-
- EVP_PKEY * EVP_PKEY_new(void);
-+EVP_PKEY * EVP_PKEY_dup(EVP_PKEY *pkey);
- void EVP_PKEY_free(EVP_PKEY *pkey);
-
- EVP_PKEY * d2i_PublicKey(int type,EVP_PKEY **a, const unsigned char **pp,
---- openssl-1.0.1e.orig/crypto/evp/p_lib.c 2013-03-05 18:49:33.183296743 +0000
-+++ openssl-1.0.1e/crypto/evp/p_lib.c 2013-03-05 18:49:33.373298798 +0000
-@@ -200,6 +200,12 @@ EVP_PKEY *EVP_PKEY_new(void)
- return(ret);
- }
-
-+EVP_PKEY *EVP_PKEY_dup(EVP_PKEY *pkey)
-+ {
-+ CRYPTO_add(&pkey->references, 1, CRYPTO_LOCK_EVP_PKEY);
-+ return pkey;
-+ }
-+
- /* Setup a public key ASN1 method and ENGINE from a NID or a string.
- * If pkey is NULL just return 1 or 0 if the algorithm exists.
- */
---- openssl-1.0.1e.orig/ssl/s3_both.c 2013-03-05 18:49:33.233297282 +0000
-+++ openssl-1.0.1e/ssl/s3_both.c 2013-03-05 18:49:33.413299231 +0000
-@@ -555,7 +555,8 @@ long ssl3_get_message(SSL *s, int st1, i
- #endif
-
- /* Feed this message into MAC computation. */
-- ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
-+ if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS)
-+ ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
- if (s->msg_callback)
- s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg);
- *ok=1;
---- openssl-1.0.1e.orig/ssl/s3_clnt.c 2013-03-05 18:49:33.233297282 +0000
-+++ openssl-1.0.1e/ssl/s3_clnt.c 2013-03-05 18:49:33.413299231 +0000
-@@ -477,13 +477,14 @@ int ssl3_connect(SSL *s)
- SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B);
- if (ret <= 0) goto end;
-
--#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- s->state=SSL3_ST_CW_FINISHED_A;
--#else
-+#if !defined(OPENSSL_NO_TLSEXT)
-+ if (s->s3->tlsext_channel_id_valid)
-+ s->state=SSL3_ST_CW_CHANNEL_ID_A;
-+# if !defined(OPENSSL_NO_NEXTPROTONEG)
- if (s->s3->next_proto_neg_seen)
- s->state=SSL3_ST_CW_NEXT_PROTO_A;
-- else
-- s->state=SSL3_ST_CW_FINISHED_A;
-+# endif
- #endif
- s->init_num=0;
-
-@@ -517,6 +518,18 @@ int ssl3_connect(SSL *s)
- case SSL3_ST_CW_NEXT_PROTO_B:
- ret=ssl3_send_next_proto(s);
- if (ret <= 0) goto end;
-+ if (s->s3->tlsext_channel_id_valid)
-+ s->state=SSL3_ST_CW_CHANNEL_ID_A;
-+ else
-+ s->state=SSL3_ST_CW_FINISHED_A;
-+ break;
-+#endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT)
-+ case SSL3_ST_CW_CHANNEL_ID_A:
-+ case SSL3_ST_CW_CHANNEL_ID_B:
-+ ret=ssl3_send_channel_id(s);
-+ if (ret <= 0) goto end;
- s->state=SSL3_ST_CW_FINISHED_A;
- break;
- #endif
-@@ -3362,7 +3375,8 @@ err:
- return(0);
- }
-
--#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+#if !defined(OPENSSL_NO_TLSEXT)
-+# if !defined(OPENSSL_NO_NEXTPROTONEG)
- int ssl3_send_next_proto(SSL *s)
- {
- unsigned int len, padding_len;
-@@ -3386,7 +3400,116 @@ int ssl3_send_next_proto(SSL *s)
-
- return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
- }
--#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */
-+# endif /* !OPENSSL_NO_NEXTPROTONEG */
-+
-+int ssl3_send_channel_id(SSL *s)
-+ {
-+ unsigned char *d;
-+ int ret = -1, public_key_len;
-+ EVP_MD_CTX md_ctx;
-+ size_t sig_len;
-+ ECDSA_SIG *sig = NULL;
-+ unsigned char *public_key = NULL, *derp, *der_sig = NULL;
-+
-+ if (s->state != SSL3_ST_CW_CHANNEL_ID_A)
-+ return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
-+
-+ d = (unsigned char *)s->init_buf->data;
-+ *(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS;
-+ l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d);
-+ s2n(TLSEXT_TYPE_channel_id, d);
-+ s2n(TLSEXT_CHANNEL_ID_SIZE, d);
-+
-+ EVP_MD_CTX_init(&md_ctx);
-+
-+ public_key_len = i2d_PublicKey(s->tlsext_channel_id_private, NULL);
-+ if (public_key_len <= 0)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY);
-+ goto err;
-+ }
-+ // i2d_PublicKey will produce an ANSI X9.62 public key which, for a
-+ // P-256 key, is 0x04 (meaning uncompressed) followed by the x and y
-+ // field elements as 32-byte, big-endian numbers.
-+ if (public_key_len != 65)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256);
-+ goto err;
-+ }
-+ public_key = OPENSSL_malloc(public_key_len);
-+ if (!public_key)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE);
-+ goto err;
-+ }
-+
-+ derp = public_key;
-+ i2d_PublicKey(s->tlsext_channel_id_private, &derp);
-+
-+ if (EVP_DigestSignInit(&md_ctx, NULL, EVP_sha256(), NULL,
-+ s->tlsext_channel_id_private) != 1)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNINIT_FAILED);
-+ goto err;
-+ }
-+
-+ if (!tls1_channel_id_hash(&md_ctx, s))
-+ goto err;
-+
-+ if (!EVP_DigestSignFinal(&md_ctx, NULL, &sig_len))
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED);
-+ goto err;
-+ }
-+
-+ der_sig = OPENSSL_malloc(sig_len);
-+ if (!der_sig)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,ERR_R_MALLOC_FAILURE);
-+ goto err;
-+ }
-+
-+ if (!EVP_DigestSignFinal(&md_ctx, der_sig, &sig_len))
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_EVP_DIGESTSIGNFINAL_FAILED);
-+ goto err;
-+ }
-+
-+ derp = der_sig;
-+ sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len);
-+ if (sig == NULL)
-+ {
-+ SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG);
-+ goto err;
-+ }
-+
-+ // The first byte of public_key will be 0x4, denoting an uncompressed key.
-+ memcpy(d, public_key + 1, 64);
-+ d += 64;
-+ memset(d, 0, 2 * 32);
-+ BN_bn2bin(sig->r, d + 32 - BN_num_bytes(sig->r));
-+ d += 32;
-+ BN_bn2bin(sig->s, d + 32 - BN_num_bytes(sig->s));
-+ d += 32;
-+
-+ s->state = SSL3_ST_CW_CHANNEL_ID_B;
-+ s->init_num = 4 + 2 + 2 + TLSEXT_CHANNEL_ID_SIZE;
-+ s->init_off = 0;
-+
-+ ret = ssl3_do_write(s, SSL3_RT_HANDSHAKE);
-+
-+err:
-+ EVP_MD_CTX_cleanup(&md_ctx);
-+ if (public_key)
-+ OPENSSL_free(public_key);
-+ if (der_sig)
-+ OPENSSL_free(der_sig);
-+ if (sig)
-+ ECDSA_SIG_free(sig);
-+
-+ return ret;
-+ }
-+#endif /* !OPENSSL_NO_TLSEXT */
-
- /* Check to see if handshake is full or resumed. Usually this is just a
- * case of checking to see if a cache hit has occurred. In the case of
---- openssl-1.0.1e.orig/ssl/s3_lib.c 2013-03-05 18:49:33.223297173 +0000
-+++ openssl-1.0.1e/ssl/s3_lib.c 2013-03-05 18:49:33.413299231 +0000
-@@ -2951,6 +2951,11 @@ int ssl3_new(SSL *s)
- #ifndef OPENSSL_NO_SRP
- SSL_SRP_CTX_init(s);
- #endif
-+#if !defined(OPENSSL_NO_TLSEXT)
-+ s->tlsext_channel_id_enabled = s->ctx->tlsext_channel_id_enabled;
-+ if (s->ctx->tlsext_channel_id_private)
-+ s->tlsext_channel_id_private = EVP_PKEY_dup(s->ctx->tlsext_channel_id_private);
-+#endif
- s->method->ssl_clear(s);
- return(1);
- err:
-@@ -3074,6 +3079,10 @@ void ssl3_clear(SSL *s)
- s->next_proto_negotiated_len = 0;
- }
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT)
-+ s->s3->tlsext_channel_id_valid = 0;
-+#endif
- }
-
- #ifndef OPENSSL_NO_SRP
-@@ -3348,6 +3357,35 @@ long ssl3_ctrl(SSL *s, int cmd, long lar
- ret = 1;
- break;
- #endif
-+ case SSL_CTRL_CHANNEL_ID:
-+ if (!s->server)
-+ break;
-+ s->tlsext_channel_id_enabled = 1;
-+ ret = 1;
-+ break;
-+
-+ case SSL_CTRL_SET_CHANNEL_ID:
-+ if (s->server)
-+ break;
-+ s->tlsext_channel_id_enabled = 1;
-+ if (EVP_PKEY_bits(parg) != 256)
-+ {
-+ SSLerr(SSL_F_SSL3_CTRL,SSL_R_CHANNEL_ID_NOT_P256);
-+ break;
-+ }
-+ if (s->tlsext_channel_id_private)
-+ EVP_PKEY_free(s->tlsext_channel_id_private);
-+ s->tlsext_channel_id_private = (EVP_PKEY*) parg;
-+ ret = 1;
-+ break;
-+
-+ case SSL_CTRL_GET_CHANNEL_ID:
-+ if (!s->server)
-+ break;
-+ if (!s->s3->tlsext_channel_id_valid)
-+ break;
-+ memcpy(parg, s->s3->tlsext_channel_id, larg < 64 ? larg : 64);
-+ return 64;
-
- #endif /* !OPENSSL_NO_TLSEXT */
- default:
-@@ -3569,6 +3607,12 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd
- }
- return 1;
- }
-+ case SSL_CTRL_CHANNEL_ID:
-+ /* must be called on a server */
-+ if (ctx->method->ssl_accept == ssl_undefined_function)
-+ return 0;
-+ ctx->tlsext_channel_id_enabled=1;
-+ return 1;
-
- #ifdef TLSEXT_TYPE_opaque_prf_input
- case SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG:
-@@ -3637,6 +3681,18 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd
- }
- break;
-
-+ case SSL_CTRL_SET_CHANNEL_ID:
-+ ctx->tlsext_channel_id_enabled = 1;
-+ if (EVP_PKEY_bits(parg) != 256)
-+ {
-+ SSLerr(SSL_F_SSL3_CTX_CTRL,SSL_R_CHANNEL_ID_NOT_P256);
-+ break;
-+ }
-+ if (ctx->tlsext_channel_id_private)
-+ EVP_PKEY_free(ctx->tlsext_channel_id_private);
-+ ctx->tlsext_channel_id_private = (EVP_PKEY*) parg;
-+ break;
-+
- default:
- return(0);
- }
---- openssl-1.0.1e.orig/ssl/s3_srvr.c 2013-03-05 18:49:33.233297282 +0000
-+++ openssl-1.0.1e/ssl/s3_srvr.c 2013-03-05 18:49:33.413299231 +0000
-@@ -157,8 +157,11 @@
- #include <openssl/buffer.h>
- #include <openssl/rand.h>
- #include <openssl/objects.h>
-+#include <openssl/ec.h>
-+#include <openssl/ecdsa.h>
- #include <openssl/evp.h>
- #include <openssl/hmac.h>
-+#include <openssl/sha.h>
- #include <openssl/x509.h>
- #ifndef OPENSSL_NO_DH
- #include <openssl/dh.h>
-@@ -609,15 +612,8 @@ int ssl3_accept(SSL *s)
- * the client uses its key from the certificate
- * for key exchange.
- */
--#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
-- s->state=SSL3_ST_SR_FINISHED_A;
--#else
-- if (s->s3->next_proto_neg_seen)
-- s->state=SSL3_ST_SR_NEXT_PROTO_A;
-- else
-- s->state=SSL3_ST_SR_FINISHED_A;
--#endif
- s->init_num = 0;
-+ s->state=SSL3_ST_SR_POST_CLIENT_CERT;
- }
- else if (TLS1_get_version(s) >= TLS1_2_VERSION)
- {
-@@ -677,16 +673,28 @@ int ssl3_accept(SSL *s)
- ret=ssl3_get_cert_verify(s);
- if (ret <= 0) goto end;
-
--#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
-- s->state=SSL3_ST_SR_FINISHED_A;
--#else
-- if (s->s3->next_proto_neg_seen)
-+ s->state=SSL3_ST_SR_POST_CLIENT_CERT;
-+ s->init_num=0;
-+ break;
-+
-+ case SSL3_ST_SR_POST_CLIENT_CERT: {
-+ char next_proto_neg = 0;
-+ char channel_id = 0;
-+#if !defined(OPENSSL_NO_TLSEXT)
-+# if !defined(OPENSSL_NO_NEXTPROTONEG)
-+ next_proto_neg = s->s3->next_proto_neg_seen;
-+# endif
-+ channel_id = s->s3->tlsext_channel_id_valid;
-+#endif
-+
-+ if (next_proto_neg)
- s->state=SSL3_ST_SR_NEXT_PROTO_A;
-+ else if (channel_id)
-+ s->state=SSL3_ST_SR_CHANNEL_ID_A;
- else
- s->state=SSL3_ST_SR_FINISHED_A;
--#endif
-- s->init_num=0;
- break;
-+ }
-
- #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
- case SSL3_ST_SR_NEXT_PROTO_A:
-@@ -694,6 +702,19 @@ int ssl3_accept(SSL *s)
- ret=ssl3_get_next_proto(s);
- if (ret <= 0) goto end;
- s->init_num = 0;
-+ if (s->s3->tlsext_channel_id_valid)
-+ s->state=SSL3_ST_SR_CHANNEL_ID_A;
-+ else
-+ s->state=SSL3_ST_SR_FINISHED_A;
-+ break;
-+#endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT)
-+ case SSL3_ST_SR_CHANNEL_ID_A:
-+ case SSL3_ST_SR_CHANNEL_ID_B:
-+ ret=ssl3_get_channel_id(s);
-+ if (ret <= 0) goto end;
-+ s->init_num = 0;
- s->state=SSL3_ST_SR_FINISHED_A;
- break;
- #endif
-@@ -765,16 +786,7 @@ int ssl3_accept(SSL *s)
- if (ret <= 0) goto end;
- s->state=SSL3_ST_SW_FLUSH;
- if (s->hit)
-- {
--#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
-- s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
--#else
-- if (s->s3->next_proto_neg_seen)
-- s->s3->tmp.next_state=SSL3_ST_SR_NEXT_PROTO_A;
-- else
-- s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
--#endif
-- }
-+ s->s3->tmp.next_state=SSL3_ST_SR_POST_CLIENT_CERT;
- else
- s->s3->tmp.next_state=SSL_ST_OK;
- s->init_num=0;
-@@ -3610,4 +3622,140 @@ int ssl3_get_next_proto(SSL *s)
- return 1;
- }
- # endif
-+
-+/* ssl3_get_channel_id reads and verifies a ClientID handshake message. */
-+int ssl3_get_channel_id(SSL *s)
-+ {
-+ int ret = -1, ok;
-+ long n;
-+ const unsigned char *p;
-+ unsigned short extension_type, extension_len;
-+ EC_GROUP* p256 = NULL;
-+ EC_KEY* key = NULL;
-+ EC_POINT* point = NULL;
-+ ECDSA_SIG sig;
-+ BIGNUM x, y;
-+
-+ if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0)
-+ {
-+ /* The first time that we're called we take the current
-+ * handshake hash and store it. */
-+ EVP_MD_CTX md_ctx;
-+ unsigned int len;
-+
-+ EVP_MD_CTX_init(&md_ctx);
-+ EVP_DigestInit_ex(&md_ctx, EVP_sha256(), NULL);
-+ if (!tls1_channel_id_hash(&md_ctx, s))
-+ return -1;
-+ len = sizeof(s->s3->tlsext_channel_id);
-+ EVP_DigestFinal(&md_ctx, s->s3->tlsext_channel_id, &len);
-+ EVP_MD_CTX_cleanup(&md_ctx);
-+ }
-+
-+ n = s->method->ssl_get_message(s,
-+ SSL3_ST_SR_CHANNEL_ID_A,
-+ SSL3_ST_SR_CHANNEL_ID_B,
-+ SSL3_MT_ENCRYPTED_EXTENSIONS,
-+ 2 + 2 + TLSEXT_CHANNEL_ID_SIZE,
-+ &ok);
-+
-+ if (!ok)
-+ return((int)n);
-+
-+ ssl3_finish_mac(s, (unsigned char*)s->init_buf->data, s->init_num + 4);
-+
-+ /* s->state doesn't reflect whether ChangeCipherSpec has been received
-+ * in this handshake, but s->s3->change_cipher_spec does (will be reset
-+ * by ssl3_get_finished). */
-+ if (!s->s3->change_cipher_spec)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS);
-+ return -1;
-+ }
-+
-+ if (n != 2 + 2 + TLSEXT_CHANNEL_ID_SIZE)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE);
-+ return -1;
-+ }
-+
-+ p = (unsigned char *)s->init_msg;
-+
-+ /* The payload looks like:
-+ * uint16 extension_type
-+ * uint16 extension_len;
-+ * uint8 x[32];
-+ * uint8 y[32];
-+ * uint8 r[32];
-+ * uint8 s[32];
-+ */
-+ n2s(p, extension_type);
-+ n2s(p, extension_len);
-+
-+ if (extension_type != TLSEXT_TYPE_channel_id ||
-+ extension_len != TLSEXT_CHANNEL_ID_SIZE)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE);
-+ return -1;
-+ }
-+
-+ p256 = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
-+ if (!p256)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_NO_P256_SUPPORT);
-+ return -1;
-+ }
-+
-+ BN_init(&x);
-+ BN_init(&y);
-+ sig.r = BN_new();
-+ sig.s = BN_new();
-+
-+ if (BN_bin2bn(p + 0, 32, &x) == NULL ||
-+ BN_bin2bn(p + 32, 32, &y) == NULL ||
-+ BN_bin2bn(p + 64, 32, sig.r) == NULL ||
-+ BN_bin2bn(p + 96, 32, sig.s) == NULL)
-+ goto err;
-+
-+ point = EC_POINT_new(p256);
-+ if (!point ||
-+ !EC_POINT_set_affine_coordinates_GFp(p256, point, &x, &y, NULL))
-+ goto err;
-+
-+ key = EC_KEY_new();
-+ if (!key ||
-+ !EC_KEY_set_group(key, p256) ||
-+ !EC_KEY_set_public_key(key, point))
-+ goto err;
-+
-+ /* We stored the handshake hash in |tlsext_channel_id| the first time
-+ * that we were called. */
-+ switch (ECDSA_do_verify(s->s3->tlsext_channel_id, SHA256_DIGEST_LENGTH, &sig, key)) {
-+ case 1:
-+ break;
-+ case 0:
-+ SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_CHANNEL_ID_SIGNATURE_INVALID);
-+ s->s3->tlsext_channel_id_valid = 0;
-+ goto err;
-+ default:
-+ s->s3->tlsext_channel_id_valid = 0;
-+ goto err;
-+ }
-+
-+ memcpy(s->s3->tlsext_channel_id, p, 64);
-+ ret = 1;
-+
-+err:
-+ BN_free(&x);
-+ BN_free(&y);
-+ BN_free(sig.r);
-+ BN_free(sig.s);
-+ if (key)
-+ EC_KEY_free(key);
-+ if (point)
-+ EC_POINT_free(point);
-+ if (p256)
-+ EC_GROUP_free(p256);
-+ return ret;
-+ }
- #endif
---- openssl-1.0.1e.orig/ssl/ssl.h 2013-03-05 18:49:33.233297282 +0000
-+++ openssl-1.0.1e/ssl/ssl.h 2013-03-05 18:49:33.413299231 +0000
-@@ -981,6 +981,12 @@ struct ssl_ctx_st
- # endif
- /* SRTP profiles we are willing to do from RFC 5764 */
- STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles;
-+
-+ /* If true, a client will advertise the Channel ID extension and a
-+ * server will echo it. */
-+ char tlsext_channel_id_enabled;
-+ /* The client's Channel ID private key. */
-+ EVP_PKEY *tlsext_channel_id_private;
- #endif
- };
-
-@@ -1022,6 +1028,10 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(
- SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL)
- #define SSL_CTX_sess_cache_full(ctx) \
- SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL)
-+/* SSL_CTX_enable_tls_channel_id configures a TLS server to accept TLS client
-+ * IDs from clients. Returns 1 on success. */
-+#define SSL_CTX_enable_tls_channel_id(ctx) \
-+ SSL_CTX_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)
-
- void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, int (*new_session_cb)(struct ssl_st *ssl,SSL_SESSION *sess));
- int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx))(struct ssl_st *ssl, SSL_SESSION *sess);
-@@ -1348,6 +1358,13 @@ struct ssl_st
- */
- unsigned int tlsext_hb_pending; /* Indicates if a HeartbeatRequest is in flight */
- unsigned int tlsext_hb_seq; /* HeartbeatRequest sequence number */
-+
-+ /* Copied from the SSL_CTX. For a server, means that we'll accept
-+ * Channel IDs from clients. For a client, means that we'll advertise
-+ * support. */
-+ char tlsext_channel_id_enabled;
-+ /* The client's Channel ID private key. */
-+ EVP_PKEY *tlsext_channel_id_private;
- #else
- #define session_ctx ctx
- #endif /* OPENSSL_NO_TLSEXT */
-@@ -1605,6 +1622,9 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
- #define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING 86
- #define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS 87
- #endif
-+#define SSL_CTRL_CHANNEL_ID 88
-+#define SSL_CTRL_GET_CHANNEL_ID 89
-+#define SSL_CTRL_SET_CHANNEL_ID 90
- #endif
-
- #define DTLS_CTRL_GET_TIMEOUT 73
-@@ -1652,6 +1672,25 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
- #define SSL_set_tmp_ecdh(ssl,ecdh) \
- SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh)
-
-+/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client
-+ * IDs from clients. Returns 1 on success. */
-+#define SSL_enable_tls_channel_id(ctx) \
-+ SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)
-+/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to
-+ * compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on
-+ * success. */
-+#define SSL_set1_tls_channel_id(s, private_key) \
-+ SSL_ctrl(s,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key)
-+#define SSL_CTX_set1_tls_channel_id(ctx, private_key) \
-+ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHANNEL_ID,0,(void*)private_key)
-+/* SSL_get_tls_channel_id gets the client's TLS Channel ID from a server SSL*
-+ * and copies up to the first |channel_id_len| bytes into |channel_id|. The
-+ * Channel ID consists of the client's P-256 public key as an (x,y) pair where
-+ * each is a 32-byte, big-endian field element. Returns 0 if the client didn't
-+ * offer a Channel ID and the length of the complete Channel ID otherwise. */
-+#define SSL_get_tls_channel_id(ctx, channel_id, channel_id_len) \
-+ SSL_ctrl(ctx,SSL_CTRL_GET_CHANNEL_ID,channel_id_len,(void*)channel_id)
-+
- #define SSL_CTX_add_extra_chain_cert(ctx,x509) \
- SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509)
- #define SSL_CTX_get_extra_chain_certs(ctx,px509) \
-@@ -1686,6 +1725,7 @@ int SSL_CIPHER_get_bits(const SSL_CIPHER
- char * SSL_CIPHER_get_version(const SSL_CIPHER *c);
- const char * SSL_CIPHER_get_name(const SSL_CIPHER *c);
- unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c);
-+const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
-
- int SSL_get_fd(const SSL *s);
- int SSL_get_rfd(const SSL *s);
-@@ -2149,6 +2189,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_GET_CERTIFICATE_REQUEST 135
- #define SSL_F_SSL3_GET_CERT_STATUS 289
- #define SSL_F_SSL3_GET_CERT_VERIFY 136
-+#define SSL_F_SSL3_GET_CHANNEL_ID 317
- #define SSL_F_SSL3_GET_CLIENT_CERTIFICATE 137
- #define SSL_F_SSL3_GET_CLIENT_HELLO 138
- #define SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE 139
-@@ -2168,6 +2209,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_READ_BYTES 148
- #define SSL_F_SSL3_READ_N 149
- #define SSL_F_SSL3_SEND_CERTIFICATE_REQUEST 150
-+#define SSL_F_SSL3_SEND_CHANNEL_ID 318
- #define SSL_F_SSL3_SEND_CLIENT_CERTIFICATE 151
- #define SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE 152
- #define SSL_F_SSL3_SEND_CLIENT_VERIFY 153
-@@ -2335,12 +2377,15 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_BIO_NOT_SET 128
- #define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG 129
- #define SSL_R_BN_LIB 130
-+#define SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY 376
- #define SSL_R_CA_DN_LENGTH_MISMATCH 131
- #define SSL_R_CA_DN_TOO_LONG 132
- #define SSL_R_CCS_RECEIVED_EARLY 133
- #define SSL_R_CERTIFICATE_VERIFY_FAILED 134
- #define SSL_R_CERT_LENGTH_MISMATCH 135
- #define SSL_R_CHALLENGE_IS_DIFFERENT 136
-+#define SSL_R_CHANNEL_ID_NOT_P256 375
-+#define SSL_R_CHANNEL_ID_SIGNATURE_INVALID 371
- #define SSL_R_CIPHER_CODE_WRONG_LENGTH 137
- #define SSL_R_CIPHER_OR_HASH_UNAVAILABLE 138
- #define SSL_R_CIPHER_TABLE_SRC_ERROR 139
-@@ -2353,6 +2398,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_CONNECTION_ID_IS_DIFFERENT 143
- #define SSL_R_CONNECTION_TYPE_NOT_SET 144
- #define SSL_R_COOKIE_MISMATCH 308
-+#define SSL_R_D2I_ECDSA_SIG 379
- #define SSL_R_DATA_BETWEEN_CCS_AND_FINISHED 145
- #define SSL_R_DATA_LENGTH_TOO_LONG 146
- #define SSL_R_DECRYPTION_FAILED 147
-@@ -2370,9 +2416,12 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_ENCRYPTED_LENGTH_TOO_LONG 150
- #define SSL_R_ERROR_GENERATING_TMP_RSA_KEY 282
- #define SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST 151
-+#define SSL_R_EVP_DIGESTSIGNFINAL_FAILED 377
-+#define SSL_R_EVP_DIGESTSIGNINIT_FAILED 378
- #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152
- #define SSL_R_EXTRA_DATA_IN_MESSAGE 153
- #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154
-+#define SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS 372
- #define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 355
- #define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 356
- #define SSL_R_HTTPS_PROXY_REQUEST 155
-@@ -2382,6 +2431,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_INVALID_CHALLENGE_LENGTH 158
- #define SSL_R_INVALID_COMMAND 280
- #define SSL_R_INVALID_COMPRESSION_ALGORITHM 341
-+#define SSL_R_INVALID_MESSAGE 374
- #define SSL_R_INVALID_PURPOSE 278
- #define SSL_R_INVALID_SRP_USERNAME 357
- #define SSL_R_INVALID_STATUS_RESPONSE 328
-@@ -2436,6 +2486,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_NO_COMPRESSION_SPECIFIED 187
- #define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER 330
- #define SSL_R_NO_METHOD_SPECIFIED 188
-+#define SSL_R_NO_P256_SUPPORT 373
- #define SSL_R_NO_PRIVATEKEY 189
- #define SSL_R_NO_PRIVATE_KEY_ASSIGNED 190
- #define SSL_R_NO_PROTOCOLS_AVAILABLE 191
---- openssl-1.0.1e.orig/ssl/ssl3.h 2013-03-05 18:49:33.223297173 +0000
-+++ openssl-1.0.1e/ssl/ssl3.h 2013-03-05 18:49:33.413299231 +0000
-@@ -539,6 +539,17 @@ typedef struct ssl3_state_st
- char is_probably_safari;
- #endif /* !OPENSSL_NO_EC */
- #endif /* !OPENSSL_NO_TLSEXT */
-+
-+ /* In a client, this means that the server supported Channel ID and that
-+ * a Channel ID was sent. In a server it means that we echoed support
-+ * for Channel IDs and that tlsext_channel_id will be valid after the
-+ * handshake. */
-+ char tlsext_channel_id_valid;
-+ /* For a server:
-+ * If |tlsext_channel_id_valid| is true, then this contains the
-+ * verified Channel ID from the client: a P256 point, (x,y), where
-+ * each are big-endian values. */
-+ unsigned char tlsext_channel_id[64];
- } SSL3_STATE;
-
- #endif
-@@ -583,6 +594,8 @@ typedef struct ssl3_state_st
- #define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT)
- #define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT)
- #endif
-+#define SSL3_ST_CW_CHANNEL_ID_A (0x210|SSL_ST_CONNECT)
-+#define SSL3_ST_CW_CHANNEL_ID_B (0x211|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT)
- /* read from server */
-@@ -632,10 +645,13 @@ typedef struct ssl3_state_st
- #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
- #ifndef OPENSSL_NO_NEXTPROTONEG
- #define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
- #endif
-+#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT)
- /* write to client */
-@@ -663,6 +679,7 @@ typedef struct ssl3_state_st
- #ifndef OPENSSL_NO_NEXTPROTONEG
- #define SSL3_MT_NEXT_PROTO 67
- #endif
-+#define SSL3_MT_ENCRYPTED_EXTENSIONS 203
- #define DTLS1_MT_HELLO_VERIFY_REQUEST 3
-
-
---- openssl-1.0.1e.orig/ssl/ssl_err.c 2013-03-05 18:49:33.243297392 +0000
-+++ openssl-1.0.1e/ssl/ssl_err.c 2013-03-05 18:49:33.413299231 +0000
-@@ -151,6 +151,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
- {ERR_FUNC(SSL_F_SSL3_GET_CERTIFICATE_REQUEST), "SSL3_GET_CERTIFICATE_REQUEST"},
- {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "SSL3_GET_CERT_STATUS"},
- {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "SSL3_GET_CERT_VERIFY"},
-+{ERR_FUNC(SSL_F_SSL3_GET_CHANNEL_ID), "SSL3_GET_CHANNEL_ID"},
- {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_CERTIFICATE), "SSL3_GET_CLIENT_CERTIFICATE"},
- {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "SSL3_GET_CLIENT_HELLO"},
- {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE), "SSL3_GET_CLIENT_KEY_EXCHANGE"},
-@@ -170,6 +171,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
- {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "SSL3_READ_BYTES"},
- {ERR_FUNC(SSL_F_SSL3_READ_N), "SSL3_READ_N"},
- {ERR_FUNC(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST), "SSL3_SEND_CERTIFICATE_REQUEST"},
-+{ERR_FUNC(SSL_F_SSL3_SEND_CHANNEL_ID), "SSL3_SEND_CHANNEL_ID"},
- {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE), "SSL3_SEND_CLIENT_CERTIFICATE"},
- {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE), "SSL3_SEND_CLIENT_KEY_EXCHANGE"},
- {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "SSL3_SEND_CLIENT_VERIFY"},
-@@ -339,12 +341,15 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_BIO_NOT_SET) ,"bio not set"},
- {ERR_REASON(SSL_R_BLOCK_CIPHER_PAD_IS_WRONG),"block cipher pad is wrong"},
- {ERR_REASON(SSL_R_BN_LIB) ,"bn lib"},
-+{ERR_REASON(SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY),"cannot serialize public key"},
- {ERR_REASON(SSL_R_CA_DN_LENGTH_MISMATCH) ,"ca dn length mismatch"},
- {ERR_REASON(SSL_R_CA_DN_TOO_LONG) ,"ca dn too long"},
- {ERR_REASON(SSL_R_CCS_RECEIVED_EARLY) ,"ccs received early"},
- {ERR_REASON(SSL_R_CERTIFICATE_VERIFY_FAILED),"certificate verify failed"},
- {ERR_REASON(SSL_R_CERT_LENGTH_MISMATCH) ,"cert length mismatch"},
- {ERR_REASON(SSL_R_CHALLENGE_IS_DIFFERENT),"challenge is different"},
-+{ERR_REASON(SSL_R_CHANNEL_ID_NOT_P256) ,"channel id not p256"},
-+{ERR_REASON(SSL_R_CHANNEL_ID_SIGNATURE_INVALID),"Channel ID signature invalid"},
- {ERR_REASON(SSL_R_CIPHER_CODE_WRONG_LENGTH),"cipher code wrong length"},
- {ERR_REASON(SSL_R_CIPHER_OR_HASH_UNAVAILABLE),"cipher or hash unavailable"},
- {ERR_REASON(SSL_R_CIPHER_TABLE_SRC_ERROR),"cipher table src error"},
-@@ -357,6 +362,7 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_CONNECTION_ID_IS_DIFFERENT),"connection id is different"},
- {ERR_REASON(SSL_R_CONNECTION_TYPE_NOT_SET),"connection type not set"},
- {ERR_REASON(SSL_R_COOKIE_MISMATCH) ,"cookie mismatch"},
-+{ERR_REASON(SSL_R_D2I_ECDSA_SIG) ,"d2i ecdsa sig"},
- {ERR_REASON(SSL_R_DATA_BETWEEN_CCS_AND_FINISHED),"data between ccs and finished"},
- {ERR_REASON(SSL_R_DATA_LENGTH_TOO_LONG) ,"data length too long"},
- {ERR_REASON(SSL_R_DECRYPTION_FAILED) ,"decryption failed"},
-@@ -374,9 +380,12 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_ENCRYPTED_LENGTH_TOO_LONG),"encrypted length too long"},
- {ERR_REASON(SSL_R_ERROR_GENERATING_TMP_RSA_KEY),"error generating tmp rsa key"},
- {ERR_REASON(SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST),"error in received cipher list"},
-+{ERR_REASON(SSL_R_EVP_DIGESTSIGNFINAL_FAILED),"evp digestsignfinal failed"},
-+{ERR_REASON(SSL_R_EVP_DIGESTSIGNINIT_FAILED),"evp digestsigninit failed"},
- {ERR_REASON(SSL_R_EXCESSIVE_MESSAGE_SIZE),"excessive message size"},
- {ERR_REASON(SSL_R_EXTRA_DATA_IN_MESSAGE) ,"extra data in message"},
- {ERR_REASON(SSL_R_GOT_A_FIN_BEFORE_A_CCS),"got a fin before a ccs"},
-+{ERR_REASON(SSL_R_GOT_CHANNEL_ID_BEFORE_A_CCS),"got Channel ID before a ccs"},
- {ERR_REASON(SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS),"got next proto before a ccs"},
- {ERR_REASON(SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION),"got next proto without seeing extension"},
- {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST) ,"https proxy request"},
-@@ -386,6 +395,7 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_INVALID_CHALLENGE_LENGTH),"invalid challenge length"},
- {ERR_REASON(SSL_R_INVALID_COMMAND) ,"invalid command"},
- {ERR_REASON(SSL_R_INVALID_COMPRESSION_ALGORITHM),"invalid compression algorithm"},
-+{ERR_REASON(SSL_R_INVALID_MESSAGE) ,"invalid message"},
- {ERR_REASON(SSL_R_INVALID_PURPOSE) ,"invalid purpose"},
- {ERR_REASON(SSL_R_INVALID_SRP_USERNAME) ,"invalid srp username"},
- {ERR_REASON(SSL_R_INVALID_STATUS_RESPONSE),"invalid status response"},
-@@ -440,6 +450,7 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_NO_COMPRESSION_SPECIFIED),"no compression specified"},
- {ERR_REASON(SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER),"Peer haven't sent GOST certificate, required for selected ciphersuite"},
- {ERR_REASON(SSL_R_NO_METHOD_SPECIFIED) ,"no method specified"},
-+{ERR_REASON(SSL_R_NO_P256_SUPPORT) ,"no p256 support"},
- {ERR_REASON(SSL_R_NO_PRIVATEKEY) ,"no privatekey"},
- {ERR_REASON(SSL_R_NO_PRIVATE_KEY_ASSIGNED),"no private key assigned"},
- {ERR_REASON(SSL_R_NO_PROTOCOLS_AVAILABLE),"no protocols available"},
---- openssl-1.0.1e.orig/ssl/ssl_lib.c 2013-03-05 18:49:33.243297392 +0000
-+++ openssl-1.0.1e/ssl/ssl_lib.c 2013-03-05 18:49:33.413299231 +0000
-@@ -579,6 +579,8 @@ void SSL_free(SSL *s)
- sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, OCSP_RESPID_free);
- if (s->tlsext_ocsp_resp)
- OPENSSL_free(s->tlsext_ocsp_resp);
-+ if (s->tlsext_channel_id_private)
-+ EVP_PKEY_free(s->tlsext_channel_id_private);
- #endif
-
- if (s->client_CA != NULL)
-@@ -2005,6 +2007,11 @@ void SSL_CTX_free(SSL_CTX *a)
- ssl_buf_freelist_free(a->rbuf_freelist);
- #endif
-
-+#ifndef OPENSSL_NO_TLSEXT
-+ if (a->tlsext_channel_id_private)
-+ EVP_PKEY_free(a->tlsext_channel_id_private);
-+#endif
-+
- OPENSSL_free(a);
- }
-
---- openssl-1.0.1e.orig/ssl/ssl_locl.h 2013-03-05 18:49:33.243297392 +0000
-+++ openssl-1.0.1e/ssl/ssl_locl.h 2013-03-05 18:49:33.413299231 +0000
-@@ -378,6 +378,7 @@
- * (currently this also goes into algorithm2) */
- #define TLS1_STREAM_MAC 0x04
-
-+#define TLSEXT_CHANNEL_ID_SIZE 128
-
-
- /*
-@@ -1004,6 +1005,7 @@ int ssl3_check_cert_and_algorithm(SSL *s
- int ssl3_check_finished(SSL *s);
- # ifndef OPENSSL_NO_NEXTPROTONEG
- int ssl3_send_next_proto(SSL *s);
-+int ssl3_send_channel_id(SSL *s);
- # endif
- #endif
-
-@@ -1026,6 +1028,7 @@ int ssl3_get_cert_verify(SSL *s);
- #ifndef OPENSSL_NO_NEXTPROTONEG
- int ssl3_get_next_proto(SSL *s);
- #endif
-+int ssl3_get_channel_id(SSL *s);
-
- int dtls1_send_hello_request(SSL *s);
- int dtls1_send_server_hello(SSL *s);
-@@ -1123,7 +1126,9 @@ int tls12_get_sigandhash(unsigned char *
- int tls12_get_sigid(const EVP_PKEY *pk);
- const EVP_MD *tls12_get_hash(unsigned char hash_alg);
-
-+int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s);
- #endif
-+
- EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ;
- void ssl_clear_hash_ctx(EVP_MD_CTX **hash);
- int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len,
---- openssl-1.0.1e.orig/ssl/t1_lib.c 2013-03-05 18:49:33.173296633 +0000
-+++ openssl-1.0.1e/ssl/t1_lib.c 2013-03-05 18:49:33.413299231 +0000
-@@ -649,6 +649,16 @@ unsigned char *ssl_add_clienthello_tlsex
- }
- #endif
-
-+ if (s->tlsext_channel_id_enabled)
-+ {
-+ /* The client advertises an emtpy extension to indicate its
-+ * support for Channel ID. */
-+ if (limit - ret - 4 < 0)
-+ return NULL;
-+ s2n(TLSEXT_TYPE_channel_id,ret);
-+ s2n(0,ret);
-+ }
-+
- #ifndef OPENSSL_NO_SRTP
- if(SSL_get_srtp_profiles(s))
- {
-@@ -859,6 +869,16 @@ unsigned char *ssl_add_serverhello_tlsex
- }
- #endif
-
-+ /* If the client advertised support for Channel ID, and we have it
-+ * enabled, then we want to echo it back. */
-+ if (s->s3->tlsext_channel_id_valid)
-+ {
-+ if (limit - ret - 4 < 0)
-+ return NULL;
-+ s2n(TLSEXT_TYPE_channel_id,ret);
-+ s2n(0,ret);
-+ }
-+
- if ((extdatalen = ret-p-2)== 0)
- return p;
-
-@@ -1332,6 +1352,9 @@ int ssl_parse_clienthello_tlsext(SSL *s,
- }
- #endif
-
-+ else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled)
-+ s->s3->tlsext_channel_id_valid = 1;
-+
- /* session ticket processed earlier */
- #ifndef OPENSSL_NO_SRTP
- else if (type == TLSEXT_TYPE_use_srtp)
-@@ -1562,6 +1585,9 @@ int ssl_parse_serverhello_tlsext(SSL *s,
- s->s3->next_proto_neg_seen = 1;
- }
- #endif
-+ else if (type == TLSEXT_TYPE_channel_id)
-+ s->s3->tlsext_channel_id_valid = 1;
-+
- else if (type == TLSEXT_TYPE_renegotiate)
- {
- if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al))
-@@ -2621,3 +2647,37 @@ tls1_heartbeat(SSL *s)
- return ret;
- }
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT)
-+/* tls1_channel_id_hash calculates the signed data for a Channel ID on the given
-+ * SSL connection and writes it to |md|.
-+ */
-+int
-+tls1_channel_id_hash(EVP_MD_CTX *md, SSL *s)
-+ {
-+ EVP_MD_CTX ctx;
-+ unsigned char temp_digest[EVP_MAX_MD_SIZE];
-+ unsigned temp_digest_len;
-+ int i;
-+ static const char kClientIDMagic[] = "TLS Channel ID signature";
-+
-+ if (s->s3->handshake_buffer)
-+ if (!ssl3_digest_cached_records(s))
-+ return 0;
-+
-+ EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic));
-+
-+ EVP_MD_CTX_init(&ctx);
-+ for (i = 0; i < SSL_MAX_DIGEST; i++)
-+ {
-+ if (s->s3->handshake_dgst[i] == NULL)
-+ continue;
-+ EVP_MD_CTX_copy_ex(&ctx, s->s3->handshake_dgst[i]);
-+ EVP_DigestFinal_ex(&ctx, temp_digest, &temp_digest_len);
-+ EVP_DigestUpdate(md, temp_digest, temp_digest_len);
-+ }
-+ EVP_MD_CTX_cleanup(&ctx);
-+
-+ return 1;
-+ }
-+#endif
---- openssl-1.0.1e.orig/ssl/tls1.h 2013-03-05 18:49:33.173296633 +0000
-+++ openssl-1.0.1e/ssl/tls1.h 2013-03-05 18:49:33.413299231 +0000
-@@ -248,6 +248,9 @@ extern "C" {
- #define TLSEXT_TYPE_next_proto_neg 13172
- #endif
-
-+/* This is not an IANA defined extension number */
-+#define TLSEXT_TYPE_channel_id 30031
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
diff --git a/patches/early_ccs.patch b/patches/early_ccs.patch
deleted file mode 100644
index d4c31e6..0000000
--- a/patches/early_ccs.patch
+++ /dev/null
@@ -1,100 +0,0 @@
---- openssl-1.0.1e.orig/ssl/s3_clnt.c
-+++ openssl-1.0.1e/ssl/s3_clnt.c
-@@ -606,7 +606,7 @@ int ssl3_connect(SSL *s)
-
- case SSL3_ST_CR_FINISHED_A:
- case SSL3_ST_CR_FINISHED_B:
--
-+ s->s3->flags |= SSL3_FLAGS_CCS_OK;
- ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A,
- SSL3_ST_CR_FINISHED_B);
- if (ret <= 0) goto end;
-@@ -915,6 +916,7 @@
- SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT);
- goto f_err;
- }
-+ s->s3->flags |= SSL3_FLAGS_CCS_OK;
- s->hit=1;
- }
- else /* a miss or crap from the other end */
---- openssl-1.0.1e.orig/ssl/s3_pkt.c
-+++ openssl-1.0.1e/ssl/s3_pkt.c
-@@ -1297,6 +1297,13 @@ start:
- goto f_err;
- }
-
-+ if (!(s->s3->flags & SSL3_FLAGS_CCS_OK))
-+ {
-+ al=SSL_AD_UNEXPECTED_MESSAGE;
-+ SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS);
-+ goto f_err;
-+ }
-+
- rr->length=0;
-
- if (s->msg_callback)
-@@ -1431,7 +1438,12 @@ int ssl3_do_change_cipher_spec(SSL *s)
-
- if (s->s3->tmp.key_block == NULL)
- {
-- if (s->session == NULL)
-+ if (s->session->master_key_length == 0)
-+ {
-+ SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS);
-+ return (0);
-+ }
-+ if (s->session == NULL)
- {
- /* might happen if dtls1_read_bytes() calls this */
- SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY);
---- openssl-1.0.1e.orig/ssl/s3_srvr.c
-+++ openssl-1.0.1e/ssl/s3_srvr.c
-@@ -670,6 +670,7 @@ int ssl3_accept(SSL *s)
- case SSL3_ST_SR_CERT_VRFY_B:
-
- /* we should decide if we expected this one */
-+ s->s3->flags |= SSL3_FLAGS_CCS_OK;
- ret=ssl3_get_cert_verify(s);
- if (ret <= 0) goto end;
-
-@@ -687,6 +688,7 @@ int ssl3_accept(SSL *s)
- channel_id = s->s3->tlsext_channel_id_valid;
- #endif
-
-+ s->s3->flags |= SSL3_FLAGS_CCS_OK;
- if (next_proto_neg)
- s->state=SSL3_ST_SR_NEXT_PROTO_A;
- else if (channel_id)
---- openssl-1.0.1e.orig/ssl/ssl.h
-+++ openssl-1.0.1e/ssl/ssl.h
-@@ -2640,6 +2640,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_WRONG_VERSION_NUMBER 267
- #define SSL_R_X509_LIB 268
- #define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269
-+#define SSL_R_UNEXPECTED_CCS 388
-
- #ifdef __cplusplus
- }
---- openssl-1.0.1e.orig/ssl/ssl3.h
-+++ openssl-1.0.1e/ssl/ssl3.h
-@@ -388,6 +388,10 @@ typedef struct ssl3_buffer_st
- #define TLS1_FLAGS_TLS_PADDING_BUG 0x0008
- #define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010
- #define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020
-+/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at
-+ * this point in the handshake. If this flag is not set then received CCS
-+ * records will cause a fatal error for the connection. */
-+#define SSL3_FLAGS_CCS_OK 0x0080
-
- /* SSL3_FLAGS_SGC_RESTART_DONE is set when we
- * restart a handshake because of MS SGC and so prevents us
---- openssl-1.0.1e.orig/ssl/ssl_err.c
-+++ openssl-1.0.1e/ssl/ssl_err.c
-@@ -604,6 +604,7 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_WRONG_VERSION_NUMBER) ,"wrong version number"},
- {ERR_REASON(SSL_R_X509_LIB) ,"x509 lib"},
- {ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification setup problems"},
-+{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"},
- {0,NULL}
- };
-
diff --git a/patches/paddingext.patch b/patches/paddingext.patch
deleted file mode 100644
index 37514bc..0000000
--- a/patches/paddingext.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 9336ed61c26255f31fac2563d0911fc29d2143fd Mon Sep 17 00:00:00 2001
-From: Adam Langley <[email protected]>
-Date: Wed, 11 Dec 2013 16:25:17 -0500
-Subject: Add padding extension.
-
-This change adds a padding extension, when needed, in order to work
-around bugs in F5 terminators.
----
- ssl/s23_clnt.c | 5 ++++-
- ssl/s3_clnt.c | 4 +++-
- ssl/t1_lib.c | 25 +++++++++++++++++++++++++
- ssl/tls1.h | 4 ++++
- 4 files changed, 36 insertions(+), 2 deletions(-)
-
-diff --git a/ssl/s23_clnt.c b/ssl/s23_clnt.c
-index 84670b6..814a4c6 100644
---- a/ssl/s23_clnt.c
-+++ b/ssl/s23_clnt.c
-@@ -487,7 +487,10 @@ static int ssl23_client_hello(SSL *s)
- {
- /* create Client Hello in SSL 3.0/TLS 1.0 format */
-
-- /* do the record header (5 bytes) and handshake message header (4 bytes) last */
-+ /* do the record header (5 bytes) and handshake message
-+ * header (4 bytes) last. Note: the code to add the
-+ * padding extension in t1_lib.c depends on the size of
-+ * this prefix. */
- d = p = &(buf[9]);
-
- *(p++) = version_major;
-diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
-index 67edeaa..f37e907 100644
---- a/ssl/s3_clnt.c
-+++ b/ssl/s3_clnt.c
-@@ -751,7 +751,9 @@ int ssl3_client_hello(SSL *s)
- if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0)
- goto err;
-
-- /* Do the message type and length last */
-+ /* Do the message type and length last.
-+ * Note: the code to add the padding extension in t1_lib.c
-+ * depends on the size of this prefix. */
- d=p= &(buf[4]);
-
- /* version indicates the negotiated version: for example from
-diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c
-index 357db6e..a499367 100644
---- a/ssl/t1_lib.c
-+++ b/ssl/t1_lib.c
-@@ -687,6 +687,31 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *p, unsigned cha
- }
- #endif
-
-+ /* Add padding to workaround bugs in F5 terminators.
-+ * See https://tools.ietf.org/html/draft-agl-tls-padding-02 */
-+ {
-+ int hlen = ret - (unsigned char *)s->init_buf->data;
-+ /* The code in s23_clnt.c to build ClientHello messages includes the
-+ * 5-byte record header in the buffer, while the code in s3_clnt.c does
-+ * not. */
-+ if (s->state == SSL23_ST_CW_CLNT_HELLO_A)
-+ hlen -= 5;
-+ if (hlen > 0xff && hlen < 0x200)
-+ {
-+ hlen = 0x200 - hlen;
-+ if (hlen >= 4)
-+ hlen -= 4;
-+ else
-+ hlen = 0;
-+
-+ s2n(TLSEXT_TYPE_padding, ret);
-+ s2n(hlen, ret);
-+ memset(ret, 0, hlen);
-+ ret += hlen;
-+ }
-+ }
-+
-+
- if ((extdatalen = ret-p-2)== 0)
- return p;
-
-diff --git a/ssl/tls1.h b/ssl/tls1.h
-index ecf5da7..df8f482 100644
---- a/ssl/tls1.h
-+++ b/ssl/tls1.h
-@@ -255,6 +255,10 @@ extern "C" {
- #define TLSEXT_TYPE_channel_id 30031
- #define TLSEXT_TYPE_channel_id_new 30032
-
-+/* See https://tools.ietf.org/html/draft-agl-tls-padding-02
-+ * Number not yet IANA assigned. */
-+#define TLSEXT_TYPE_padding 35655
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
---
-1.8.5.1
-
diff --git a/rules.mk b/rules.mk
new file mode 100644
index 0000000..252dbbb
--- /dev/null
+++ b/rules.mk
@@ -0,0 +1,40 @@
+LOCAL_DIR := $(GET_LOCAL_DIR)
+
+MODULE := $(LOCAL_DIR)
+
+TARGET_ARCH := $(ARCH)
+TARGET_2ND_ARCH := $(ARCH)
+
+# Reset local variables
+LOCAL_CFLAGS :=
+LOCAL_C_INCLUDES :=
+LOCAL_SRC_FILES_$(TARGET_ARCH) :=
+LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) :=
+LOCAL_CFLAGS_$(TARGET_ARCH) :=
+LOCAL_CFLAGS_$(TARGET_2ND_ARCH) :=
+LOCAL_ADDITIONAL_DEPENDENCIES :=
+
+# get openssl_cflags
+MODULE_SRCDEPS += $(LOCAL_DIR)/build-config-trusty.mk
+include $(LOCAL_DIR)/build-config-trusty.mk
+
+# get target_c_flags, target_c_includes, target_src_files
+MODULE_SRCDEPS += $(LOCAL_DIR)/Crypto-config-trusty.mk
+include $(LOCAL_DIR)/Crypto-config-trusty.mk
+
+MODULE_SRCS += $(addprefix $(LOCAL_DIR)/,$(LOCAL_SRC_FILES_$(ARCH)))
+
+MODULE_CFLAGS += $(LOCAL_CFLAGS)
+MODULE_CFLAGS += -Wno-error=implicit-function-declaration
+
+# Global for other modules which include openssl headers
+GLOBAL_DEFINES += OPENSSL_SYS_TRUSTY
+
+LOCAL_C_INCLUDES := $(patsubst external/openssl/%,%,$(LOCAL_C_INCLUDES))
+GLOBAL_INCLUDES += $(addprefix $(LOCAL_DIR)/,$(LOCAL_C_INCLUDES))
+
+MODULE_DEPS := \
+ lib/openssl-stubs \
+ lib/libc-trusty
+
+include make/module.mk
diff --git a/ssl/bio_ssl.c b/ssl/bio_ssl.c
index e9552ca..06a13de 100644
--- a/ssl/bio_ssl.c
+++ b/ssl/bio_ssl.c
@@ -206,6 +206,10 @@
BIO_set_retry_special(b);
retry_reason=BIO_RR_SSL_X509_LOOKUP;
break;
+ case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP:
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP;
+ break;
case SSL_ERROR_WANT_ACCEPT:
BIO_set_retry_special(b);
retry_reason=BIO_RR_ACCEPT;
@@ -280,6 +284,10 @@
BIO_set_retry_special(b);
retry_reason=BIO_RR_SSL_X509_LOOKUP;
break;
+ case SSL_ERROR_WANT_CHANNEL_ID_LOOKUP:
+ BIO_set_retry_special(b);
+ retry_reason=BIO_RR_SSL_CHANNEL_ID_LOOKUP;
+ break;
case SSL_ERROR_WANT_CONNECT:
BIO_set_retry_special(b);
retry_reason=BIO_RR_CONNECT;
diff --git a/ssl/d1_both.c b/ssl/d1_both.c
index 7a5596a..04aa231 100644
--- a/ssl/d1_both.c
+++ b/ssl/d1_both.c
@@ -627,7 +627,16 @@
frag->msg_header.frag_off = 0;
}
else
+ {
frag = (hm_fragment*) item->data;
+ if (frag->msg_header.msg_len != msg_hdr->msg_len)
+ {
+ item = NULL;
+ frag = NULL;
+ goto err;
+ }
+ }
+
/* If message is already reassembled, this must be a
* retransmit and can be dropped.
@@ -674,8 +683,8 @@
item = pitem_new(seq64be, frag);
if (item == NULL)
{
- goto err;
i = -1;
+ goto err;
}
pqueue_insert(s->d1->buffered_messages, item);
@@ -784,6 +793,7 @@
int i,al;
struct hm_header_st msg_hdr;
+ redo:
/* see if we have the required fragment already */
if ((frag_len = dtls1_retrieve_buffered_fragment(s,max,ok)) || *ok)
{
@@ -842,8 +852,7 @@
s->msg_callback_arg);
s->init_num = 0;
- return dtls1_get_message_fragment(s, st1, stn,
- max, ok);
+ goto redo;
}
else /* Incorrectly formated Hello request */
{
@@ -1459,26 +1468,36 @@
unsigned int payload;
unsigned int padding = 16; /* Use minimum padding */
- /* Read type and payload length first */
- hbtype = *p++;
- n2s(p, payload);
- pl = p;
-
if (s->msg_callback)
s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT,
&s->s3->rrec.data[0], s->s3->rrec.length,
s, s->msg_callback_arg);
+ /* Read type and payload length first */
+ if (1 + 2 + 16 > s->s3->rrec.length)
+ return 0; /* silently discard */
+ hbtype = *p++;
+ n2s(p, payload);
+ if (1 + 2 + payload + 16 > s->s3->rrec.length)
+ return 0; /* silently discard per RFC 6520 sec. 4 */
+ pl = p;
+
if (hbtype == TLS1_HB_REQUEST)
{
unsigned char *buffer, *bp;
+ unsigned int write_length = 1 /* heartbeat type */ +
+ 2 /* heartbeat length */ +
+ payload + padding;
int r;
+ if (write_length > SSL3_RT_MAX_PLAIN_LENGTH)
+ return 0;
+
/* Allocate memory for the response, size is 1 byte
* message type, plus 2 bytes payload length, plus
* payload, plus padding
*/
- buffer = OPENSSL_malloc(1 + 2 + payload + padding);
+ buffer = OPENSSL_malloc(write_length);
bp = buffer;
/* Enter response type, length and copy payload */
@@ -1489,11 +1508,11 @@
/* Random padding */
RAND_pseudo_bytes(bp, padding);
- r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, 3 + payload + padding);
+ r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, write_length);
if (r >= 0 && s->msg_callback)
s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT,
- buffer, 3 + payload + padding,
+ buffer, write_length,
s, s->msg_callback_arg);
OPENSSL_free(buffer);
diff --git a/ssl/d1_clnt.c b/ssl/d1_clnt.c
index 4fc4e1b..5ee8f58 100644
--- a/ssl/d1_clnt.c
+++ b/ssl/d1_clnt.c
@@ -1440,7 +1440,7 @@
goto err;
}
- psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
+ psk_len = s->psk_client_callback(s, s->session->psk_identity_hint,
identity, PSK_MAX_IDENTITY_LEN,
psk_or_pre_ms, sizeof(psk_or_pre_ms));
if (psk_len > PSK_MAX_PSK_LEN)
@@ -1465,17 +1465,6 @@
t+=psk_len;
s2n(psk_len, t);
- if (s->session->psk_identity_hint != NULL)
- OPENSSL_free(s->session->psk_identity_hint);
- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
- if (s->ctx->psk_identity_hint != NULL &&
- s->session->psk_identity_hint == NULL)
- {
- SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
- ERR_R_MALLOC_FAILURE);
- goto psk_err;
- }
-
if (s->session->psk_identity != NULL)
OPENSSL_free(s->session->psk_identity);
s->session->psk_identity = BUF_strdup(identity);
diff --git a/ssl/d1_lib.c b/ssl/d1_lib.c
index e27828f..82ca653 100644
--- a/ssl/d1_lib.c
+++ b/ssl/d1_lib.c
@@ -176,9 +176,12 @@
while ( (item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL)
{
- frag = (hm_fragment *)item->data;
- OPENSSL_free(frag->fragment);
- OPENSSL_free(frag);
+ rdata = (DTLS1_RECORD_DATA *) item->data;
+ if (rdata->rbuf.buf)
+ {
+ OPENSSL_free(rdata->rbuf.buf);
+ }
+ OPENSSL_free(item->data);
pitem_free(item);
}
}
diff --git a/ssl/d1_pkt.c b/ssl/d1_pkt.c
index 5b84e97..363fc8c 100644
--- a/ssl/d1_pkt.c
+++ b/ssl/d1_pkt.c
@@ -241,14 +241,6 @@
}
#endif
- /* insert should not fail, since duplicates are dropped */
- if (pqueue_insert(queue->q, item) == NULL)
- {
- OPENSSL_free(rdata);
- pitem_free(item);
- return(0);
- }
-
s->packet = NULL;
s->packet_length = 0;
memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER));
@@ -261,7 +253,16 @@
pitem_free(item);
return(0);
}
-
+
+ /* insert should not fail, since duplicates are dropped */
+ if (pqueue_insert(queue->q, item) == NULL)
+ {
+ SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR);
+ OPENSSL_free(rdata);
+ pitem_free(item);
+ return(0);
+ }
+
return(1);
}
diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c
index 9975e20..c181db6 100644
--- a/ssl/d1_srvr.c
+++ b/ssl/d1_srvr.c
@@ -471,7 +471,7 @@
/* PSK: send ServerKeyExchange if PSK identity
* hint if provided */
#ifndef OPENSSL_NO_PSK
- || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint)
+ || ((alg_k & SSL_kPSK) && s->session->psk_identity_hint)
#endif
|| (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
|| (alg_k & SSL_kEECDH)
@@ -1288,7 +1288,7 @@
if (type & SSL_kPSK)
{
/* reserve size for record length and PSK identity hint*/
- n+=2+strlen(s->ctx->psk_identity_hint);
+ n+=2+strlen(s->session->psk_identity_hint);
}
else
#endif /* !OPENSSL_NO_PSK */
@@ -1356,6 +1356,7 @@
(unsigned char *)encodedPoint,
encodedlen);
OPENSSL_free(encodedPoint);
+ encodedPoint = NULL;
p += encodedlen;
}
#endif
@@ -1364,9 +1365,9 @@
if (type & SSL_kPSK)
{
/* copy PSK identity hint */
- s2n(strlen(s->ctx->psk_identity_hint), p);
- strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
- p+=strlen(s->ctx->psk_identity_hint);
+ s2n(strlen(s->session->psk_identity_hint), p);
+ strncpy((char *)p, s->session->psk_identity_hint, strlen(s->session->psk_identity_hint));
+ p+=strlen(s->session->psk_identity_hint);
}
#endif
diff --git a/ssl/kssl.h b/ssl/kssl.h
index 8242fd5..e4df843 100644
--- a/ssl/kssl.h
+++ b/ssl/kssl.h
@@ -70,6 +70,15 @@
#include <stdio.h>
#include <ctype.h>
#include <krb5.h>
+#ifdef OPENSSL_SYS_WIN32
+/* These can sometimes get redefined indirectly by krb5 header files
+ * after they get undefed in ossl_typ.h
+ */
+#undef X509_NAME
+#undef X509_EXTENSIONS
+#undef OCSP_REQUEST
+#undef OCSP_RESPONSE
+#endif
#ifdef __cplusplus
extern "C" {
diff --git a/ssl/s23_clnt.c b/ssl/s23_clnt.c
index b43088e..467adfe 100644
--- a/ssl/s23_clnt.c
+++ b/ssl/s23_clnt.c
@@ -283,7 +283,7 @@
send_time = (s->mode & SSL_MODE_SEND_CLIENTHELLO_TIME) != 0;
if (send_time)
{
- unsigned long Time = time(NULL);
+ unsigned long Time = (unsigned long)time(NULL);
unsigned char *p = result;
l2n(Time, p);
return RAND_pseudo_bytes(p, len-4);
@@ -487,10 +487,7 @@
{
/* create Client Hello in SSL 3.0/TLS 1.0 format */
- /* do the record header (5 bytes) and handshake message
- * header (4 bytes) last. Note: the code to add the
- * padding extension in t1_lib.c depends on the size of
- * this prefix. */
+ /* do the record header (5 bytes) and handshake message header (4 bytes) last */
d = p = &(buf[9]);
*(p++) = version_major;
diff --git a/ssl/s3_both.c b/ssl/s3_both.c
index d9e18a3..607990d 100644
--- a/ssl/s3_both.c
+++ b/ssl/s3_both.c
@@ -561,7 +561,7 @@
#endif
/* Feed this message into MAC computation. */
- if (*(unsigned char*)s->init_buf->data != SSL3_MT_ENCRYPTED_EXTENSIONS)
+ if (*((unsigned char*) s->init_buf->data) != SSL3_MT_ENCRYPTED_EXTENSIONS)
ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
if (s->msg_callback)
s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg);
diff --git a/ssl/s3_clnt.c b/ssl/s3_clnt.c
index 9979b7d..486f538 100644
--- a/ssl/s3_clnt.c
+++ b/ssl/s3_clnt.c
@@ -215,24 +215,12 @@
}
#endif
-// BEGIN android-added
-#if 0
-/* Send app data in separate packet, otherwise, some particular site
- * (only one site so far) closes the socket. http://b/2511073
- * Note: there is a very small chance that two TCP packets
- * could be arriving at server combined into a single TCP packet,
- * then trigger that site to break. We haven't encounter that though.
- */
-// END android-added
if (SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)
{
/* Send app data along with CCS/Finished */
s->s3->flags |= SSL3_FLAGS_DELAY_CLIENT_FINISHED;
}
-// BEGIN android-added
-#endif
-// END android-added
for (;;)
{
state=s->state;
@@ -345,9 +333,10 @@
}
#endif
/* Check if it is anon DH/ECDH */
- /* or PSK */
+ /* or non-RSA PSK */
if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) &&
- !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK))
+ !((s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK) &&
+ !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA)))
{
ret=ssl3_get_server_certificate(s);
if (ret <= 0) goto end;
@@ -557,7 +546,20 @@
}
else
{
- if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && SSL_get_cipher_bits(s, NULL) >= 128
+ /* This is a non-resumption handshake. If it
+ * involves ChannelID, then record the
+ * handshake hashes at this point in the
+ * session so that any resumption of this
+ * session with ChannelID can sign those
+ * hashes. */
+ if (s->s3->tlsext_channel_id_new)
+ {
+ ret = tls1_record_handshake_hashes_for_channel_id(s);
+ if (ret <= 0)
+ goto end;
+ }
+ if ((SSL_get_mode(s) & SSL_MODE_HANDSHAKE_CUTTHROUGH)
+ && ssl3_can_cutthrough(s)
&& s->s3->previous_server_finished_len == 0 /* no cutthrough on renegotiation (would complicate the state machine) */
)
{
@@ -606,6 +608,7 @@
case SSL3_ST_CR_FINISHED_A:
case SSL3_ST_CR_FINISHED_B:
+
s->s3->flags |= SSL3_FLAGS_CCS_OK;
ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A,
SSL3_ST_CR_FINISHED_B);
@@ -756,9 +759,7 @@
if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0)
goto err;
- /* Do the message type and length last.
- * Note: the code to add the padding extension in t1_lib.c
- * depends on the size of this prefix. */
+ /* Do the message type and length last */
d=p= &(buf[4]);
/* version indicates the negotiated version: for example from
@@ -1380,12 +1381,14 @@
omitted if no identity hint is sent. Set
session->sess_cert anyway to avoid problems
later.*/
- if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)
+ if (s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK)
{
s->session->sess_cert=ssl_sess_cert_new();
- if (s->ctx->psk_identity_hint)
- OPENSSL_free(s->ctx->psk_identity_hint);
- s->ctx->psk_identity_hint = NULL;
+ if (s->session->psk_identity_hint)
+ {
+ OPENSSL_free(s->session->psk_identity_hint);
+ s->session->psk_identity_hint = NULL;
+ }
}
#endif
s->s3->tmp.reuse_message=1;
@@ -1428,52 +1431,58 @@
EVP_MD_CTX_init(&md_ctx);
#ifndef OPENSSL_NO_PSK
- if (alg_k & SSL_kPSK)
+ if (alg_a & SSL_aPSK)
{
char tmp_id_hint[PSK_MAX_IDENTITY_LEN+1];
al=SSL_AD_HANDSHAKE_FAILURE;
n2s(p,i);
param_len=i+2;
- /* Store PSK identity hint for later use, hint is used
- * in ssl3_send_client_key_exchange. Assume that the
- * maximum length of a PSK identity hint can be as
- * long as the maximum length of a PSK identity. */
- if (i > PSK_MAX_IDENTITY_LEN)
+ if (s->session->psk_identity_hint)
{
- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
- SSL_R_DATA_LENGTH_TOO_LONG);
- goto f_err;
+ OPENSSL_free(s->session->psk_identity_hint);
+ s->session->psk_identity_hint = NULL;
}
- if (param_len > n)
+ if (i != 0)
{
- al=SSL_AD_DECODE_ERROR;
- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
- SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH);
- goto f_err;
+ /* Store PSK identity hint for later use, hint is used
+ * in ssl3_send_client_key_exchange. Assume that the
+ * maximum length of a PSK identity hint can be as
+ * long as the maximum length of a PSK identity. */
+ if (i > PSK_MAX_IDENTITY_LEN)
+ {
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
+ SSL_R_DATA_LENGTH_TOO_LONG);
+ goto f_err;
+ }
+ if (param_len > n)
+ {
+ al=SSL_AD_DECODE_ERROR;
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
+ SSL_R_BAD_PSK_IDENTITY_HINT_LENGTH);
+ goto f_err;
+ }
+ /* If received PSK identity hint contains NULL
+ * characters, the hint is truncated from the first
+ * NULL. p may not be ending with NULL, so create a
+ * NULL-terminated string. */
+ memcpy(tmp_id_hint, p, i);
+ memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
+ s->session->psk_identity_hint = BUF_strdup(tmp_id_hint);
+ if (s->session->psk_identity_hint == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE);
+ goto f_err;
+ }
}
- /* If received PSK identity hint contains NULL
- * characters, the hint is truncated from the first
- * NULL. p may not be ending with NULL, so create a
- * NULL-terminated string. */
- memcpy(tmp_id_hint, p, i);
- memset(tmp_id_hint+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
- if (s->ctx->psk_identity_hint != NULL)
- OPENSSL_free(s->ctx->psk_identity_hint);
- s->ctx->psk_identity_hint = BUF_strdup(tmp_id_hint);
- if (s->ctx->psk_identity_hint == NULL)
- {
- SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_MALLOC_FAILURE);
- goto f_err;
- }
-
p+=i;
n-=param_len;
}
- else
#endif /* !OPENSSL_NO_PSK */
+
+ if (0) {}
#ifndef OPENSSL_NO_SRP
- if (alg_k & SSL_kSRP)
+ else if (alg_k & SSL_kSRP)
{
n2s(p,i);
param_len=i+2;
@@ -1550,10 +1559,9 @@
pkey=X509_get_pubkey(s->session->sess_cert->peer_pkeys[SSL_PKEY_DSA_SIGN].x509);
#endif
}
- else
#endif /* !OPENSSL_NO_SRP */
#ifndef OPENSSL_NO_RSA
- if (alg_k & SSL_kRSA)
+ else if (alg_k & SSL_kRSA)
{
if ((rsa=RSA_new()) == NULL)
{
@@ -1602,9 +1610,6 @@
s->session->sess_cert->peer_rsa_tmp=rsa;
rsa=NULL;
}
-#else /* OPENSSL_NO_RSA */
- if (0)
- ;
#endif
#ifndef OPENSSL_NO_DH
else if (alg_k & SSL_kEDH)
@@ -1785,14 +1790,14 @@
EC_POINT_free(srvr_ecpoint);
srvr_ecpoint = NULL;
}
- else if (alg_k)
+#endif /* !OPENSSL_NO_ECDH */
+
+ else if (!(alg_k & SSL_kPSK))
{
al=SSL_AD_UNEXPECTED_MESSAGE;
SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE);
goto f_err;
}
-#endif /* !OPENSSL_NO_ECDH */
-
/* p points to the next byte, there are 'n' bytes left */
@@ -1897,8 +1902,9 @@
}
else
{
- if (!(alg_a & SSL_aNULL) && !(alg_k & SSL_kPSK))
- /* aNULL or kPSK do not need public keys */
+ if (!(alg_a & SSL_aNULL) &&
+ /* Among PSK ciphers only RSA_PSK needs a public key */
+ !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
{
SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
goto err;
@@ -2298,8 +2304,9 @@
int ssl3_send_client_key_exchange(SSL *s)
{
unsigned char *p,*d;
- int n;
+ int n = 0;
unsigned long alg_k;
+ unsigned long alg_a;
#ifndef OPENSSL_NO_RSA
unsigned char *q;
EVP_PKEY *pkey=NULL;
@@ -2314,7 +2321,11 @@
unsigned char *encodedPoint = NULL;
int encoded_pt_len = 0;
BN_CTX * bn_ctx = NULL;
-#endif
+#ifndef OPENSSL_NO_PSK
+ unsigned int psk_len = 0;
+ unsigned char psk[PSK_MAX_PSK_LEN];
+#endif /* OPENSSL_NO_PSK */
+#endif /* OPENSSL_NO_ECDH */
if (s->state == SSL3_ST_CW_KEY_EXCH_A)
{
@@ -2322,7 +2333,89 @@
p= &(d[4]);
alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
+ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
+#ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+ char identity[PSK_MAX_IDENTITY_LEN + 1];
+ size_t identity_len;
+ unsigned char *t = NULL;
+ unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4];
+ unsigned int pre_ms_len = 0;
+ int psk_err = 1;
+
+ n = 0;
+ if (s->psk_client_callback == NULL)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ SSL_R_PSK_NO_CLIENT_CB);
+ goto err;
+ }
+
+ memset(identity, 0, sizeof(identity));
+ psk_len = s->psk_client_callback(s, s->session->psk_identity_hint,
+ identity, sizeof(identity), psk, sizeof(psk));
+ if (psk_len > PSK_MAX_PSK_LEN)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ ERR_R_INTERNAL_ERROR);
+ goto psk_err;
+ }
+ else if (psk_len == 0)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ SSL_R_PSK_IDENTITY_NOT_FOUND);
+ goto psk_err;
+ }
+ identity_len = strnlen(identity, sizeof(identity));
+ if (identity_len > PSK_MAX_IDENTITY_LEN)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ ERR_R_INTERNAL_ERROR);
+ goto psk_err;
+ }
+
+ if (!(alg_k & SSL_kEECDH))
+ {
+ /* Create the shared secret now if we're not using ECDHE-PSK.*/
+ pre_ms_len = 2+psk_len+2+psk_len;
+ t = pre_ms;
+ s2n(psk_len, t);
+ memset(t, 0, psk_len);
+ t+=psk_len;
+ s2n(psk_len, t);
+ memcpy(t, psk, psk_len);
+
+ s->session->master_key_length =
+ s->method->ssl3_enc->generate_master_secret(s,
+ s->session->master_key,
+ pre_ms, pre_ms_len);
+ s2n(identity_len, p);
+ memcpy(p, identity, identity_len);
+ n = 2 + identity_len;
+ }
+
+ if (s->session->psk_identity != NULL)
+ OPENSSL_free(s->session->psk_identity);
+ s->session->psk_identity = BUF_strdup(identity);
+ if (s->session->psk_identity == NULL)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ ERR_R_MALLOC_FAILURE);
+ goto psk_err;
+ }
+ psk_err = 0;
+ psk_err:
+ OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN);
+ OPENSSL_cleanse(pre_ms, sizeof(pre_ms));
+ if (psk_err != 0)
+ {
+ ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
+ goto err;
+ }
+ }
+#endif
/* Fool emacs indentation */
if (0) {}
#ifndef OPENSSL_NO_RSA
@@ -2583,14 +2676,26 @@
/* perhaps clean things up a bit EAY EAY EAY EAY*/
}
#endif
-
-#ifndef OPENSSL_NO_ECDH
+#ifndef OPENSSL_NO_ECDH
else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
{
const EC_GROUP *srvr_group = NULL;
EC_KEY *tkey;
int ecdh_clnt_cert = 0;
int field_size = 0;
+#ifndef OPENSSL_NO_PSK
+ unsigned char *pre_ms;
+ unsigned char *t;
+ unsigned int pre_ms_len;
+ unsigned int i;
+#endif
+
+ if (s->session->sess_cert == NULL)
+ {
+ ssl3_send_alert(s,SSL3_AL_FATAL,SSL_AD_UNEXPECTED_MESSAGE);
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE);
+ goto err;
+ }
/* Did we send out the client's
* ECDH share for use in premaster
@@ -2711,15 +2816,41 @@
goto err;
}
- /* generate master key from the result */
- s->session->master_key_length = s->method->ssl3_enc \
- -> generate_master_secret(s,
- s->session->master_key,
- p, n);
-
+#ifndef OPENSSL_NO_PSK
+ /* ECDHE PSK ciphersuites from RFC 5489 */
+ if ((alg_a & SSL_aPSK) && psk_len != 0)
+ {
+ pre_ms_len = 2+psk_len+2+n;
+ pre_ms = OPENSSL_malloc(pre_ms_len);
+ if (pre_ms == NULL)
+ {
+ SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+ ERR_R_MALLOC_FAILURE);
+ goto err;
+ }
+ memset(pre_ms, 0, pre_ms_len);
+ t = pre_ms;
+ s2n(psk_len, t);
+ memcpy(t, psk, psk_len);
+ t += psk_len;
+ s2n(n, t);
+ memcpy(t, p, n);
+ s->session->master_key_length = s->method->ssl3_enc \
+ -> generate_master_secret(s,
+ s->session->master_key, pre_ms, pre_ms_len);
+ OPENSSL_cleanse(pre_ms, pre_ms_len);
+ OPENSSL_free(pre_ms);
+ }
+#endif /* OPENSSL_NO_PSK */
+ if (!(alg_a & SSL_aPSK))
+ {
+ /* generate master key from the result */
+ s->session->master_key_length = s->method->ssl3_enc \
+ -> generate_master_secret(s,
+ s->session->master_key, p, n);
+ }
memset(p, 0, n); /* clean up */
-
- if (ecdh_clnt_cert)
+ if (ecdh_clnt_cert)
{
/* Send empty client key exch message */
n = 0;
@@ -2747,29 +2878,42 @@
}
/* Encode the public key */
- n = EC_POINT_point2oct(srvr_group,
- EC_KEY_get0_public_key(clnt_ecdh),
- POINT_CONVERSION_UNCOMPRESSED,
+ encoded_pt_len = EC_POINT_point2oct(srvr_group,
+ EC_KEY_get0_public_key(clnt_ecdh),
+ POINT_CONVERSION_UNCOMPRESSED,
encodedPoint, encoded_pt_len, bn_ctx);
+
+ n = 0;
+#ifndef OPENSSL_NO_PSK
+ if ((alg_a & SSL_aPSK) && psk_len != 0)
+ {
+ i = strlen(s->session->psk_identity);
+ s2n(i, p);
+ memcpy(p, s->session->psk_identity, i);
+ p += i;
+ n = i + 2;
+ }
+#endif
- *p = n; /* length of encoded point */
+ *p = encoded_pt_len; /* length of encoded point */
/* Encoded point will be copied here */
- p += 1;
+ p += 1;
+ n += 1;
/* copy the point */
- memcpy((unsigned char *)p, encodedPoint, n);
+ memcpy((unsigned char *)p, encodedPoint, encoded_pt_len);
/* increment n to account for length field */
- n += 1;
+ n += encoded_pt_len;
}
/* Free allocated memory */
BN_CTX_free(bn_ctx);
if (encodedPoint != NULL) OPENSSL_free(encodedPoint);
- if (clnt_ecdh != NULL)
+ if (clnt_ecdh != NULL)
EC_KEY_free(clnt_ecdh);
EVP_PKEY_free(srvr_pub_pkey);
}
#endif /* !OPENSSL_NO_ECDH */
- else if (alg_k & SSL_kGOST)
+ else if (alg_k & SSL_kGOST)
{
/* GOST key exchange message creation */
EVP_PKEY_CTX *pkey_ctx;
@@ -2892,89 +3036,7 @@
}
}
#endif
-#ifndef OPENSSL_NO_PSK
- else if (alg_k & SSL_kPSK)
- {
- char identity[PSK_MAX_IDENTITY_LEN];
- unsigned char *t = NULL;
- unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4];
- unsigned int pre_ms_len = 0, psk_len = 0;
- int psk_err = 1;
-
- n = 0;
- if (s->psk_client_callback == NULL)
- {
- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
- SSL_R_PSK_NO_CLIENT_CB);
- goto err;
- }
-
- psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
- identity, PSK_MAX_IDENTITY_LEN,
- psk_or_pre_ms, sizeof(psk_or_pre_ms));
- if (psk_len > PSK_MAX_PSK_LEN)
- {
- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
- ERR_R_INTERNAL_ERROR);
- goto psk_err;
- }
- else if (psk_len == 0)
- {
- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
- SSL_R_PSK_IDENTITY_NOT_FOUND);
- goto psk_err;
- }
-
- /* create PSK pre_master_secret */
- pre_ms_len = 2+psk_len+2+psk_len;
- t = psk_or_pre_ms;
- memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len);
- s2n(psk_len, t);
- memset(t, 0, psk_len);
- t+=psk_len;
- s2n(psk_len, t);
-
- if (s->session->psk_identity_hint != NULL)
- OPENSSL_free(s->session->psk_identity_hint);
- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
- if (s->ctx->psk_identity_hint != NULL &&
- s->session->psk_identity_hint == NULL)
- {
- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
- ERR_R_MALLOC_FAILURE);
- goto psk_err;
- }
-
- if (s->session->psk_identity != NULL)
- OPENSSL_free(s->session->psk_identity);
- s->session->psk_identity = BUF_strdup(identity);
- if (s->session->psk_identity == NULL)
- {
- SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
- ERR_R_MALLOC_FAILURE);
- goto psk_err;
- }
-
- s->session->master_key_length =
- s->method->ssl3_enc->generate_master_secret(s,
- s->session->master_key,
- psk_or_pre_ms, pre_ms_len);
- n = strlen(identity);
- s2n(n, p);
- memcpy(p, identity, n);
- n+=2;
- psk_err = 0;
- psk_err:
- OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN);
- OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms));
- if (psk_err != 0)
- {
- ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
- goto err;
- }
- }
-#endif
- else
+ else if (!(alg_k & SSL_kPSK) || ((alg_k & SSL_kPSK) && !(alg_a & SSL_aPSK)))
{
ssl3_send_alert(s, SSL3_AL_FATAL,
SSL_AD_HANDSHAKE_FAILURE);
@@ -3279,7 +3341,7 @@
alg_a=s->s3->tmp.new_cipher->algorithm_auth;
/* we don't have a certificate */
- if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || (alg_k & SSL_kPSK))
+ if ((alg_a & (SSL_aDH|SSL_aNULL|SSL_aKRB5)) || ((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
return(1);
sc=s->session->sess_cert;
@@ -3438,10 +3500,29 @@
if (s->state != SSL3_ST_CW_CHANNEL_ID_A)
return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
+ if (!s->tlsext_channel_id_private && s->ctx->channel_id_cb)
+ {
+ EVP_PKEY *key = NULL;
+ s->ctx->channel_id_cb(s, &key);
+ if (key != NULL)
+ {
+ s->tlsext_channel_id_private = key;
+ }
+ }
+ if (!s->tlsext_channel_id_private)
+ {
+ s->rwstate=SSL_CHANNEL_ID_LOOKUP;
+ return (-1);
+ }
+ s->rwstate=SSL_NOTHING;
+
d = (unsigned char *)s->init_buf->data;
*(d++)=SSL3_MT_ENCRYPTED_EXTENSIONS;
l2n3(2 + 2 + TLSEXT_CHANNEL_ID_SIZE, d);
- s2n(TLSEXT_TYPE_channel_id, d);
+ if (s->s3->tlsext_channel_id_new)
+ s2n(TLSEXT_TYPE_channel_id_new, d);
+ else
+ s2n(TLSEXT_TYPE_channel_id, d);
s2n(TLSEXT_CHANNEL_ID_SIZE, d);
EVP_MD_CTX_init(&md_ctx);
@@ -3452,9 +3533,9 @@
SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CANNOT_SERIALIZE_PUBLIC_KEY);
goto err;
}
- // i2d_PublicKey will produce an ANSI X9.62 public key which, for a
- // P-256 key, is 0x04 (meaning uncompressed) followed by the x and y
- // field elements as 32-byte, big-endian numbers.
+ /* i2d_PublicKey will produce an ANSI X9.62 public key which, for a
+ * P-256 key, is 0x04 (meaning uncompressed) followed by the x and y
+ * field elements as 32-byte, big-endian numbers. */
if (public_key_len != 65)
{
SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_CHANNEL_ID_NOT_P256);
@@ -3500,14 +3581,14 @@
}
derp = der_sig;
- sig = d2i_ECDSA_SIG(NULL, (const unsigned char**)&derp, sig_len);
+ sig = d2i_ECDSA_SIG(NULL, (const unsigned char**) &derp, sig_len);
if (sig == NULL)
{
SSLerr(SSL_F_SSL3_SEND_CHANNEL_ID,SSL_R_D2I_ECDSA_SIG);
goto err;
}
- // The first byte of public_key will be 0x4, denoting an uncompressed key.
+ /* The first byte of public_key will be 0x4, denoting an uncompressed key. */
memcpy(d, public_key + 1, 64);
d += 64;
memset(d, 0, 2 * 32);
diff --git a/ssl/s3_enc.c b/ssl/s3_enc.c
index 1b5cc79..3595cff 100644
--- a/ssl/s3_enc.c
+++ b/ssl/s3_enc.c
@@ -728,7 +728,7 @@
}
t=EVP_MD_CTX_size(hash);
- if (t < 0)
+ if (t < 0 || t > 20)
return -1;
md_size=t;
npad=(48/md_size)*md_size;
diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
index f33ada3..dca9858 100644
--- a/ssl/s3_lib.c
+++ b/ssl/s3_lib.c
@@ -2826,6 +2826,41 @@
256,
},
+#ifndef OPENSSL_NO_PSK
+ /* ECDH PSK ciphersuites from RFC 5489 */
+ /* Cipher C035 */
+ {
+ 1,
+ TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA,
+ TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA,
+ SSL_kEECDH,
+ SSL_aPSK,
+ SSL_AES128,
+ SSL_SHA1,
+ SSL_TLSV1,
+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
+ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF,
+ 128,
+ 128,
+ },
+
+ /* Cipher C036 */
+ {
+ 1,
+ TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA,
+ TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA,
+ SSL_kEECDH,
+ SSL_aPSK,
+ SSL_AES256,
+ SSL_SHA1,
+ SSL_TLSV1,
+ SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
+ SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF,
+ 256,
+ 256,
+ },
+#endif /* OPENSSL_NO_PSK */
+
#endif /* OPENSSL_NO_ECDH */
@@ -3376,8 +3411,6 @@
break;
#endif
case SSL_CTRL_CHANNEL_ID:
- if (!s->server)
- break;
s->tlsext_channel_id_enabled = 1;
ret = 1;
break;
@@ -3393,7 +3426,7 @@
}
if (s->tlsext_channel_id_private)
EVP_PKEY_free(s->tlsext_channel_id_private);
- s->tlsext_channel_id_private = (EVP_PKEY*) parg;
+ s->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg);
ret = 1;
break;
@@ -3735,7 +3768,7 @@
}
if (ctx->tlsext_channel_id_private)
EVP_PKEY_free(ctx->tlsext_channel_id_private);
- ctx->tlsext_channel_id_private = (EVP_PKEY*) parg;
+ ctx->tlsext_channel_id_private = EVP_PKEY_dup((EVP_PKEY*) parg);
break;
default:
@@ -3939,7 +3972,7 @@
#endif /* OPENSSL_NO_KRB5 */
#ifndef OPENSSL_NO_PSK
/* with PSK there must be server callback set */
- if ((alg_k & SSL_kPSK) && s->psk_server_callback == NULL)
+ if ((alg_a & SSL_aPSK) && s->psk_server_callback == NULL)
continue;
#endif /* OPENSSL_NO_PSK */
diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c
index 75997ac..df436cf 100644
--- a/ssl/s3_pkt.c
+++ b/ssl/s3_pkt.c
@@ -110,6 +110,7 @@
*/
#include <stdio.h>
+#include <limits.h>
#include <errno.h>
#define USE_SOCKETS
#include "ssl_locl.h"
@@ -580,10 +581,11 @@
int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
{
const unsigned char *buf=buf_;
- unsigned int tot,n,nw;
- int i;
+ unsigned int n,nw;
+ int i,tot;
s->rwstate=SSL_NOTHING;
+ OPENSSL_assert(s->s3->wnum <= INT_MAX);
tot=s->s3->wnum;
s->s3->wnum=0;
@@ -598,6 +600,22 @@
}
}
+ /* ensure that if we end up with a smaller value of data to write
+ * out than the the original len from a write which didn't complete
+ * for non-blocking I/O and also somehow ended up avoiding
+ * the check for this in ssl3_write_pending/SSL_R_BAD_WRITE_RETRY as
+ * it must never be possible to end up with (len-tot) as a large
+ * number that will then promptly send beyond the end of the users
+ * buffer ... so we trap and report the error in a way the user
+ * will notice
+ */
+ if (len < tot)
+ {
+ SSLerr(SSL_F_SSL3_WRITE_BYTES,SSL_R_BAD_LENGTH);
+ return(-1);
+ }
+
+
n=(len-tot);
for (;;)
{
@@ -614,9 +632,6 @@
!s->s3->record_split_done)
{
fragment = 1;
- /* The first byte will be in its own record, so we
- * can write an extra byte. */
- max++;
/* record_split_done records that the splitting has
* been done in case we hit an SSL_WANT_WRITE condition.
* In that case, we don't need to do the split again. */
@@ -632,6 +647,8 @@
if (i <= 0)
{
s->s3->wnum=tot;
+ /* Try to write the fragment next time. */
+ s->s3->record_split_done = 0;
return i;
}
@@ -668,9 +685,6 @@
SSL3_BUFFER *wb=&(s->s3->wbuf);
SSL_SESSION *sess;
- if (wb->buf == NULL)
- if (!ssl3_setup_write_buffer(s))
- return -1;
/* first check if there is a SSL3_BUFFER still being written
* out. This will happen with non blocking IO */
@@ -686,6 +700,10 @@
/* if it went, fall through and send more stuff */
}
+ if (wb->buf == NULL)
+ if (!ssl3_setup_write_buffer(s))
+ return -1;
+
if (len == 0)
return 0;
@@ -1067,7 +1085,7 @@
{
s->rstate=SSL_ST_READ_HEADER;
rr->off=0;
- if (s->mode & SSL_MODE_RELEASE_BUFFERS)
+ if (s->mode & SSL_MODE_RELEASE_BUFFERS && s->s3->rbuf.left == 0)
ssl3_release_read_buffer(s);
}
}
@@ -1312,10 +1330,12 @@
if (!(s->s3->flags & SSL3_FLAGS_CCS_OK))
{
al=SSL_AD_UNEXPECTED_MESSAGE;
- SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_UNEXPECTED_CCS);
+ SSLerr(SSL_F_SSL3_READ_BYTES,SSL_R_CCS_RECEIVED_EARLY);
goto f_err;
}
+ s->s3->flags &= ~SSL3_FLAGS_CCS_OK;
+
rr->length=0;
if (s->msg_callback)
@@ -1450,12 +1470,7 @@
if (s->s3->tmp.key_block == NULL)
{
- if (s->session->master_key_length == 0)
- {
- SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_UNEXPECTED_CCS);
- return (0);
- }
- if (s->session == NULL)
+ if (s->session == NULL || s->session->master_key_length == 0)
{
/* might happen if dtls1_read_bytes() calls this */
SSLerr(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC,SSL_R_CCS_RECEIVED_EARLY);
diff --git a/ssl/s3_srvr.c b/ssl/s3_srvr.c
index 9fdb5fd..f83c936 100644
--- a/ssl/s3_srvr.c
+++ b/ssl/s3_srvr.c
@@ -217,6 +217,7 @@
{
BUF_MEM *buf;
unsigned long alg_k,Time=(unsigned long)time(NULL);
+ unsigned long alg_a;
void (*cb)(const SSL *ssl,int type,int val)=NULL;
int ret= -1;
int new_state,state,skip=0;
@@ -412,9 +413,11 @@
case SSL3_ST_SW_CERT_A:
case SSL3_ST_SW_CERT_B:
/* Check if it is anon DH or anon ECDH, */
- /* normal PSK or KRB5 or SRP */
+ /* non-RSA PSK or KRB5 or SRP */
if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
- && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)
+ /* Among PSK ciphersuites only RSA_PSK uses server certificate */
+ && !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aPSK &&
+ !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kRSA))
&& !(s->s3->tmp.new_cipher->algorithm_auth & SSL_aKRB5))
{
ret=ssl3_send_server_certificate(s);
@@ -443,6 +446,7 @@
case SSL3_ST_SW_KEY_EXCH_A:
case SSL3_ST_SW_KEY_EXCH_B:
alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
+ alg_a = s->s3->tmp.new_cipher->algorithm_auth;
/* clear this, it may get reset by
* send_server_key_exchange */
@@ -472,10 +476,12 @@
* public key for key exchange.
*/
if (s->s3->tmp.use_rsa_tmp
- /* PSK: send ServerKeyExchange if PSK identity
- * hint if provided */
+ /* PSK: send ServerKeyExchange if either:
+ * - PSK identity hint is provided, or
+ * - the key exchange is kEECDH.
+ */
#ifndef OPENSSL_NO_PSK
- || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint)
+ || ((alg_a & SSL_aPSK) && ((alg_k & SSL_kEECDH) || s->session->psk_identity_hint))
#endif
#ifndef OPENSSL_NO_SRP
/* SRP: send ServerKeyExchange */
@@ -669,8 +675,8 @@
case SSL3_ST_SR_CERT_VRFY_A:
case SSL3_ST_SR_CERT_VRFY_B:
- /* we should decide if we expected this one */
s->s3->flags |= SSL3_FLAGS_CCS_OK;
+ /* we should decide if we expected this one */
ret=ssl3_get_cert_verify(s);
if (ret <= 0) goto end;
@@ -688,7 +694,6 @@
channel_id = s->s3->tlsext_channel_id_valid;
#endif
- s->s3->flags |= SSL3_FLAGS_CCS_OK;
if (next_proto_neg)
s->state=SSL3_ST_SR_NEXT_PROTO_A;
else if (channel_id)
@@ -723,6 +728,7 @@
case SSL3_ST_SR_FINISHED_A:
case SSL3_ST_SR_FINISHED_B:
+ s->s3->flags |= SSL3_FLAGS_CCS_OK;
ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A,
SSL3_ST_SR_FINISHED_B);
if (ret <= 0) goto end;
@@ -734,6 +740,15 @@
#endif
else
s->state=SSL3_ST_SW_CHANGE_A;
+ /* If this is a full handshake with ChannelID then
+ * record the hashshake hashes in |s->session| in case
+ * we need them to verify a ChannelID signature on a
+ * resumption of this session in the future. */
+ if (!s->hit && s->s3->tlsext_channel_id_new)
+ {
+ ret = tls1_record_handshake_hashes_for_channel_id(s);
+ if (ret <= 0) goto end;
+ }
s->init_num=0;
break;
@@ -1462,6 +1477,22 @@
if (s->state == SSL3_ST_SW_SRVR_HELLO_A)
{
+ /* We only accept ChannelIDs on connections with ECDHE in order
+ * to avoid a known attack while we fix ChannelID itself. */
+ if (s->s3 &&
+ s->s3->tlsext_channel_id_valid &&
+ (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kEECDH) == 0)
+ s->s3->tlsext_channel_id_valid = 0;
+
+ /* If this is a resumption and the original handshake didn't
+ * support ChannelID then we didn't record the original
+ * handshake hashes in the session and so cannot resume with
+ * ChannelIDs. */
+ if (s->hit &&
+ s->s3->tlsext_channel_id_new &&
+ s->session->original_handshake_hash_len == 0)
+ s->s3->tlsext_channel_id_valid = 0;
+
buf=(unsigned char *)s->init_buf->data;
#ifdef OPENSSL_NO_TLSEXT
p=s->s3->server_random;
@@ -1591,11 +1622,16 @@
int curve_id = 0;
BN_CTX *bn_ctx = NULL;
#endif
+#ifndef OPENSSL_NO_PSK
+ const char* psk_identity_hint;
+ size_t psk_identity_hint_len;
+#endif
EVP_PKEY *pkey;
const EVP_MD *md = NULL;
unsigned char *p,*d;
int al,i;
- unsigned long type;
+ unsigned long alg_k;
+ unsigned long alg_a;
int n;
CERT *cert;
BIGNUM *r[4];
@@ -1606,15 +1642,28 @@
EVP_MD_CTX_init(&md_ctx);
if (s->state == SSL3_ST_SW_KEY_EXCH_A)
{
- type=s->s3->tmp.new_cipher->algorithm_mkey;
+ alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
+ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
cert=s->cert;
buf=s->init_buf;
r[0]=r[1]=r[2]=r[3]=NULL;
n=0;
+#ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+ /* size for PSK identity hint */
+ psk_identity_hint = s->session->psk_identity_hint;
+ if (psk_identity_hint)
+ psk_identity_hint_len = strlen(psk_identity_hint);
+ else
+ psk_identity_hint_len = 0;
+ n+=2+psk_identity_hint_len;
+ }
+#endif /* !OPENSSL_NO_PSK */
#ifndef OPENSSL_NO_RSA
- if (type & SSL_kRSA)
+ if (alg_k & SSL_kRSA)
{
rsa=cert->rsa_tmp;
if ((rsa == NULL) && (s->cert->rsa_tmp_cb != NULL))
@@ -1641,10 +1690,9 @@
r[1]=rsa->e;
s->s3->tmp.use_rsa_tmp=1;
}
- else
#endif
#ifndef OPENSSL_NO_DH
- if (type & SSL_kEDH)
+ else if (alg_k & SSL_kEDH)
{
dhp=cert->dh_tmp;
if ((dhp == NULL) && (s->cert->dh_tmp_cb != NULL))
@@ -1697,10 +1745,9 @@
r[1]=dh->g;
r[2]=dh->pub_key;
}
- else
#endif
#ifndef OPENSSL_NO_ECDH
- if (type & SSL_kEECDH)
+ else if (alg_k & SSL_kEECDH)
{
const EC_GROUP *group;
@@ -1813,7 +1860,7 @@
* to encode the entire ServerECDHParams
* structure.
*/
- n = 4 + encodedlen;
+ n += 4 + encodedlen;
/* We'll generate the serverKeyExchange message
* explicitly so we can set these to NULLs
@@ -1823,18 +1870,9 @@
r[2]=NULL;
r[3]=NULL;
}
- else
#endif /* !OPENSSL_NO_ECDH */
-#ifndef OPENSSL_NO_PSK
- if (type & SSL_kPSK)
- {
- /* reserve size for record length and PSK identity hint*/
- n+=2+strlen(s->ctx->psk_identity_hint);
- }
- else
-#endif /* !OPENSSL_NO_PSK */
#ifndef OPENSSL_NO_SRP
- if (type & SSL_kSRP)
+ else if (alg_k & SSL_kSRP)
{
if ((s->srp_ctx.N == NULL) ||
(s->srp_ctx.g == NULL) ||
@@ -1849,26 +1887,27 @@
r[2]=s->srp_ctx.s;
r[3]=s->srp_ctx.B;
}
- else
#endif
+ else if (!(alg_k & SSL_kPSK))
{
al=SSL_AD_HANDSHAKE_FAILURE;
SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE);
goto f_err;
}
- for (i=0; r[i] != NULL && i<4; i++)
+ for (i=0; i < 4 && r[i] != NULL; i++)
{
nr[i]=BN_num_bytes(r[i]);
#ifndef OPENSSL_NO_SRP
- if ((i == 2) && (type & SSL_kSRP))
+ if ((i == 2) && (alg_k & SSL_kSRP))
n+=1+nr[i];
else
#endif
n+=2+nr[i];
}
- if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
- && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK))
+ if (!(alg_a & SSL_aNULL)
+ /* Among PSK ciphersuites only RSA uses a certificate */
+ && !((alg_a & SSL_aPSK) && !(alg_k & SSL_kRSA)))
{
if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher,&md))
== NULL)
@@ -1892,10 +1931,10 @@
d=(unsigned char *)s->init_buf->data;
p= &(d[4]);
- for (i=0; r[i] != NULL && i<4; i++)
+ for (i=0; i < 4 && r[i] != NULL; i++)
{
#ifndef OPENSSL_NO_SRP
- if ((i == 2) && (type & SSL_kSRP))
+ if ((i == 2) && (alg_k & SSL_kSRP))
{
*p = nr[i];
p++;
@@ -1907,8 +1946,24 @@
p+=nr[i];
}
+/* Note: ECDHE PSK ciphersuites use SSL_kEECDH and SSL_aPSK.
+ * When one of them is used, the server key exchange record needs to have both
+ * the psk_identity_hint and the ServerECDHParams. */
+#ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+ /* copy PSK identity hint (if provided) */
+ s2n(psk_identity_hint_len, p);
+ if (psk_identity_hint_len > 0)
+ {
+ memcpy(p, psk_identity_hint, psk_identity_hint_len);
+ p+=psk_identity_hint_len;
+ }
+ }
+#endif /* OPENSSL_NO_PSK */
+
#ifndef OPENSSL_NO_ECDH
- if (type & SSL_kEECDH)
+ if (alg_k & SSL_kEECDH)
{
/* XXX: For now, we only support named (not generic) curves.
* In this situation, the serverKeyExchange message has:
@@ -1931,17 +1986,7 @@
encodedPoint = NULL;
p += encodedlen;
}
-#endif
-
-#ifndef OPENSSL_NO_PSK
- if (type & SSL_kPSK)
- {
- /* copy PSK identity hint */
- s2n(strlen(s->ctx->psk_identity_hint), p);
- strncpy((char *)p, s->ctx->psk_identity_hint, strlen(s->ctx->psk_identity_hint));
- p+=strlen(s->ctx->psk_identity_hint);
- }
-#endif
+#endif /* OPENSSL_NO_ECDH */
/* not anonymous */
if (pkey != NULL)
@@ -1978,7 +2023,7 @@
n+=u+2;
}
else
-#endif
+#endif /* OPENSSL_NO_RSA */
if (md)
{
/* For TLS1.2 and later send signature
@@ -2123,6 +2168,11 @@
s->init_num=n+4;
s->init_off=0;
#ifdef NETSCAPE_HANG_BUG
+ if (!BUF_MEM_grow_clean(buf, s->init_num + 4))
+ {
+ SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST,ERR_R_BUF_LIB);
+ goto err;
+ }
p=(unsigned char *)s->init_buf->data + s->init_num;
/* do the header */
@@ -2147,6 +2197,7 @@
int i,al,ok;
long n;
unsigned long alg_k;
+ unsigned long alg_a;
unsigned char *p;
#ifndef OPENSSL_NO_RSA
RSA *rsa=NULL;
@@ -2164,7 +2215,11 @@
EC_KEY *srvr_ecdh = NULL;
EVP_PKEY *clnt_pub_pkey = NULL;
EC_POINT *clnt_ecpoint = NULL;
- BN_CTX *bn_ctx = NULL;
+ BN_CTX *bn_ctx = NULL;
+#ifndef OPENSSL_NO_PSK
+ unsigned int psk_len = 0;
+ unsigned char psk[PSK_MAX_PSK_LEN];
+#endif /* OPENSSL_NO_PSK */
#endif
n=s->method->ssl_get_message(s,
@@ -2178,7 +2233,95 @@
p=(unsigned char *)s->init_msg;
alg_k=s->s3->tmp.new_cipher->algorithm_mkey;
+ alg_a=s->s3->tmp.new_cipher->algorithm_auth;
+#ifndef OPENSSL_NO_PSK
+ if (alg_a & SSL_aPSK)
+ {
+ unsigned char *t = NULL;
+ unsigned char pre_ms[PSK_MAX_PSK_LEN*2+4];
+ unsigned int pre_ms_len = 0;
+ int psk_err = 1;
+ char tmp_id[PSK_MAX_IDENTITY_LEN+1];
+
+ al=SSL_AD_HANDSHAKE_FAILURE;
+
+ n2s(p, i);
+ if (n != i+2 && !(alg_k & SSL_kEECDH))
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ SSL_R_LENGTH_MISMATCH);
+ goto psk_err;
+ }
+ if (i > PSK_MAX_IDENTITY_LEN)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ SSL_R_DATA_LENGTH_TOO_LONG);
+ goto psk_err;
+ }
+ if (s->psk_server_callback == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ SSL_R_PSK_NO_SERVER_CB);
+ goto psk_err;
+ }
+
+ /* Create guaranteed NUL-terminated identity
+ * string for the callback */
+ memcpy(tmp_id, p, i);
+ memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
+ psk_len = s->psk_server_callback(s, tmp_id, psk, sizeof(psk));
+
+ if (psk_len > PSK_MAX_PSK_LEN)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ ERR_R_INTERNAL_ERROR);
+ goto psk_err;
+ }
+ else if (psk_len == 0)
+ {
+ /* PSK related to the given identity not found */
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ SSL_R_PSK_IDENTITY_NOT_FOUND);
+ al=SSL_AD_UNKNOWN_PSK_IDENTITY;
+ goto psk_err;
+ }
+ if (!(alg_k & SSL_kEECDH))
+ {
+ /* Create the shared secret now if we're not using ECDHE-PSK.*/
+ pre_ms_len=2+psk_len+2+psk_len;
+ t = pre_ms;
+ s2n(psk_len, t);
+ memset(t, 0, psk_len);
+ t+=psk_len;
+ s2n(psk_len, t);
+ memcpy(t, psk, psk_len);
+
+ s->session->master_key_length=
+ s->method->ssl3_enc->generate_master_secret(s,
+ s->session->master_key, pre_ms, pre_ms_len);
+ }
+ if (s->session->psk_identity != NULL)
+ OPENSSL_free(s->session->psk_identity);
+ s->session->psk_identity = BUF_strdup(tmp_id);
+ OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1);
+ if (s->session->psk_identity == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ ERR_R_MALLOC_FAILURE);
+ goto psk_err;
+ }
+
+ p += i;
+ n -= (i + 2);
+ psk_err = 0;
+ psk_err:
+ OPENSSL_cleanse(pre_ms, sizeof(pre_ms));
+ if (psk_err != 0)
+ goto f_err;
+ }
+#endif /* OPENSSL_NO_PSK */
+ if (0) {}
#ifndef OPENSSL_NO_RSA
if (alg_k & SSL_kRSA)
{
@@ -2283,10 +2426,9 @@
p,i);
OPENSSL_cleanse(p,i);
}
- else
#endif
#ifndef OPENSSL_NO_DH
- if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
+ else if (alg_k & (SSL_kEDH|SSL_kDHr|SSL_kDHd))
{
n2s(p,i);
if (n != i+2)
@@ -2347,10 +2489,9 @@
s->session->master_key,p,i);
OPENSSL_cleanse(p,i);
}
- else
#endif
#ifndef OPENSSL_NO_KRB5
- if (alg_k & SSL_kKRB5)
+ else if (alg_k & SSL_kKRB5)
{
krb5_error_code krb5rc;
krb5_data enc_ticket;
@@ -2539,17 +2680,20 @@
** if (s->kssl_ctx) s->kssl_ctx = NULL;
*/
}
- else
#endif /* OPENSSL_NO_KRB5 */
-
#ifndef OPENSSL_NO_ECDH
- if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
+ else if (alg_k & (SSL_kEECDH|SSL_kECDHr|SSL_kECDHe))
{
int ret = 1;
int field_size = 0;
const EC_KEY *tkey;
const EC_GROUP *group;
const BIGNUM *priv_key;
+#ifndef OPENSSL_NO_PSK
+ unsigned char *pre_ms;
+ unsigned int pre_ms_len;
+ unsigned char *t;
+#endif /* OPENSSL_NO_PSK */
/* initialize structures for server's ECDH key pair */
if ((srvr_ecdh = EC_KEY_new()) == NULL)
@@ -2645,7 +2789,7 @@
}
/* Get encoded point length */
- i = *p;
+ i = *p;
p += 1;
if (n != 1 + i)
{
@@ -2687,223 +2831,145 @@
EC_KEY_free(srvr_ecdh);
BN_CTX_free(bn_ctx);
EC_KEY_free(s->s3->tmp.ecdh);
- s->s3->tmp.ecdh = NULL;
+ s->s3->tmp.ecdh = NULL;
- /* Compute the master secret */
- s->session->master_key_length = s->method->ssl3_enc-> \
- generate_master_secret(s, s->session->master_key, p, i);
-
- OPENSSL_cleanse(p, i);
- return (ret);
- }
- else
-#endif
#ifndef OPENSSL_NO_PSK
- if (alg_k & SSL_kPSK)
+ /* ECDHE PSK ciphersuites from RFC 5489 */
+ if ((alg_a & SSL_aPSK) && psk_len != 0)
{
- unsigned char *t = NULL;
- unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN*2+4];
- unsigned int pre_ms_len = 0, psk_len = 0;
- int psk_err = 1;
- char tmp_id[PSK_MAX_IDENTITY_LEN+1];
-
- al=SSL_AD_HANDSHAKE_FAILURE;
-
- n2s(p,i);
- if (n != i+2)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- SSL_R_LENGTH_MISMATCH);
- goto psk_err;
- }
- if (i > PSK_MAX_IDENTITY_LEN)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- SSL_R_DATA_LENGTH_TOO_LONG);
- goto psk_err;
- }
- if (s->psk_server_callback == NULL)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- SSL_R_PSK_NO_SERVER_CB);
- goto psk_err;
- }
-
- /* Create guaranteed NULL-terminated identity
- * string for the callback */
- memcpy(tmp_id, p, i);
- memset(tmp_id+i, 0, PSK_MAX_IDENTITY_LEN+1-i);
- psk_len = s->psk_server_callback(s, tmp_id,
- psk_or_pre_ms, sizeof(psk_or_pre_ms));
- OPENSSL_cleanse(tmp_id, PSK_MAX_IDENTITY_LEN+1);
-
- if (psk_len > PSK_MAX_PSK_LEN)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- ERR_R_INTERNAL_ERROR);
- goto psk_err;
- }
- else if (psk_len == 0)
- {
- /* PSK related to the given identity not found */
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- SSL_R_PSK_IDENTITY_NOT_FOUND);
- al=SSL_AD_UNKNOWN_PSK_IDENTITY;
- goto psk_err;
- }
-
- /* create PSK pre_master_secret */
- pre_ms_len=2+psk_len+2+psk_len;
- t = psk_or_pre_ms;
- memmove(psk_or_pre_ms+psk_len+4, psk_or_pre_ms, psk_len);
- s2n(psk_len, t);
- memset(t, 0, psk_len);
- t+=psk_len;
- s2n(psk_len, t);
-
- if (s->session->psk_identity != NULL)
- OPENSSL_free(s->session->psk_identity);
- s->session->psk_identity = BUF_strdup((char *)p);
- if (s->session->psk_identity == NULL)
+ pre_ms_len = 2+psk_len+2+i;
+ pre_ms = OPENSSL_malloc(pre_ms_len);
+ if (pre_ms == NULL)
{
SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
ERR_R_MALLOC_FAILURE);
- goto psk_err;
+ goto err;
}
-
- if (s->session->psk_identity_hint != NULL)
- OPENSSL_free(s->session->psk_identity_hint);
- s->session->psk_identity_hint = BUF_strdup(s->ctx->psk_identity_hint);
- if (s->ctx->psk_identity_hint != NULL &&
- s->session->psk_identity_hint == NULL)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- ERR_R_MALLOC_FAILURE);
- goto psk_err;
- }
-
- s->session->master_key_length=
- s->method->ssl3_enc->generate_master_secret(s,
- s->session->master_key, psk_or_pre_ms, pre_ms_len);
- psk_err = 0;
- psk_err:
- OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms));
- if (psk_err != 0)
- goto f_err;
+ memset(pre_ms, 0, pre_ms_len);
+ t = pre_ms;
+ s2n(psk_len, t);
+ memcpy(t, psk, psk_len);
+ t += psk_len;
+ s2n(i, t);
+ memcpy(t, p, i);
+ s->session->master_key_length = s->method->ssl3_enc \
+ -> generate_master_secret(s,
+ s->session->master_key, pre_ms, pre_ms_len);
+ OPENSSL_cleanse(pre_ms, pre_ms_len);
+ OPENSSL_free(pre_ms);
}
- else
+#endif /* OPENSSL_NO_PSK */
+ if (!(alg_a & SSL_aPSK))
+ {
+ /* Compute the master secret */
+ s->session->master_key_length = s->method->ssl3_enc \
+ -> generate_master_secret(s,
+ s->session->master_key, p, i);
+ }
+
+ OPENSSL_cleanse(p, i);
+ }
#endif
#ifndef OPENSSL_NO_SRP
- if (alg_k & SSL_kSRP)
+ else if (alg_k & SSL_kSRP)
+ {
+ int param_len;
+
+ n2s(p,i);
+ param_len=i+2;
+ if (param_len > n)
{
- int param_len;
-
- n2s(p,i);
- param_len=i+2;
- if (param_len > n)
- {
- al=SSL_AD_DECODE_ERROR;
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_BAD_SRP_A_LENGTH);
- goto f_err;
- }
- if (!(s->srp_ctx.A=BN_bin2bn(p,i,NULL)))
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_BN_LIB);
- goto err;
- }
- if (s->session->srp_username != NULL)
- OPENSSL_free(s->session->srp_username);
- s->session->srp_username = BUF_strdup(s->srp_ctx.login);
- if (s->session->srp_username == NULL)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
- ERR_R_MALLOC_FAILURE);
- goto err;
- }
-
- if ((s->session->master_key_length = SRP_generate_server_master_secret(s,s->session->master_key))<0)
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
- goto err;
- }
-
- p+=i;
+ al=SSL_AD_DECODE_ERROR;
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_BAD_SRP_A_LENGTH);
+ goto f_err;
}
- else
+ if (!(s->srp_ctx.A=BN_bin2bn(p,i,NULL)))
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_BN_LIB);
+ goto err;
+ }
+ if (s->session->srp_username != NULL)
+ OPENSSL_free(s->session->srp_username);
+ s->session->srp_username = BUF_strdup(s->srp_ctx.login);
+ if (s->session->srp_username == NULL)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+ ERR_R_MALLOC_FAILURE);
+ goto err;
+ }
+
+ if ((s->session->master_key_length = SRP_generate_server_master_secret(s,s->session->master_key))<0)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,ERR_R_INTERNAL_ERROR);
+ goto err;
+ }
+
+ p+=i;
+ }
#endif /* OPENSSL_NO_SRP */
- if (alg_k & SSL_kGOST)
+ else if (alg_k & SSL_kGOST)
+ {
+ int ret = 0;
+ EVP_PKEY_CTX *pkey_ctx;
+ EVP_PKEY *client_pub_pkey = NULL, *pk = NULL;
+ unsigned char premaster_secret[32], *start;
+ size_t outlen=32, inlen;
+ unsigned long alg_a;
+ int Ttag, Tclass;
+ long Tlen;
+
+ /* Get our certificate private key*/
+ alg_a = s->s3->tmp.new_cipher->algorithm_auth;
+ if (alg_a & SSL_aGOST94)
+ pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey;
+ else if (alg_a & SSL_aGOST01)
+ pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey;
+
+ pkey_ctx = EVP_PKEY_CTX_new(pk,NULL);
+ EVP_PKEY_decrypt_init(pkey_ctx);
+ /* If client certificate is present and is of the same type, maybe
+ * use it for key exchange. Don't mind errors from
+ * EVP_PKEY_derive_set_peer, because it is completely valid to use
+ * a client certificate for authorization only. */
+ client_pub_pkey = X509_get_pubkey(s->session->peer);
+ if (client_pub_pkey)
{
- int ret = 0;
- EVP_PKEY_CTX *pkey_ctx;
- EVP_PKEY *client_pub_pkey = NULL, *pk = NULL;
- unsigned char premaster_secret[32], *start;
- size_t outlen=32, inlen;
- unsigned long alg_a;
-
- /* Get our certificate private key*/
- alg_a = s->s3->tmp.new_cipher->algorithm_auth;
- if (alg_a & SSL_aGOST94)
- pk = s->cert->pkeys[SSL_PKEY_GOST94].privatekey;
- else if (alg_a & SSL_aGOST01)
- pk = s->cert->pkeys[SSL_PKEY_GOST01].privatekey;
-
- pkey_ctx = EVP_PKEY_CTX_new(pk,NULL);
- EVP_PKEY_decrypt_init(pkey_ctx);
- /* If client certificate is present and is of the same type, maybe
- * use it for key exchange. Don't mind errors from
- * EVP_PKEY_derive_set_peer, because it is completely valid to use
- * a client certificate for authorization only. */
- client_pub_pkey = X509_get_pubkey(s->session->peer);
- if (client_pub_pkey)
- {
- if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0)
- ERR_clear_error();
- }
- /* Decrypt session key */
- if ((*p!=( V_ASN1_SEQUENCE| V_ASN1_CONSTRUCTED)))
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
- goto gerr;
- }
- if (p[1] == 0x81)
- {
- start = p+3;
- inlen = p[2];
- }
- else if (p[1] < 0x80)
- {
- start = p+2;
- inlen = p[1];
- }
- else
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
- goto gerr;
- }
- if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0)
-
- {
- SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
- goto gerr;
- }
- /* Generate master secret */
- s->session->master_key_length=
- s->method->ssl3_enc->generate_master_secret(s,
- s->session->master_key,premaster_secret,32);
- /* Check if pubkey from client certificate was used */
- if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0)
- ret = 2;
- else
- ret = 1;
- gerr:
- EVP_PKEY_free(client_pub_pkey);
- EVP_PKEY_CTX_free(pkey_ctx);
- if (ret)
- return ret;
- else
- goto err;
+ if (EVP_PKEY_derive_set_peer(pkey_ctx, client_pub_pkey) <= 0)
+ ERR_clear_error();
}
+ /* Decrypt session key */
+ if (ASN1_get_object((const unsigned char **)&p, &Tlen, &Ttag, &Tclass, n) != V_ASN1_CONSTRUCTED ||
+ Ttag != V_ASN1_SEQUENCE ||
+ Tclass != V_ASN1_UNIVERSAL)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
+ goto gerr;
+ }
+ start = p;
+ inlen = Tlen;
+ if (EVP_PKEY_decrypt(pkey_ctx,premaster_secret,&outlen,start,inlen) <=0)
+ {
+ SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,SSL_R_DECRYPTION_FAILED);
+ goto gerr;
+ }
+ /* Generate master secret */
+ s->session->master_key_length=
+ s->method->ssl3_enc->generate_master_secret(s,
+ s->session->master_key,premaster_secret,32);
+ /* Check if pubkey from client certificate was used */
+ if (EVP_PKEY_CTX_ctrl(pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0)
+ ret = 2;
else
+ ret = 1;
+ gerr:
+ EVP_PKEY_free(client_pub_pkey);
+ EVP_PKEY_CTX_free(pkey_ctx);
+ if (ret)
+ return ret;
+ else
+ goto err;
+ }
+ else if (!(alg_k & SSL_kPSK))
{
al=SSL_AD_HANDSHAKE_FAILURE;
SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
@@ -3629,6 +3695,7 @@
EC_POINT* point = NULL;
ECDSA_SIG sig;
BIGNUM x, y;
+ unsigned short expected_extension_type;
if (s->state == SSL3_ST_SR_CHANNEL_ID_A && s->init_num == 0)
{
@@ -3686,7 +3753,11 @@
n2s(p, extension_type);
n2s(p, extension_len);
- if (extension_type != TLSEXT_TYPE_channel_id ||
+ expected_extension_type = TLSEXT_TYPE_channel_id;
+ if (s->s3->tlsext_channel_id_new)
+ expected_extension_type = TLSEXT_TYPE_channel_id_new;
+
+ if (extension_type != expected_extension_type ||
extension_len != TLSEXT_CHANNEL_ID_SIZE)
{
SSLerr(SSL_F_SSL3_GET_CHANNEL_ID,SSL_R_INVALID_MESSAGE);
diff --git a/ssl/ssl.h b/ssl/ssl.h
index e2e97f1..a89ab23 100644
--- a/ssl/ssl.h
+++ b/ssl/ssl.h
@@ -544,6 +544,13 @@
#ifndef OPENSSL_NO_SRP
char *srp_username;
#endif
+
+ /* original_handshake_hash contains the handshake hash (either
+ * SHA-1+MD5 or SHA-2, depending on TLS version) for the original, full
+ * handshake that created a session. This is used by Channel IDs during
+ * resumption. */
+ unsigned char original_handshake_hash[EVP_MAX_MD_SIZE];
+ unsigned int original_handshake_hash_len;
};
#endif
@@ -553,7 +560,7 @@
/* Allow initial connection to servers that don't support RI */
#define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L
#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L
-#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L
+#define SSL_OP_TLSEXT_PADDING 0x00000010L
#define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L
#define SSL_OP_SAFARI_ECDHE_ECDSA_BUG 0x00000040L
#define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x00000080L
@@ -562,6 +569,8 @@
/* Hasn't done anything since OpenSSL 0.9.7h, retained for compatibility */
#define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0
+/* Refers to ancient SSLREF and SSLv2, retained for compatibility */
+#define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0
/* SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS is vestigial. Previously it disabled the
* insertion of empty records in CBC mode, but the empty records were commonly
@@ -648,16 +657,19 @@
* TLS only.) "Released" buffers are put onto a free-list in the context
* or just freed (depending on the context's setting for freelist_max_len). */
#define SSL_MODE_RELEASE_BUFFERS 0x00000010L
+
/* Send the current time in the Random fields of the ClientHello and
* ServerHello records for compatibility with hypothetical implementations
* that require it.
*/
#define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020L
#define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040L
+
/* When set, clients may send application data before receipt of CCS
* and Finished. This mode enables full-handshakes to 'complete' in
* one RTT. */
#define SSL_MODE_HANDSHAKE_CUTTHROUGH 0x00000080L
+
/* When set, TLS 1.0 and SSLv3, multi-byte, CBC records will be split in two:
* the first record will contain a single byte and the second will contain the
* rest of the bytes. This effectively randomises the IV and prevents BEAST
@@ -871,6 +883,9 @@
/* get client cert callback */
int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
+ /* get channel id callback */
+ void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey);
+
/* cookie generate callback */
int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie,
unsigned int *cookie_len);
@@ -933,7 +948,7 @@
*/
unsigned int max_send_fragment;
-#ifndef OPENSSL_ENGINE
+#ifndef OPENSSL_NO_ENGINE
/* Engine to pass requests for client certs to
*/
ENGINE *client_cert_engine;
@@ -1033,6 +1048,10 @@
/* If true, a client will advertise the Channel ID extension and a
* server will echo it. */
char tlsext_channel_id_enabled;
+ /* tlsext_channel_id_enabled_new is a hack to support both old and new
+ * ChannelID signatures. It indicates that a client should advertise the
+ * new ChannelID extension number. */
+ char tlsext_channel_id_enabled_new;
/* The client's Channel ID private key. */
EVP_PKEY *tlsext_channel_id_private;
#endif
@@ -1091,6 +1110,8 @@
void (*SSL_CTX_get_info_callback(SSL_CTX *ctx))(const SSL *ssl,int type,int val);
void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, int (*client_cert_cb)(SSL *ssl, X509 **x509, EVP_PKEY **pkey));
int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx))(SSL *ssl, X509 **x509, EVP_PKEY **pkey);
+void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx, void (*channel_id_cb)(SSL *ssl, EVP_PKEY **pkey));
+void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL *ssl, EVP_PKEY **pkey);
#ifndef OPENSSL_NO_ENGINE
int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e);
#endif
@@ -1167,12 +1188,14 @@
#define SSL_WRITING 2
#define SSL_READING 3
#define SSL_X509_LOOKUP 4
+#define SSL_CHANNEL_ID_LOOKUP 5
/* These will only be used when doing non-blocking IO */
#define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING)
#define SSL_want_read(s) (SSL_want(s) == SSL_READING)
#define SSL_want_write(s) (SSL_want(s) == SSL_WRITING)
#define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP)
+#define SSL_want_channel_id_lookup(s) (SSL_want(s) == SSL_CHANNEL_ID_LOOKUP)
#define SSL_MAC_FLAG_READ_MAC_STREAM 1
#define SSL_MAC_FLAG_WRITE_MAC_STREAM 2
@@ -1320,6 +1343,10 @@
#endif /* OPENSSL_NO_KRB5 */
#ifndef OPENSSL_NO_PSK
+ /* PSK identity hint is stored here only to enable setting a hint on an SSL object before an
+ * SSL_SESSION is associated with it. Once an SSL_SESSION is associated with this SSL object,
+ * the psk_identity_hint from the session takes precedence over this one. */
+ char *psk_identity_hint;
unsigned int (*psk_client_callback)(SSL *ssl, const char *hint, char *identity,
unsigned int max_identity_len, unsigned char *psk,
unsigned int max_psk_len);
@@ -1604,6 +1631,7 @@
#define SSL_ERROR_ZERO_RETURN 6
#define SSL_ERROR_WANT_CONNECT 7
#define SSL_ERROR_WANT_ACCEPT 8
+#define SSL_ERROR_WANT_CHANNEL_ID_LOOKUP 9
#define SSL_CTRL_NEED_TMP_RSA 1
#define SSL_CTRL_SET_TMP_RSA 2
@@ -1743,10 +1771,11 @@
#define SSL_set_tmp_ecdh(ssl,ecdh) \
SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)ecdh)
-/* SSL_enable_tls_channel_id configures a TLS server to accept TLS client
- * IDs from clients. Returns 1 on success. */
-#define SSL_enable_tls_channel_id(ctx) \
- SSL_ctrl(ctx,SSL_CTRL_CHANNEL_ID,0,NULL)
+/* SSL_enable_tls_channel_id either configures a TLS server to accept TLS client
+ * IDs from clients, or configure a client to send TLS client IDs to server.
+ * Returns 1 on success. */
+#define SSL_enable_tls_channel_id(s) \
+ SSL_ctrl(s,SSL_CTRL_CHANNEL_ID,0,NULL)
/* SSL_set1_tls_channel_id configures a TLS client to send a TLS Channel ID to
* compatible servers. private_key must be a P-256 EVP_PKEY*. Returns 1 on
* success. */
@@ -1796,7 +1825,7 @@
char * SSL_CIPHER_get_version(const SSL_CIPHER *c);
const char * SSL_CIPHER_get_name(const SSL_CIPHER *c);
unsigned long SSL_CIPHER_get_id(const SSL_CIPHER *c);
-const char* SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
+const char * SSL_CIPHER_authentication_method(const SSL_CIPHER* cipher);
int SSL_get_fd(const SSL *s);
int SSL_get_rfd(const SSL *s);
@@ -2713,7 +2742,6 @@
#define SSL_R_WRONG_VERSION_NUMBER 267
#define SSL_R_X509_LIB 268
#define SSL_R_X509_VERIFICATION_SETUP_PROBLEMS 269
-#define SSL_R_UNEXPECTED_CCS 388
#ifdef __cplusplus
}
diff --git a/ssl/ssl3.h b/ssl/ssl3.h
index 1aa4023..cba9434 100644
--- a/ssl/ssl3.h
+++ b/ssl/ssl3.h
@@ -393,9 +393,6 @@
#define TLS1_FLAGS_TLS_PADDING_BUG 0x0008
#define TLS1_FLAGS_SKIP_CERT_VERIFY 0x0010
#define TLS1_FLAGS_KEEP_HANDSHAKE 0x0020
-/* SSL3_FLAGS_CCS_OK indicates that a ChangeCipherSpec record is acceptable at
- * this point in the handshake. If this flag is not set then received CCS
- * records will cause a fatal error for the connection. */
#define SSL3_FLAGS_CCS_OK 0x0080
/* SSL3_FLAGS_SGC_RESTART_DONE is set when we
@@ -563,6 +560,11 @@
* for Channel IDs and that tlsext_channel_id will be valid after the
* handshake. */
char tlsext_channel_id_valid;
+ /* tlsext_channel_id_new means that the updated Channel ID extension
+ * was negotiated. This is a temporary hack in the code to support both
+ * forms of Channel ID extension while we transition to the new format,
+ * which fixed a security issue. */
+ char tlsext_channel_id_new;
/* For a server:
* If |tlsext_channel_id_valid| is true, then this contains the
* verified Channel ID from the client: a P256 point, (x,y), where
@@ -683,11 +685,11 @@
#define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT)
-#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
#ifndef OPENSSL_NO_NEXTPROTONEG
#define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
#define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
#endif
+#define SSL3_ST_SR_POST_CLIENT_CERT (0x1BF|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANNEL_ID_A (0x220|SSL_ST_ACCEPT)
#define SSL3_ST_SR_CHANNEL_ID_B (0x221|SSL_ST_ACCEPT)
#define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
diff --git a/ssl/ssl_asn1.c b/ssl/ssl_asn1.c
index 38540be..f83e18f 100644
--- a/ssl/ssl_asn1.c
+++ b/ssl/ssl_asn1.c
@@ -117,12 +117,13 @@
#ifndef OPENSSL_NO_SRP
ASN1_OCTET_STRING srp_username;
#endif /* OPENSSL_NO_SRP */
+ ASN1_OCTET_STRING original_handshake_hash;
} SSL_SESSION_ASN1;
int i2d_SSL_SESSION(SSL_SESSION *in, unsigned char **pp)
{
#define LSIZE2 (sizeof(long)*2)
- int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0;
+ int v1=0,v2=0,v3=0,v4=0,v5=0,v7=0,v8=0,v14=0;
unsigned char buf[4],ibuf1[LSIZE2],ibuf2[LSIZE2];
unsigned char ibuf3[LSIZE2],ibuf4[LSIZE2],ibuf5[LSIZE2];
#ifndef OPENSSL_NO_TLSEXT
@@ -272,6 +273,13 @@
a.psk_identity.type=V_ASN1_OCTET_STRING;
a.psk_identity.data=(unsigned char *)(in->psk_identity);
}
+
+ if (in->original_handshake_hash_len > 0)
+ {
+ a.original_handshake_hash.length = in->original_handshake_hash_len;
+ a.original_handshake_hash.type = V_ASN1_OCTET_STRING;
+ a.original_handshake_hash.data = in->original_handshake_hash;
+ }
#endif /* OPENSSL_NO_PSK */
#ifndef OPENSSL_NO_SRP
if (in->srp_username)
@@ -325,6 +333,8 @@
if (in->srp_username)
M_ASN1_I2D_len_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12);
#endif /* OPENSSL_NO_SRP */
+ if (in->original_handshake_hash_len > 0)
+ M_ASN1_I2D_len_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);
M_ASN1_I2D_seq_total();
@@ -373,6 +383,8 @@
if (in->srp_username)
M_ASN1_I2D_put_EXP_opt(&(a.srp_username), i2d_ASN1_OCTET_STRING,12,v12);
#endif /* OPENSSL_NO_SRP */
+ if (in->original_handshake_hash_len > 0)
+ M_ASN1_I2D_put_EXP_opt(&(a.original_handshake_hash),i2d_ASN1_OCTET_STRING,14,v14);
M_ASN1_I2D_finish();
}
@@ -408,6 +420,7 @@
if (os.length != 3)
{
c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH;
+ c.line=__LINE__;
goto err;
}
id=0x02000000L|
@@ -420,6 +433,7 @@
if (os.length != 2)
{
c.error=SSL_R_CIPHER_CODE_WRONG_LENGTH;
+ c.line=__LINE__;
goto err;
}
id=0x03000000L|
@@ -429,6 +443,7 @@
else
{
c.error=SSL_R_UNKNOWN_SSL_VERSION;
+ c.line=__LINE__;
goto err;
}
@@ -521,6 +536,7 @@
if (os.length > SSL_MAX_SID_CTX_LENGTH)
{
c.error=SSL_R_BAD_LENGTH;
+ c.line=__LINE__;
goto err;
}
else
@@ -638,5 +654,16 @@
ret->srp_username=NULL;
#endif /* OPENSSL_NO_SRP */
+ os.length=0;
+ os.data=NULL;
+ M_ASN1_D2I_get_EXP_opt(osp,d2i_ASN1_OCTET_STRING,14);
+ if (os.data && os.length < (int)sizeof(ret->original_handshake_hash))
+ {
+ memcpy(ret->original_handshake_hash, os.data, os.length);
+ ret->original_handshake_hash_len = os.length;
+ OPENSSL_free(os.data);
+ os.data = NULL;
+ }
+
M_ASN1_D2I_Finish(a,SSL_SESSION_free,SSL_F_D2I_SSL_SESSION);
}
diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c
index 5117be0..816f6ee 100644
--- a/ssl/ssl_err.c
+++ b/ssl/ssl_err.c
@@ -555,7 +555,7 @@
{ERR_REASON(SSL_R_TLSV1_UNRECOGNIZED_NAME),"tlsv1 unrecognized name"},
{ERR_REASON(SSL_R_TLSV1_UNSUPPORTED_EXTENSION),"tlsv1 unsupported extension"},
{ERR_REASON(SSL_R_TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER),"tls client cert req with anon cipher"},
-{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbearts"},
+{ERR_REASON(SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT),"peer does not accept heartbeats"},
{ERR_REASON(SSL_R_TLS_HEARTBEAT_PENDING) ,"heartbeat request already pending"},
{ERR_REASON(SSL_R_TLS_ILLEGAL_EXPORTER_LABEL),"tls illegal exporter label"},
{ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST),"tls invalid ecpointformat list"},
@@ -606,7 +606,6 @@
{ERR_REASON(SSL_R_WRONG_VERSION_NUMBER) ,"wrong version number"},
{ERR_REASON(SSL_R_X509_LIB) ,"x509 lib"},
{ERR_REASON(SSL_R_X509_VERIFICATION_SETUP_PROBLEMS),"x509 verification setup problems"},
-{ERR_REASON(SSL_R_UNEXPECTED_CCS),"unexpected CCS"},
{0,NULL}
};
diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
index c6e79dc..ec0ec2e 100644
--- a/ssl/ssl_lib.c
+++ b/ssl/ssl_lib.c
@@ -388,6 +388,13 @@
CRYPTO_new_ex_data(CRYPTO_EX_INDEX_SSL, s, &s->ex_data);
#ifndef OPENSSL_NO_PSK
+ s->psk_identity_hint = NULL;
+ if (ctx->psk_identity_hint)
+ {
+ s->psk_identity_hint = BUF_strdup(ctx->psk_identity_hint);
+ if (s->psk_identity_hint == NULL)
+ goto err;
+ }
s->psk_client_callback=ctx->psk_client_callback;
s->psk_server_callback=ctx->psk_server_callback;
#endif
@@ -596,6 +603,11 @@
OPENSSL_free(s->alpn_client_proto_list);
#endif
+#ifndef OPENSSL_NO_PSK
+ if (s->psk_identity_hint)
+ OPENSSL_free(s->psk_identity_hint);
+#endif
+
if (s->client_CA != NULL)
sk_X509_NAME_pop_free(s->client_CA,X509_NAME_free);
@@ -1391,6 +1403,10 @@
p=buf;
sk=s->session->ciphers;
+
+ if (sk_SSL_CIPHER_num(sk) == 0)
+ return NULL;
+
for (i=0; i<sk_SSL_CIPHER_num(sk); i++)
{
int n;
@@ -1442,7 +1458,7 @@
#endif /* OPENSSL_NO_KRB5 */
#ifndef OPENSSL_NO_PSK
/* with PSK there must be client callback set */
- if (((c->algorithm_mkey & SSL_kPSK) || (c->algorithm_auth & SSL_aPSK)) &&
+ if ((c->algorithm_auth & SSL_aPSK) &&
s->psk_client_callback == NULL)
continue;
#endif /* OPENSSL_NO_PSK */
@@ -2689,6 +2705,10 @@
{
return(SSL_ERROR_WANT_X509_LOOKUP);
}
+ if ((i < 0) && SSL_want_channel_id_lookup(s))
+ {
+ return(SSL_ERROR_WANT_CHANNEL_ID_LOOKUP);
+ }
if (i == 0)
{
@@ -3333,32 +3353,54 @@
if (s == NULL)
return 0;
- if (s->session == NULL)
- return 1; /* session not created yet, ignored */
-
if (identity_hint != NULL && strlen(identity_hint) > PSK_MAX_IDENTITY_LEN)
{
SSLerr(SSL_F_SSL_USE_PSK_IDENTITY_HINT, SSL_R_DATA_LENGTH_TOO_LONG);
return 0;
}
- if (s->session->psk_identity_hint != NULL)
+
+ /* Clear hint in SSL and associated SSL_SESSION (if any). */
+ if (s->psk_identity_hint != NULL)
+ {
+ OPENSSL_free(s->psk_identity_hint);
+ s->psk_identity_hint = NULL;
+ }
+ if (s->session != NULL && s->session->psk_identity_hint != NULL)
+ {
OPENSSL_free(s->session->psk_identity_hint);
+ s->session->psk_identity_hint = NULL;
+ }
+
if (identity_hint != NULL)
{
- s->session->psk_identity_hint = BUF_strdup(identity_hint);
- if (s->session->psk_identity_hint == NULL)
- return 0;
+ /* The hint is stored in SSL and SSL_SESSION with the one in
+ * SSL_SESSION taking precedence. Thus, if SSL_SESSION is avaiable,
+ * we store the hint there, otherwise we store it in SSL. */
+ if (s->session != NULL)
+ {
+ s->session->psk_identity_hint = BUF_strdup(identity_hint);
+ if (s->session->psk_identity_hint == NULL)
+ return 0;
+ }
+ else
+ {
+ s->psk_identity_hint = BUF_strdup(identity_hint);
+ if (s->psk_identity_hint == NULL)
+ return 0;
+ }
}
- else
- s->session->psk_identity_hint = NULL;
return 1;
}
const char *SSL_get_psk_identity_hint(const SSL *s)
{
- if (s == NULL || s->session == NULL)
+ if (s == NULL)
return NULL;
- return(s->session->psk_identity_hint);
+ /* The hint is stored in SSL and SSL_SESSION with the one in SSL_SESSION
+ * taking precedence. */
+ if (s->session != NULL)
+ return(s->session->psk_identity_hint);
+ return(s->psk_identity_hint);
}
const char *SSL_get_psk_identity(const SSL *s)
@@ -3415,12 +3457,41 @@
s->version >= SSL3_VERSION &&
s->s3->in_read_app_data == 0 && /* cutthrough only applies to write() */
(SSL_get_mode((SSL*)s) & SSL_MODE_HANDSHAKE_CUTTHROUGH) && /* cutthrough enabled */
- SSL_get_cipher_bits(s, NULL) >= 128 && /* strong cipher choosen */
+ ssl3_can_cutthrough(s) && /* cutthrough allowed */
s->s3->previous_server_finished_len == 0 && /* not a renegotiation handshake */
(s->state == SSL3_ST_CR_SESSION_TICKET_A || /* ready to write app-data*/
s->state == SSL3_ST_CR_FINISHED_A));
}
+int ssl3_can_cutthrough(const SSL *s)
+ {
+ const SSL_CIPHER *c;
+
+ /* require a strong enough cipher */
+ if (SSL_get_cipher_bits(s, NULL) < 128)
+ return 0;
+
+ /* require ALPN or NPN extension */
+ if (!s->s3->alpn_selected
+#ifndef OPENSSL_NO_NEXTPROTONEG
+ && !s->s3->next_proto_neg_seen
+#endif
+ )
+ {
+ return 0;
+ }
+
+ /* require a forward-secret cipher */
+ c = SSL_get_current_cipher(s);
+ if (!c || (c->algorithm_mkey != SSL_kEDH &&
+ c->algorithm_mkey != SSL_kEECDH))
+ {
+ return 0;
+ }
+
+ return 1;
+ }
+
/* Allocates new EVP_MD_CTX and sets pointer to it into given pointer
* vairable, freeing EVP_MD_CTX previously stored in that variable, if
* any. If EVP_MD pointer is passed, initializes ctx with this md
diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
index f79ab00..6b7731a 100644
--- a/ssl/ssl_locl.h
+++ b/ssl/ssl_locl.h
@@ -1070,6 +1070,7 @@
int tls1_change_cipher_state(SSL *s, int which);
int tls1_setup_key_block(SSL *s);
int tls1_enc(SSL *s, int snd);
+int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len);
int tls1_final_finish_mac(SSL *s,
const char *str, int slen, unsigned char *p);
int tls1_cert_verify_mac(SSL *s, int md_nid, unsigned char *p);
@@ -1126,8 +1127,10 @@
const EVP_MD *tls12_get_hash(unsigned char hash_alg);
int tls1_channel_id_hash(EVP_MD_CTX *ctx, SSL *s);
+int tls1_record_handshake_hashes_for_channel_id(SSL *s);
#endif
+int ssl3_can_cutthrough(const SSL *s);
EVP_MD_CTX* ssl_replace_hash(EVP_MD_CTX **hash,const EVP_MD *md) ;
void ssl_clear_hash_ctx(EVP_MD_CTX **hash);
int ssl_add_serverhello_renegotiate_ext(SSL *s, unsigned char *p, int *len,
diff --git a/ssl/ssl_sess.c b/ssl/ssl_sess.c
index 920b763..7d17085 100644
--- a/ssl/ssl_sess.c
+++ b/ssl/ssl_sess.c
@@ -427,6 +427,18 @@
}
#endif
#endif
+#ifndef OPENSSL_NO_PSK
+ if (s->psk_identity_hint)
+ {
+ ss->psk_identity_hint = BUF_strdup(s->psk_identity_hint);
+ if (ss->psk_identity_hint == NULL)
+ {
+ SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE);
+ SSL_SESSION_free(ss);
+ return 0;
+ }
+ }
+#endif
}
else
{
@@ -1132,6 +1144,17 @@
return ctx->client_cert_cb;
}
+void SSL_CTX_set_channel_id_cb(SSL_CTX *ctx,
+ void (*cb)(SSL *ssl, EVP_PKEY **pkey))
+ {
+ ctx->channel_id_cb=cb;
+ }
+
+void (*SSL_CTX_get_channel_id_cb(SSL_CTX *ctx))(SSL * ssl, EVP_PKEY **pkey)
+ {
+ return ctx->channel_id_cb;
+ }
+
#ifndef OPENSSL_NO_ENGINE
int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e)
{
diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c
index 5c9f261..b8d497e 100644
--- a/ssl/t1_enc.c
+++ b/ssl/t1_enc.c
@@ -895,54 +895,79 @@
return((int)ret);
}
-int tls1_final_finish_mac(SSL *s,
- const char *str, int slen, unsigned char *out)
+/* tls1_handshake_digest calculates the current handshake hash and writes it to
+ * |out|, which has space for |out_len| bytes. It returns the number of bytes
+ * written or -1 in the event of an error. This function works on a copy of the
+ * underlying digests so can be called multiple times and prior to the final
+ * update etc. */
+int tls1_handshake_digest(SSL *s, unsigned char *out, size_t out_len)
{
- unsigned int i;
+ const EVP_MD *md;
EVP_MD_CTX ctx;
- unsigned char buf[2*EVP_MAX_MD_SIZE];
- unsigned char *q,buf2[12];
- int idx;
+ int i, err = 0, len = 0;
long mask;
- int err=0;
- const EVP_MD *md;
-
- q=buf;
-
- if (s->s3->handshake_buffer)
- if (!ssl3_digest_cached_records(s))
- return 0;
EVP_MD_CTX_init(&ctx);
- for (idx=0;ssl_get_handshake_digest(idx,&mask,&md);idx++)
+ for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++)
{
- if (mask & ssl_get_algorithm2(s))
+ int hash_size;
+ unsigned int digest_len;
+ EVP_MD_CTX *hdgst = s->s3->handshake_dgst[i];
+
+ if ((mask & ssl_get_algorithm2(s)) == 0)
+ continue;
+
+ hash_size = EVP_MD_size(md);
+ if (!hdgst || hash_size < 0 || (size_t)hash_size > out_len)
{
- int hashsize = EVP_MD_size(md);
- EVP_MD_CTX *hdgst = s->s3->handshake_dgst[idx];
- if (!hdgst || hashsize < 0 || hashsize > (int)(sizeof buf - (size_t)(q-buf)))
- {
- /* internal error: 'buf' is too small for this cipersuite! */
- err = 1;
- }
- else
- {
- if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) ||
- !EVP_DigestFinal_ex(&ctx,q,&i) ||
- (i != (unsigned int)hashsize))
- err = 1;
- q+=hashsize;
- }
+ err = 1;
+ break;
}
+
+ if (!EVP_MD_CTX_copy_ex(&ctx, hdgst) ||
+ !EVP_DigestFinal_ex(&ctx, out, &digest_len) ||
+ digest_len != (unsigned int)hash_size) /* internal error */
+ {
+ err = 1;
+ break;
+ }
+ out += digest_len;
+ out_len -= digest_len;
+ len += digest_len;
}
-
+
+ EVP_MD_CTX_cleanup(&ctx);
+
+ if (err != 0)
+ return -1;
+ return len;
+ }
+
+int tls1_final_finish_mac(SSL *s,
+ const char *str, int slen, unsigned char *out)
+ {
+ unsigned char buf[2*EVP_MAX_MD_SIZE];
+ unsigned char buf2[12];
+ int err=0;
+ int digests_len;
+
+ if (s->s3->handshake_buffer)
+ if (!ssl3_digest_cached_records(s))
+ return 0;
+
+ digests_len = tls1_handshake_digest(s, buf, sizeof(buf));
+ if (digests_len < 0)
+ {
+ err = 1;
+ digests_len = 0;
+ }
+
if (!tls1_PRF(ssl_get_algorithm2(s),
- str,slen, buf,(int)(q-buf), NULL,0, NULL,0, NULL,0,
+ str,slen, buf, digests_len, NULL,0, NULL,0, NULL,0,
s->session->master_key,s->session->master_key_length,
out,buf2,sizeof buf2))
err = 1;
- EVP_MD_CTX_cleanup(&ctx);
if (err)
return 0;
@@ -986,7 +1011,8 @@
}
else
{
- EVP_MD_CTX_copy(&hmac,hash);
+ if (!EVP_MD_CTX_copy(&hmac,hash))
+ return -1;
mac_ctx = &hmac;
}
@@ -1047,14 +1073,10 @@
if (!stream_mac)
EVP_MD_CTX_cleanup(&hmac);
#ifdef TLS_DEBUG
-printf("sec=");
-{unsigned int z; for (z=0; z<md_size; z++) printf("%02X ",mac_sec[z]); printf("\n"); }
printf("seq=");
{int z; for (z=0; z<8; z++) printf("%02X ",seq[z]); printf("\n"); }
-printf("buf=");
-{int z; for (z=0; z<5; z++) printf("%02X ",buf[z]); printf("\n"); }
printf("rec=");
-{unsigned int z; for (z=0; z<rec->length; z++) printf("%02X ",buf[z]); printf("\n"); }
+{unsigned int z; for (z=0; z<rec->length; z++) printf("%02X ",rec->data[z]); printf("\n"); }
#endif
if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER)
@@ -1184,7 +1206,7 @@
if (memcmp(val, TLS_MD_KEY_EXPANSION_CONST,
TLS_MD_KEY_EXPANSION_CONST_SIZE) == 0) goto err1;
- rv = tls1_PRF(s->s3->tmp.new_cipher->algorithm2,
+ rv = tls1_PRF(ssl_get_algorithm2(s),
val, vallen,
NULL, 0,
NULL, 0,
diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c
index eba732f..122a25f 100644
--- a/ssl/t1_lib.c
+++ b/ssl/t1_lib.c
@@ -617,6 +617,8 @@
#ifndef OPENSSL_NO_HEARTBEATS
/* Add Heartbeat extension */
+ if ((limit - ret - 4 - 1) < 0)
+ return NULL;
s2n(TLSEXT_TYPE_heartbeat,ret);
s2n(1,ret);
/* Set mode:
@@ -647,7 +649,10 @@
* support for Channel ID. */
if (limit - ret - 4 < 0)
return NULL;
- s2n(TLSEXT_TYPE_channel_id,ret);
+ if (s->ctx->tlsext_channel_id_enabled_new)
+ s2n(TLSEXT_TYPE_channel_id_new,ret);
+ else
+ s2n(TLSEXT_TYPE_channel_id,ret);
s2n(0,ret);
}
@@ -683,31 +688,35 @@
ret += el;
}
#endif
-
/* Add padding to workaround bugs in F5 terminators.
- * See https://tools.ietf.org/html/draft-agl-tls-padding-02 */
- {
- int hlen = ret - (unsigned char *)s->init_buf->data;
- /* The code in s23_clnt.c to build ClientHello messages includes the
- * 5-byte record header in the buffer, while the code in s3_clnt.c does
- * not. */
- if (s->state == SSL23_ST_CW_CLNT_HELLO_A)
- hlen -= 5;
- if (hlen > 0xff && hlen < 0x200)
+ * See https://tools.ietf.org/html/draft-agl-tls-padding-03
+ *
+ * NB: because this code works out the length of all existing
+ * extensions it MUST always appear last.
+ */
+ if (s->options & SSL_OP_TLSEXT_PADDING)
{
- hlen = 0x200 - hlen;
- if (hlen >= 4)
- hlen -= 4;
- else
- hlen = 0;
+ int hlen = ret - (unsigned char *)s->init_buf->data;
+ /* The code in s23_clnt.c to build ClientHello messages
+ * includes the 5-byte record header in the buffer, while
+ * the code in s3_clnt.c does not.
+ */
+ if (s->state == SSL23_ST_CW_CLNT_HELLO_A)
+ hlen -= 5;
+ if (hlen > 0xff && hlen < 0x200)
+ {
+ hlen = 0x200 - hlen;
+ if (hlen >= 4)
+ hlen -= 4;
+ else
+ hlen = 0;
- s2n(TLSEXT_TYPE_padding, ret);
- s2n(hlen, ret);
- memset(ret, 0, hlen);
- ret += hlen;
+ s2n(TLSEXT_TYPE_padding, ret);
+ s2n(hlen, ret);
+ memset(ret, 0, hlen);
+ ret += hlen;
+ }
}
- }
-
if ((extdatalen = ret-p-2)== 0)
return p;
@@ -862,6 +871,8 @@
/* Add Heartbeat extension if we've received one */
if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED)
{
+ if ((limit - ret - 4 - 1) < 0)
+ return NULL;
s2n(TLSEXT_TYPE_heartbeat,ret);
s2n(1,ret);
/* Set mode:
@@ -904,7 +915,10 @@
{
if (limit - ret - 4 < 0)
return NULL;
- s2n(TLSEXT_TYPE_channel_id,ret);
+ if (s->s3->tlsext_channel_id_new)
+ s2n(TLSEXT_TYPE_channel_id_new,ret);
+ else
+ s2n(TLSEXT_TYPE_channel_id,ret);
s2n(0,ret);
}
@@ -1412,7 +1426,7 @@
tls1_process_sigalgs(s, data, dsize);
}
else if (type == TLSEXT_TYPE_status_request &&
- s->version != DTLS1_VERSION && s->ctx->tlsext_status_cb)
+ s->version != DTLS1_VERSION)
{
if (size < 5)
@@ -1567,6 +1581,13 @@
else if (type == TLSEXT_TYPE_channel_id && s->tlsext_channel_id_enabled)
s->s3->tlsext_channel_id_valid = 1;
+ else if (type == TLSEXT_TYPE_channel_id_new &&
+ s->tlsext_channel_id_enabled)
+ {
+ s->s3->tlsext_channel_id_valid = 1;
+ s->s3->tlsext_channel_id_new = 1;
+ }
+
else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation &&
s->ctx->alpn_select_cb &&
s->s3->tmp.finish_md_len == 0)
@@ -1816,6 +1837,12 @@
else if (type == TLSEXT_TYPE_channel_id)
s->s3->tlsext_channel_id_valid = 1;
+ else if (type == TLSEXT_TYPE_channel_id_new)
+ {
+ s->s3->tlsext_channel_id_valid = 1;
+ s->s3->tlsext_channel_id_new = 1;
+ }
+
else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation)
{
unsigned len;
@@ -2744,16 +2771,20 @@
unsigned int payload;
unsigned int padding = 16; /* Use minimum padding */
- /* Read type and payload length first */
- hbtype = *p++;
- n2s(p, payload);
- pl = p;
-
if (s->msg_callback)
s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT,
&s->s3->rrec.data[0], s->s3->rrec.length,
s, s->msg_callback_arg);
+ /* Read type and payload length first */
+ if (1 + 2 + 16 > s->s3->rrec.length)
+ return 0; /* silently discard */
+ hbtype = *p++;
+ n2s(p, payload);
+ if (1 + 2 + payload + 16 > s->s3->rrec.length)
+ return 0; /* silently discard per RFC 6520 sec. 4 */
+ pl = p;
+
if (hbtype == TLS1_HB_REQUEST)
{
unsigned char *buffer, *bp;
@@ -2899,6 +2930,17 @@
EVP_DigestUpdate(md, kClientIDMagic, sizeof(kClientIDMagic));
+ if (s->hit && s->s3->tlsext_channel_id_new)
+ {
+ static const char kResumptionMagic[] = "Resumption";
+ EVP_DigestUpdate(md, kResumptionMagic,
+ sizeof(kResumptionMagic));
+ if (s->session->original_handshake_hash_len == 0)
+ return 0;
+ EVP_DigestUpdate(md, s->session->original_handshake_hash,
+ s->session->original_handshake_hash_len);
+ }
+
EVP_MD_CTX_init(&ctx);
for (i = 0; i < SSL_MAX_DIGEST; i++)
{
@@ -2913,3 +2955,29 @@
return 1;
}
#endif
+
+/* tls1_record_handshake_hashes_for_channel_id records the current handshake
+ * hashes in |s->session| so that Channel ID resumptions can sign that data. */
+int tls1_record_handshake_hashes_for_channel_id(SSL *s)
+ {
+ int digest_len;
+ /* This function should never be called for a resumed session because
+ * the handshake hashes that we wish to record are for the original,
+ * full handshake. */
+ if (s->hit)
+ return -1;
+ /* It only makes sense to call this function if Channel IDs have been
+ * negotiated. */
+ if (!s->s3->tlsext_channel_id_new)
+ return -1;
+
+ digest_len = tls1_handshake_digest(
+ s, s->session->original_handshake_hash,
+ sizeof(s->session->original_handshake_hash));
+ if (digest_len < 0)
+ return -1;
+
+ s->session->original_handshake_hash_len = digest_len;
+
+ return 1;
+ }
diff --git a/ssl/tls1.h b/ssl/tls1.h
index 86507d7..dc36f79 100644
--- a/ssl/tls1.h
+++ b/ssl/tls1.h
@@ -233,6 +233,12 @@
/* ExtensionType value from RFC5620 */
#define TLSEXT_TYPE_heartbeat 15
+/* ExtensionType value for TLS padding extension.
+ * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml
+ * http://tools.ietf.org/html/draft-agl-tls-padding-03
+ */
+#define TLSEXT_TYPE_padding 21
+
/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */
#define TLSEXT_TYPE_application_layer_protocol_negotiation 16
@@ -256,10 +262,7 @@
/* This is not an IANA defined extension number */
#define TLSEXT_TYPE_channel_id 30031
-
-/* See https://tools.ietf.org/html/draft-agl-tls-padding-02
- * Number not yet IANA assigned. */
-#define TLSEXT_TYPE_padding 35655
+#define TLSEXT_TYPE_channel_id_new 30032
/* NameType value from RFC 3546 */
#define TLSEXT_NAMETYPE_host_name 0
@@ -532,6 +535,12 @@
#define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031
#define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032
+/* ECDHE PSK ciphersuites from RFC5489
+ * SHA-2 cipher suites are omitted because they cannot be used safely with
+ * SSLv3. */
+#define TLS1_CK_ECDHE_PSK_WITH_AES_128_CBC_SHA 0x0300C035
+#define TLS1_CK_ECDHE_PSK_WITH_AES_256_CBC_SHA 0x0300C036
+
/* XXX
* Inconsistency alert:
* The OpenSSL names of ciphers with ephemeral DH here include the string
@@ -683,6 +692,10 @@
#define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256"
#define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384"
+/* ECDHE PSK ciphersuites from RFC5489 */
+#define TLS1_TXT_ECDHE_PSK_WITH_AES_128_CBC_SHA "ECDHE-PSK-AES128-CBC-SHA"
+#define TLS1_TXT_ECDHE_PSK_WITH_AES_256_CBC_SHA "ECDHE-PSK-AES256-CBC-SHA"
+
#define TLS_CT_RSA_SIGN 1
#define TLS_CT_DSS_SIGN 2
#define TLS_CT_RSA_FIXED_DH 3