--- /dev/null
+++ b/host/include/aarch64/host/crypto/aes-round.h
+/*
+ * AArch64-specific AES acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef AARCH64_HOST_CRYPTO_AES_ROUND_H
+#define AARCH64_HOST_CRYPTO_AES_ROUND_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
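+/*
+ * With __ARM_FEATURE_AES the instructions are guaranteed by the build;
+ * otherwise availability must be probed at runtime via cpuinfo.
+ */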
+#ifdef __ARM_FEATURE_AES
+# define HAVE_AES_ACCEL true
+#else
+# define HAVE_AES_ACCEL likely(cpuinfo & CPUINFO_AES)
+#endif
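+
+/*
+ * Without __ARM_FEATURE_AES, the crypto instructions are not enabled
+ * for the translation unit as a whole, so each function that uses the
+ * intrinsics must enable them locally via
+ * __attribute__((target("+crypto"))).
+ */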
+#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
+# define ATTR_AES_ACCEL __attribute__((target("+crypto")))
+#else
+# define ATTR_AES_ACCEL
+#endif
+
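+/*
+ * Reverse the bytes within the 128-bit vector, converting between a
+ * big-endian AES state and the byte order used by the instructions.
+ */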
+static inline uint8x16_t aes_accel_bswap(uint8x16_t x)
+{
+ return vqtbl1q_u8(x, (uint8x16_t){ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, });
+}
+
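+/*
+ * Use the compiler intrinsics when available; otherwise emit the
+ * instructions via inline asm, which works regardless of whether the
+ * compiler enables the crypto feature.
+ */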
+#ifdef CONFIG_ARM_AES_BUILTIN
+# define aes_accel_aesd vaesdq_u8
+# define aes_accel_aese vaeseq_u8
+# define aes_accel_aesmc vaesmcq_u8
+# define aes_accel_aesimc vaesimcq_u8
+# define aes_accel_aesd_imc(S, K) vaesimcq_u8(vaesdq_u8(S, K))
+# define aes_accel_aese_mc(S, K) vaesmcq_u8(vaeseq_u8(S, K))
+#else
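+/*
+ * The .arch_extension directive enables the AES instructions within
+ * the assembler, without requiring -march=...+crypto for the file.
+ */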
+static inline uint8x16_t aes_accel_aesd(uint8x16_t d, uint8x16_t k)
+{
+ asm(".arch_extension aes\n\t"
+ "aesd %0.16b, %1.16b" : "+w"(d) : "w"(k));
+ return d;
+}
+
+static inline uint8x16_t aes_accel_aese(uint8x16_t d, uint8x16_t k)
+{
+ asm(".arch_extension aes\n\t"
+ "aese %0.16b, %1.16b" : "+w"(d) : "w"(k));
+ return d;
+}
+
+static inline uint8x16_t aes_accel_aesmc(uint8x16_t d)
+{
+ asm(".arch_extension aes\n\t"
+ "aesmc %0.16b, %1.16b" : "=w"(d) : "w"(d));
+ return d;
+}
+
+static inline uint8x16_t aes_accel_aesimc(uint8x16_t d)
+{
+ asm(".arch_extension aes\n\t"
+ "aesimc %0.16b, %1.16b" : "=w"(d) : "w"(d));
+ return d;
+}
+
+/* Most CPUs fuse AESD+AESIMC in the execution pipeline. */
+static inline uint8x16_t aes_accel_aesd_imc(uint8x16_t d, uint8x16_t k)
+{
+ asm(".arch_extension aes\n\t"
+ "aesd %0.16b, %1.16b\n\t"
+ "aesimc %0.16b, %0.16b" : "+w"(d) : "w"(k));
+ return d;
+}
+
+/* Most CPUs fuse AESE+AESMC in the execution pipeline. */
+static inline uint8x16_t aes_accel_aese_mc(uint8x16_t d, uint8x16_t k)
+{
+ asm(".arch_extension aes\n\t"
+ "aese %0.16b, %1.16b\n\t"
+ "aesmc %0.16b, %0.16b" : "+w"(d) : "w"(k));
+ return d;
+}
+#endif /* CONFIG_ARM_AES_BUILTIN */
+
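+/*
+ * AESE computes ShiftRows(SubBytes(S ^ K)) and AESD computes
+ * InvSubBytes(InvShiftRows(S ^ K)).  Passing a zero key isolates the
+ * SubBytes/ShiftRows steps, so that AddRoundKey can be applied
+ * separately as a plain XOR wherever the required decomposition
+ * places it.  The 'be' parameter indicates a big-endian state,
+ * handled by byte-swapping around the little-endian instructions.
+ */
+
+/* MixColumns only, via AESMC. */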
+static inline void ATTR_AES_ACCEL
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aesmc(t);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aesmc(t);
+ }
+ ret->v = (AESStateVec)t;
+}
+
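+/* SubBytes + ShiftRows via AESE with a zero key, then AddRoundKey. */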
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+ const AESState *rk, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+ uint8x16_t z = { };
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aese(t, z);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aese(t, z);
+ }
+ ret->v = (AESStateVec)t ^ rk->v;
+}
+
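+/* Full round: SubBytes + ShiftRows + MixColumns, then AddRoundKey. */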
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+ const AESState *rk, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+ uint8x16_t z = { };
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aese_mc(t, z);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aese_mc(t, z);
+ }
+ ret->v = (AESStateVec)t ^ rk->v;
+}
+
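+/* InvMixColumns only, via AESIMC. */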
+static inline void ATTR_AES_ACCEL
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aesimc(t);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aesimc(t);
+ }
+ ret->v = (AESStateVec)t;
+}
+
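+/* InvSubBytes + InvShiftRows via AESD with a zero key, then AddRoundKey. */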
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+ const AESState *rk, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+ uint8x16_t z = { };
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aesd(t, z);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aesd(t, z);
+ }
+ ret->v = (AESStateVec)t ^ rk->v;
+}
+
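+/*
+ * InvSubBytes + InvShiftRows + AddRoundKey + InvMixColumns.  The round
+ * key is XORed in before AESIMC, so for a big-endian state it must be
+ * byte-swapped to match.
+ */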
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+ const AESState *rk, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+ uint8x16_t k = (uint8x16_t)rk->v;
+ uint8x16_t z = { };
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ k = aes_accel_bswap(k);
+ t = aes_accel_aesd(t, z);
+ t ^= k;
+ t = aes_accel_aesimc(t);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aesd(t, z);
+ t ^= k;
+ t = aes_accel_aesimc(t);
+ }
+ ret->v = (AESStateVec)t;
+}
+
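+/* InvSubBytes + InvShiftRows + InvMixColumns, then AddRoundKey last. */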
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+ const AESState *rk, bool be)
+{
+ uint8x16_t t = (uint8x16_t)st->v;
+ uint8x16_t z = { };
+
+ if (be) {
+ t = aes_accel_bswap(t);
+ t = aes_accel_aesd_imc(t, z);
+ t = aes_accel_bswap(t);
+ } else {
+ t = aes_accel_aesd_imc(t, z);
+ }
+ ret->v = (AESStateVec)t ^ rk->v;
+}
+
+#endif /* AARCH64_HOST_CRYPTO_AES_ROUND_H */