crypto: arm/aes - add some hardening against cache-timing attacks

author Eric Biggers <ebiggers@google.com>

Thu, 18 Oct 2018 04:37:59 +0000 (21:37 -0700)

committer Herbert Xu <herbert@gondor.apana.org.au>

Fri, 9 Nov 2018 09:36:48 +0000 (17:36 +0800)
author Eric Biggers <ebiggers@google.com>
Thu, 18 Oct 2018 04:37:59 +0000 (21:37 -0700)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 9 Nov 2018 09:36:48 +0000 (17:36 +0800)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig

index ef0c7fe..0473a8f 100644 (file)
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -69,6 +69,15 @@ config CRYPTO_AES_ARM
         help
           Use optimized AES assembler routines for ARM platforms.
  
+         On ARM processors without the Crypto Extensions, this is the
+         fastest AES implementation for single blocks.  For multiple
+         blocks, the NEON bit-sliced implementation is usually faster.
+
+         This implementation may be vulnerable to cache timing attacks,
+         since it uses lookup tables.  However, as countermeasures it
+         disables IRQs and preloads the tables; it is hoped this makes
+         such attacks very difficult.
+
  config CRYPTO_AES_ARM_BS
         tristate "Bit sliced AES using NEON instructions"
         depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S

index 184d6c2..f2d67c0 100644 (file)
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
   */
  
  #include <linux/linkage.h>
+#include <asm/assembler.h>
  #include <asm/cache.h>
  
         .text
@@ -41,7 +42,7 @@
         .endif
         .endm
  
-       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
+       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
         __select        \out0, \in0, 0
         __select        t0, \in1, 1
         __load          \out0, \out0, 0, \sz, \op
@@ -73,6 +74,14 @@
         __load          t0, t0, 3, \sz, \op
         __load          \t4, \t4, 3, \sz, \op
  
+       .ifnb           \oldcpsr
+       /*
+        * This is the final round and we're done with all data-dependent table
+        * lookups, so we can safely re-enable interrupts.
+        */
+       restore_irqs    \oldcpsr
+       .endif
+
         eor             \out1, \out1, t1, ror #24
         eor             \out0, \out0, t2, ror #16
         ldm             rk!, {t1, t2}
@@ -83,14 +92,14 @@
         eor             \out1, \out1, t2
         .endm
  
-       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
         __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
-       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
         .endm
  
-       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
         __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
-       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
         .endm
  
         .macro          __rev, out, in
@@ -118,13 +127,14 @@
         .macro          do_crypt, round, ttab, ltab, bsz
         push            {r3-r11, lr}
  
+       // Load keys first, to reduce latency in case they're not cached yet.
+       ldm             rk!, {r8-r11}
+
         ldr             r4, [in]
         ldr             r5, [in, #4]
         ldr             r6, [in, #8]
         ldr             r7, [in, #12]
  
-       ldm             rk!, {r8-r11}
-
  #ifdef CONFIG_CPU_BIG_ENDIAN
         __rev           r4, r4
         __rev           r5, r5
@@ -138,6 +148,25 @@
         eor             r7, r7, r11
  
         __adrl          ttab, \ttab
+       /*
+        * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+        * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
+        * intended to make cache-timing attacks more difficult.  They may not
+        * be fully prevented, however; see the paper
+        * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+        * ("Cache-timing attacks on AES") for a discussion of the many
+        * difficulties involved in writing truly constant-time AES software.
+        */
+        save_and_disable_irqs  t0
+       .set            i, 0
+       .rept           1024 / 128
+       ldr             r8, [ttab, #i + 0]
+       ldr             r9, [ttab, #i + 32]
+       ldr             r10, [ttab, #i + 64]
+       ldr             r11, [ttab, #i + 96]
+       .set            i, i + 128
+       .endr
+       push            {t0}            // oldcpsr
  
         tst             rounds, #2
         bne             1f
@@ -151,8 +180,21 @@
         \round          r4, r5, r6, r7, r8, r9, r10, r11
         b               0b
  
-2:     __adrl          ttab, \ltab
-       \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
+2:     .ifb            \ltab
+       add             ttab, ttab, #1
+       .else
+       __adrl          ttab, \ltab
+       // Prefetch inverse S-box for final round; see explanation above
+       .set            i, 0
+       .rept           256 / 64
+       ldr             t0, [ttab, #i + 0]
+       ldr             t1, [ttab, #i + 32]
+       .set            i, i + 64
+       .endr
+       .endif
+
+       pop             {rounds}        // oldcpsr
+       \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
  
  #ifdef CONFIG_CPU_BIG_ENDIAN
         __rev           r4, r4
@@ -175,7 +217,7 @@
         .endm
  
  ENTRY(__aes_arm_encrypt)
-       do_crypt        fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+       do_crypt        fround, crypto_ft_tab,, 2
  ENDPROC(__aes_arm_encrypt)
  
         .align          5
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c

index ca554d5..13df33a 100644 (file)
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n)
  
  static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
  
-__visible const u32 crypto_ft_tab[4][256] = {
+/* cacheline-aligned to facilitate prefetching into cache */
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
         {
                 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
                 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
@@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = {
         }
  };
  
-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
         {
                 0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
                 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = {
         }
  };
  
-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
         {
                 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
                 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
@@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = {
         }
  };
  
-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
         {
                 0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
                 0x00000030, 0x00000036, 0x000000a5, 0x00000038,
author	Eric Biggers <ebiggers@google.com>
	Thu, 18 Oct 2018 04:37:59 +0000 (21:37 -0700)
committer	Herbert Xu <herbert@gondor.apana.org.au>
	Fri, 9 Nov 2018 09:36:48 +0000 (17:36 +0800)
arch/arm/crypto/Kconfig		patch | blob | history
arch/arm/crypto/aes-cipher-core.S		patch | blob | history
crypto/aes_generic.c		patch | blob | history