OSDN Git Service

[AArch64] Enable FeatureFuseAES on Cortex-A72.
authorFlorian Hahn <florian.hahn@arm.com>
Mon, 15 May 2017 15:15:22 +0000 (15:15 +0000)
committerFlorian Hahn <florian.hahn@arm.com>
Mon, 15 May 2017 15:15:22 +0000 (15:15 +0000)
This patch enables fusing dependent AESE/AESMC and AESD/AESIMC
instruction pairs on Cortex-A72, as recommended in the Software
Optimization Guide, section 4.10.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303073 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AArch64/AArch64.td
test/CodeGen/AArch64/misched-fusion-aes.ll

index 73f2b6a..4af5fef 100644 (file)
@@ -216,6 +216,7 @@ def ProcA72     : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
                                    FeatureCRC,
                                    FeatureCrypto,
                                    FeatureFPARMv8,
+                                   FeatureFuseAES,
                                    FeatureNEON,
                                    FeaturePerfMon
                                    ]>;
index f29dfb3..4c682e5 100644 (file)
@@ -1,4 +1,5 @@
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKA57
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKA72
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKM1
 
 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
@@ -87,6 +88,22 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
 ; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
 ; CHECKA57: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
+; CHECKA72: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
+; CHECKA72: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
+; CHECKA72: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VC]]
+; CHECKA72: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VD]]
+; CHECKA72: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VE]]
+; CHECKA72: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VF]]
+; CHECKA72: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
+; CHECKA72: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
 ; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECKM1: aesmc {{v[0-7].16b}}, [[VA]]
 ; CHECKM1: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
@@ -187,6 +204,22 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
 ; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
 ; CHECKA57: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
+; CHECKA72: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
+; CHECKA72: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VB]]
+; CHECKA72: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VC]]
+; CHECKA72: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VD]]
+; CHECKA72: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VE]]
+; CHECKA72: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VF]]
+; CHECKA72: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
+; CHECKA72: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKA72-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
 ; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECKM1: aesimc {{v[0-7].16b}}, [[VA]]
 ; CHECKM1: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}