From 0ccfe1b267af84360c1ab6b7e5a1366e1aa8ef7d Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 26 Jun 2020 09:48:53 +0100 Subject: [PATCH] [AArch64][SVE] Predicate bfloat16 load patterns with HasBF16 Reviewers: sdesmalen, c-rhodes, efriedma, fpetrogalli Reviewed By: fpetrogalli Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82464 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 20 ++++++++++++++++---- .../sve-intrinsics-ld1-addressing-mode-reg-imm.ll | 5 ++++- .../sve-intrinsics-ld1-addressing-mode-reg-reg.ll | 5 ++++- llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll | 5 ++++- llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll | 7 +++++-- llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll | 7 +++++-- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll | 5 ++++- 8 files changed, 47 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 26a100e4886..bed4728d4cd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12074,6 +12074,11 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT PtrTy = N->getOperand(3).getValueType(); + if (VT == MVT::nxv8bf16) + assert( + static_cast(DAG.getSubtarget()).hasBF16() && + "Unsupported type (BF16)"); + EVT LoadVT = VT; if (VT.isFloatingPoint()) LoadVT = VT.changeTypeToInteger(); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index d9f18510abb..184323d316f 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1550,7 +1550,10 @@ multiclass sve_prefetch; defm : pred_load; defm : pred_load; - defm : pred_load; + + let Predicates = [HasBF16, HasSVE] in { + defm : pred_load; + } // 16-element contiguous loads defm : pred_load; @@ -1737,7 +1740,10 @@ multiclass sve_prefetch; defm : ld1; defm : ld1; - defm : ld1; + + let Predicates = [HasBF16, HasSVE] in { + defm : ld1; + } // 16-element contiguous loads defm : ld1; @@ -1777,7 +1783,10 @@ multiclass sve_prefetch; defm : ldnf1; defm : ldnf1; - defm : ldnf1; + + let Predicates = [HasBF16, HasSVE] in { + defm : ldnf1; + } // 16-element contiguous non-faulting loads defm : ldnf1; @@ -1818,7 +1827,10 @@ multiclass sve_prefetch; defm : ldff1; defm : ldff1; - defm : ldff1; + + let Predicates = [HasBF16, HasSVE] in { + defm : ldff1; + } // 16-element contiguous first faulting loads defm : ldff1; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll index c977d343e64..e3fccea179e 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll @@ -207,7 +207,7 @@ define @ld1h_f16_inbound( %pg, half* %a) { ret %load } -define @ld1h_bf16_inbound( %pg, bfloat* %a) { +define @ld1h_bf16_inbound( %pg, bfloat* %a) #0 { ; CHECK-LABEL: ld1h_bf16_inbound: ; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl] ; CHECK-NEXT: ret @@ -311,3 +311,6 @@ declare @llvm.aarch64.sve.ld1.nxv2i16(, i16* declare @llvm.aarch64.sve.ld1.nxv2i32(, i32*) declare @llvm.aarch64.sve.ld1.nxv2i64(, i64*) declare @llvm.aarch64.sve.ld1.nxv2f64(, double*) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll index 7ace8882ebe..a47da1c004c 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll @@ -95,7 +95,7 @@ define @ld1h_f16( %pg, half* %a, i64 %index ret %load } -define @ld1h_bf16( %pg, bfloat* %a, i64 %index) { +define @ld1h_bf16( %pg, bfloat* %a, i64 %index) #0 { ; CHECK-LABEL: ld1h_bf16 ; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret @@ -225,3 +225,6 @@ declare @llvm.aarch64.sve.ld1.nxv2i16(, i16* declare @llvm.aarch64.sve.ld1.nxv2i32(, i32*) declare @llvm.aarch64.sve.ld1.nxv2i64(, i64*) declare @llvm.aarch64.sve.ld1.nxv2f64(, double*) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll index e5a9ef08415..69f20fa5c13 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll @@ -87,7 +87,7 @@ define @ld1h_f16( %pred, half* %addr) { ret %res } -define @ld1h_bf16( %pred, bfloat* %addr) { +define @ld1h_bf16( %pred, bfloat* %addr) #0 { ; CHECK-LABEL: ld1h_bf16: ; CHECK: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret @@ -208,3 +208,6 @@ declare @llvm.aarch64.sve.ld1.nxv2i16(, i16* declare @llvm.aarch64.sve.ld1.nxv2i32(, i32*) declare @llvm.aarch64.sve.ld1.nxv2i64(, i64*) declare @llvm.aarch64.sve.ld1.nxv2f64(, double*) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll index eb4d94c9236..96de8cc6780 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll @@ -206,7 +206,7 @@ define @ldff1h_f16( %pg, half* %a) { ret %load } -define @ldff1h_bf16( %pg, bfloat* %a) { +define @ldff1h_bf16( %pg, bfloat* %a) #0 { ; CHECK-LABEL: ldff1h_bf16: ; CHECK: ldff1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret @@ -223,7 +223,7 @@ define @ldff1h_f16_reg( %pg, half* %a, i64 ret %load } -define @ldff1h_bf16_reg( %pg, bfloat* %a, i64 %offset) { +define @ldff1h_bf16_reg( %pg, bfloat* %a, i64 %offset) #0 { ; CHECK-LABEL: ldff1h_bf16_reg: ; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret @@ -428,3 +428,6 @@ declare @llvm.aarch64.sve.ldff1.nxv2i16(, i1 declare @llvm.aarch64.sve.ldff1.nxv2i32(, i32*) declare @llvm.aarch64.sve.ldff1.nxv2i64(, i64*) declare @llvm.aarch64.sve.ldff1.nxv2f64(, double*) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll index 1e9be0e267e..27394bbbf94 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -140,7 +140,7 @@ define @ldnf1h_f16( %pg, half* %a) { ret %load } -define @ldnf1h_bf16( %pg, bfloat* %a) { +define @ldnf1h_bf16( %pg, bfloat* %a) #0 { ; CHECK-LABEL: ldnf1h_bf16: ; CHECK: ldnf1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret @@ -159,7 +159,7 @@ define @ldnf1h_f16_inbound( %pg, half* %a) ret %load } -define @ldnf1h_bf16_inbound( %pg, bfloat* %a) { +define @ldnf1h_bf16_inbound( %pg, bfloat* %a) #0 { ; CHECK-LABEL: ldnf1h_bf16_inbound: ; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] ; CHECK-NEXT: ret @@ -473,3 +473,6 @@ declare @llvm.aarch64.sve.ldnf1.nxv2i16(, i1 declare @llvm.aarch64.sve.ldnf1.nxv2i32(, i32*) declare @llvm.aarch64.sve.ldnf1.nxv2i64(, i64*) declare @llvm.aarch64.sve.ldnf1.nxv2f64(, double*) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll index 126f946b84c..47e3a70bf60 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -87,7 +87,7 @@ define @masked_load_nxv8f16( *%a, %load } -define @masked_load_nxv8bf16( *%a, %mask) nounwind { +define @masked_load_nxv8bf16( *%a, %mask) nounwind #0 { ; CHECK-LABEL: masked_load_nxv8bf16: ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ret @@ -203,3 +203,6 @@ declare void @llvm.masked.store.nxv2f16(, declare void @llvm.masked.store.nxv4f32(, *, i32, ) declare void @llvm.masked.store.nxv4f16(, *, i32, ) declare void @llvm.masked.store.nxv8f16(, *, i32, ) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } -- 2.11.0