From f14f22cebf4608056238fd5e326a6526cb1eef13 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Mon, 6 Jan 2014 02:26:10 +0000 Subject: [PATCH] [AArch64 NEON] Fix invalid constant used in vselect condition. The combine wrongly assumed that the vector element type and the type of each ConstantSDNode in the build_vector are the same. However, when promoting the integer operand of a legally typed build_vector, the operand type and the vector element type do not need to be the same (see method 'DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR' in LegalizeIntegerTypes.cpp). In the AArch64 backend, the following DAG sequence: C0: i1 = Constant<0> C1: i1 = Constant<-1> V: v8i1 = BUILD_VECTOR C1, C1, C0, C0, C0, C0, C0, C0 is type-legalized into: NewC0: i32 = Constant<0> NewC1: i32 = Constant<1> V: v8i8 = BUILD_VECTOR NewC1, NewC1, NewC0, NewC0, NewC0, NewC0, NewC0, NewC0 Forcing a getZeroExtend to VTBits ensures that the new constant is computed correctly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198582 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- test/CodeGen/AArch64/neon-bitwise-instructions.ll | 57 +++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3b87922b789..422e83ab7d5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5527,8 +5527,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } ConstantSDNode *CurrentND = cast(Op); - const APInt &C = CurrentND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt), + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), Op.getValueType())); } diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 
b0e51566677..8f4f304b305 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -558,6 +558,63 @@ define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ret <4 x i32> %4 } +define <8 x i8> @vselect_v8i8(<8 x i8> %a) { +;CHECK: movi {{d[0-9]+}}, #0xffff +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %b = select <8 x i1> , <8 x i8> %a, <8 x i8> + ret <8 x i8> %b +} + +define <4 x i16> @vselect_v4i16(<4 x i16> %a) { +;CHECK: movi {{d[0-9]+}}, #0xffff +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %b = select <4 x i1> , <4 x i16> %a, <4 x i16> + ret <4 x i16> %b +} + +define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp ne <8 x i8> %a, %b + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp eq <8 x i8> %a, %b + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp ne <8 x i8> %a, zeroinitializer + %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %cmp = icmp eq <8 x i8> %a, zeroinitializer + %d = select <8 x i1> %cmp, <8 x i8> 
%b, <8 x i8> %c + ret <8 x i8> %d +} + +define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +;CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp3 = and <8 x i8> %a, %b + %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer + %d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c + ret <8 x i8> %d +} + define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <2 x i64> %v1, %v2 -- 2.11.0