From 0a92af487b8ae5d004a3460e3518a3815d9b36a8 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Wed, 19 Dec 2007 20:50:49 +0000 Subject: [PATCH] More working CellSPU test cases: - call.ll: Function call - ctpop.ll: Count population - dp_farith.ll: DP arithmetic - eqv.ll: Equivalence primitives - fcmp.ll: SP comparisons - fdiv.ll: SP division - fneg-fabs.ll: SP negation, absolute value - int2fp.ll: Integer -> SP conversion - rotate_ops.ll: Rotation primitives - select_bits.ll: (a & c) | (b & ~c) bit selection - shift_ops.ll: Shift primitives - sp_farith.ll: SP arithmetic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45217 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/CellSPU/call.ll | 20 +++ test/CodeGen/CellSPU/ctpop.ll | 28 ++++ test/CodeGen/CellSPU/dp_farith.ll | 100 ++++++++++++ test/CodeGen/CellSPU/eqv.ll | 150 ++++++++++++++++++ test/CodeGen/CellSPU/fcmp.ll | 20 +++ test/CodeGen/CellSPU/fdiv.ll | 18 +++ test/CodeGen/CellSPU/fneg-fabs.ll | 41 +++++ test/CodeGen/CellSPU/int2fp.ll | 38 +++++ test/CodeGen/CellSPU/rotate_ops.ll | 157 +++++++++++++++++++ test/CodeGen/CellSPU/select_bits.ll | 294 ++++++++++++++++++++++++++++++++++++ test/CodeGen/CellSPU/shift_ops.ll | 210 ++++++++++++++++++++++++++ test/CodeGen/CellSPU/sp_farith.ll | 88 +++++++++++ 12 files changed, 1164 insertions(+) create mode 100644 test/CodeGen/CellSPU/call.ll create mode 100644 test/CodeGen/CellSPU/ctpop.ll create mode 100644 test/CodeGen/CellSPU/dp_farith.ll create mode 100644 test/CodeGen/CellSPU/eqv.ll create mode 100644 test/CodeGen/CellSPU/fcmp.ll create mode 100644 test/CodeGen/CellSPU/fdiv.ll create mode 100644 test/CodeGen/CellSPU/fneg-fabs.ll create mode 100644 test/CodeGen/CellSPU/int2fp.ll create mode 100644 test/CodeGen/CellSPU/rotate_ops.ll create mode 100644 test/CodeGen/CellSPU/select_bits.ll create mode 100644 test/CodeGen/CellSPU/shift_ops.ll create mode 100644 test/CodeGen/CellSPU/sp_farith.ll diff --git a/test/CodeGen/CellSPU/call.ll 
b/test/CodeGen/CellSPU/call.ll new file mode 100644 index 00000000000..7b6f5b6ffcd --- /dev/null +++ b/test/CodeGen/CellSPU/call.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep brsl %t1.s | count 1 && +; RUN: grep brasl %t1.s | count 1 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @main() { +entry: + %a = call i32 @stub_1(i32 1, float 0x400921FA00000000) + call void @extern_stub_1(i32 %a, i32 4) + ret i32 %a +} + +declare void @extern_stub_1(i32, i32) + +define i32 @stub_1(i32 %x, float %y) { +entry: + ret i32 0 +} diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll new file mode 100644 index 00000000000..3e2bc64f4d8 --- /dev/null +++ b/test/CodeGen/CellSPU/ctpop.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep cntb %t1.s | count 3 && +; RUN: grep andi %t1.s | count 3 && +; RUN: grep rotmi %t1.s | count 2 && +; RUN: grep rothmi %t1.s | count 1 + +declare i32 @llvm.ctpop.i8(i8) +declare i32 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) + +define i32 @test_i8(i8 %X) { + call i32 @llvm.ctpop.i8(i8 %X) + %Y = bitcast i32 %1 to i32 + ret i32 %Y +} + +define i32 @test_i16(i16 %X) { + call i32 @llvm.ctpop.i16(i16 %X) + %Y = bitcast i32 %1 to i32 + ret i32 %Y +} + +define i32 @test_i32(i32 %X) { + call i32 @llvm.ctpop.i32(i32 %X) + %Y = bitcast i32 %1 to i32 + ret i32 %Y +} + diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll new file mode 100644 index 00000000000..58c56e14705 --- /dev/null +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -0,0 +1,100 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep dfa %t1.s | count 2 && +; RUN: grep dfs %t1.s | count 2 && +; RUN: grep dfm %t1.s | count 6 && +; RUN: grep dfma %t1.s | count 2 && +; RUN: grep dfms %t1.s | count 2 && +; RUN: grep dfnms %t1.s 
| count 4 +; +; This file includes double precision floating point arithmetic instructions + +define double @fadd(double %arg1, double %arg2) { + %A = add double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = add <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fsub(double %arg1, double %arg2) { + %A = sub double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = sub <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fmul(double %arg1, double %arg2) { + %A = mul double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = mul <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fma(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = add double %A, %arg3 + ret double %B +} + +define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = add <2 x double> %A, %arg3 + ret <2 x double> %B +} + +define double @fms(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + ret double %B +} + +define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %A, %arg3 + ret <2 x double> %B +} + +; - (a * b - c) +define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + %C = sub double -0.000000e+00, %B ; [#uses=1] + ret double %C +} + +; Another way of getting fnms +; - ( a * b ) + c => c - (a * b) +define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { + %A = mul double %arg1, %arg2 + %B = sub double %arg3, %A + ret double %B +} + +; FNMS: - (a * b - c) => c - (a * b) +define <2 x double> @d_fnms_vec_1(<2 x double> 
%arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %arg3, %A ; + ret <2 x double> %B +} + +; Another way to get fnms using a constant vector +; - ( a * b - c) +define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] + %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] + %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B + ret <2 x double> %C +} + +;define double @fdiv_1(double %arg1, double %arg2) { +; %A = fdiv double %arg1, %arg2 ; [#uses=1] +; ret double %A +;} diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll new file mode 100644 index 00000000000..a4d6dbbbd4e --- /dev/null +++ b/test/CodeGen/CellSPU/eqv.ll @@ -0,0 +1,150 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep eqv %t1.s | count 18 && +; RUN: grep xshw %t1.s | count 6 && +; RUN: grep xsbh %t1.s | count 3 && +; RUN: grep andi %t1.s | count 3 + +; Test the 'eqv' instruction, whose boolean expression is: +; (a & b) | (~a & ~b), which simplifies to +; (a & b) | ~(a | b) +; Alternatively, a ^ ~b, which the compiler will also match. 
+ +; ModuleID = 'eqv.bc' + +define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { + %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = xor <4 x i32> %arg1, %arg2not + ret <4 x i32> %C +} + +define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; [#uses=1] + %B = or i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %A = and i32 %arg1, %arg2 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; [#uses=1] + %A = and i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_4(i32 %arg1, i32 
%arg2) { + %arg2not = xor i32 %arg2, -1 + %C = xor i32 %arg1, %arg2not + ret i32 %C +} + +define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { + %arg1not = xor i32 %arg1, -1 + %C = xor i32 %arg2, %arg1not + ret i32 %C +} + +define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg1, %arg2 ; [#uses=1] + %B = or i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C +} + +define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext { + %B = or i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %A = and i16 %arg1, %arg2 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C +} + +define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext { + %B = or i16 %arg1, %arg2 ; [#uses=1] + %A = and i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C +} + +define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} + +define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext { + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} + +define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext { + %B = or i8 %arg1, %arg2 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = 
xor i8 %B, -1 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} + +define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %B = or i8 %arg1, %arg2 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C +} diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll new file mode 100644 index 00000000000..8ae97e6ff59 --- /dev/null +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fceq %t1.s | count 1 && +; RUN: grep fcmeq %t1.s | count 1 +; +; This file includes standard floating point arithmetic instructions + +declare double @fabs(double) +declare float @fabsf(float) + +define i1 @fcmp_eq(float %arg1, float %arg2) { + %A = fcmp oeq float %arg1, %arg2 ; [#uses=1] + ret i1 %A +} + +define i1 @fcmp_mag_eq(float %arg1, float %arg2) { + %A = call float @fabsf(float %arg1) ; [#uses=1] + %B = call float @fabsf(float %arg2) ; [#uses=1] + %C = fcmp oeq float %A, %B ; [#uses=1] + ret i1 %C +} diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll new file mode 100644 index 00000000000..d55b12b9f51 --- /dev/null +++ b/test/CodeGen/CellSPU/fdiv.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep frest %t1.s | count 2 && +; RUN: grep fi %t1.s | count 2 && +; RUN: grep fm %t1.s | count 4 && +; RUN: grep fma %t1.s | count 2 && +; RUN: grep fnms %t1.s | count 2 +; +; This file includes standard floating point arithmetic instructions + +define float @fdiv32(float %arg1, float %arg2) { + %A = fdiv float %arg1, %arg2 + ret float %A +} + +define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %A = fdiv <4 x float> %arg1, %arg2 + ret <4 x float> %A +} diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll new file mode 100644 index 00000000000..1abdcf6a34d --- 
/dev/null +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fsmbi %t1.s | count 3 && +; RUN: grep 32768 %t1.s | count 2 && +; RUN: grep xor %t1.s | count 4 && +; RUN: grep and %t1.s | count 5 && +; RUN: grep andbi %t1.s | count 3 + +define double @fneg_dp(double %X) { + %Y = sub double -0.000000e+00, %X + ret double %Y +} + +define <2 x double> @fneg_dp_vec(<2 x double> %X) { + %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X + ret <2 x double> %Y +} + +define float @fneg_sp(float %X) { + %Y = sub float -0.000000e+00, %X + ret float %Y +} + +define <4 x float> @fneg_sp_vec(<4 x float> %X) { + %Y = sub <4 x float> <float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00, float -0.000000e+00>, %X + ret <4 x float> %Y +} + +declare double @fabs(double) + +declare float @fabsf(float) + +define double @fabs_dp(double %X) { + %Y = call double @fabs( double %X ) ; [#uses=1] + ret double %Y +} + +define float @fabs_sp(float %X) { + %Y = call float @fabsf( float %X ) ; [#uses=1] + ret float %Y +} diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll new file mode 100644 index 00000000000..95a498428ec --- /dev/null +++ b/test/CodeGen/CellSPU/int2fp.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep csflt %t1.s | count 5 && +; RUN: grep cuflt %t1.s | count 1 && +; RUN: grep xshw %t1.s | count 2 && +; RUN: grep xsbh %t1.s | count 1 && +; RUN: grep and %t1.s | count 2 && +; RUN: grep andi %t1.s | count 1 && +; RUN: grep ila %t1.s | count 1 + +define float @sitofp_i32(i32 %arg1) { + %A = sitofp i32 %arg1 to float ; [#uses=1] + ret float %A +} + +define float @uitofp_u32(i32 %arg1) { + %A = uitofp i32 %arg1 to float ; [#uses=1] + ret float %A +} + +define float @sitofp_i16(i16 %arg1) { + %A = sitofp i16 %arg1 to float ; [#uses=1] + ret float %A +} + +define float @uitofp_i16(i16 %arg1) { + %A = uitofp i16 %arg1 to float ; [#uses=1] + ret float %A +} + +define float @sitofp_i8(i8 %arg1) { + %A = sitofp i8 
%arg1 to float ; [#uses=1] + ret float %A +} + +define float @uitofp_i8(i8 %arg1) { + %A = uitofp i8 %arg1 to float ; [#uses=1] + ret float %A +} diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll new file mode 100644 index 00000000000..6983c184c3c --- /dev/null +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -0,0 +1,157 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s +; RUN: grep rot %t1.s | count 85 +; RUN: grep roth %t1.s | count 8 +; RUN: grep roti.*5 %t1.s | count 1 +; RUN: grep roti.*27 %t1.s | count 1 +; RUN grep rothi.*5 %t1.s | count 2 +; RUN grep rothi.*11 %t1.s | count 1 +; RUN grep rothi.*,.3 %t1.s | count 1 +; RUN: grep andhi %t1.s | count 4 +; RUN: grep shlhi %t1.s | count 4 + +; Vector rotates are not currently supported in gcc or llvm assembly. These are +; not tested. + +; 32-bit rotates: +define i32 @rotl32_1a(i32 %arg1, i8 %arg2) { + %tmp1 = zext i8 %arg2 to i32 ; [#uses=1] + %B = shl i32 %arg1, %tmp1 ; [#uses=1] + %arg22 = sub i8 32, %arg2 ; [#uses=1] + %tmp2 = zext i8 %arg22 to i32 ; [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotl32_1b(i32 %arg1, i16 %arg2) { + %tmp1 = zext i16 %arg2 to i32 ; [#uses=1] + %B = shl i32 %arg1, %tmp1 ; [#uses=1] + %arg22 = sub i16 32, %arg2 ; [#uses=1] + %tmp2 = zext i16 %arg22 to i32 ; [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotl32_2(i32 %arg1, i32 %arg2) { + %B = shl i32 %arg1, %arg2 ; [#uses=1] + %tmp1 = sub i32 32, %arg2 ; [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotl32_3(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; [#uses=1] + %B = shl i32 %arg1, %arg2 ; [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotl32_4(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; [#uses=1] + %C 
= lshr i32 %arg1, %tmp1 ; [#uses=1] + %B = shl i32 %arg1, %arg2 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotr32_1(i32 %A, i8 %Amt) { + %tmp1 = zext i8 %Amt to i32 ; [#uses=1] + %B = lshr i32 %A, %tmp1 ; [#uses=1] + %Amt2 = sub i8 32, %Amt ; [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; [#uses=1] + %C = shl i32 %A, %tmp2 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +define i32 @rotr32_2(i32 %A, i8 %Amt) { + %Amt2 = sub i8 32, %Amt ; [#uses=1] + %tmp1 = zext i8 %Amt to i32 ; [#uses=1] + %B = lshr i32 %A, %tmp1 ; [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; [#uses=1] + %C = shl i32 %A, %tmp2 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +; Rotate left with immediate +define i32 @rotli32(i32 %A) { + %B = shl i32 %A, 5 ; [#uses=1] + %C = lshr i32 %A, 27 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +; Rotate right with immediate +define i32 @rotri32(i32 %A) { + %B = lshr i32 %A, 5 ; [#uses=1] + %C = shl i32 %A, 27 ; [#uses=1] + %D = or i32 %B, %C ; [#uses=1] + ret i32 %D +} + +; 16-bit rotates: +define i16 @rotr16_1(i16 %arg1, i8 %arg) { + %tmp1 = zext i8 %arg to i16 ; [#uses=1] + %B = lshr i16 %arg1, %tmp1 ; [#uses=1] + %arg2 = sub i8 16, %arg ; [#uses=1] + %tmp2 = zext i8 %arg2 to i16 ; [#uses=1] + %C = shl i16 %arg1, %tmp2 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D +} + +define i16 @rotr16_2(i16 %arg1, i16 %arg) { + %B = lshr i16 %arg1, %arg ; [#uses=1] + %tmp1 = sub i16 16, %arg ; [#uses=1] + %C = shl i16 %arg1, %tmp1 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D +} + +define i16 @rotli16(i16 %A) { + %B = shl i16 %A, 5 ; [#uses=1] + %C = lshr i16 %A, 11 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D +} + +define i16 @rotri16(i16 %A) { + %B = lshr i16 %A, 5 ; [#uses=1] + %C = shl i16 %A, 11 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D +} + +define i8 @rotl8(i8 %A, i8 %Amt) { + %B = shl i8 %A, %Amt ; [#uses=1] + %Amt2 = sub i8 8, %Amt ; 
[#uses=1] + %C = lshr i8 %A, %Amt2 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D +} + +define i8 @rotr8(i8 %A, i8 %Amt) { + %B = lshr i8 %A, %Amt ; [#uses=1] + %Amt2 = sub i8 8, %Amt ; [#uses=1] + %C = shl i8 %A, %Amt2 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D +} + +define i8 @rotli8(i8 %A) { + %B = shl i8 %A, 5 ; [#uses=1] + %C = lshr i8 %A, 3 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D +} + +define i8 @rotri8(i8 %A) { + %B = lshr i8 %A, 5 ; [#uses=1] + %C = shl i8 %A, 3 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D +} diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll new file mode 100644 index 00000000000..3cbb7a06dc7 --- /dev/null +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -0,0 +1,294 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep selb %t1.s | count 160 +; RUN: grep and %t1.s | count 2 +; RUN: grep xsbh %t1.s | count 1 +; RUN: grep xshw %t1.s | count 2 + +define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_11(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_12(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, 
i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_13(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg2 ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg3, %arg1 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_21(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg2, %A ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg3, %arg1 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] + %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_4(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> 
%arg3) { + %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_41(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %C, %B ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_42(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %C, %B ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <16 x i8> @selb_v16i8_5(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { + %C = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg3 ; <<16 x i8>> [#uses=1] + %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] + ret <16 x i8> %D +} + +define <8 x i16> @selb_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> 
@selb_v8i16_11(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_12(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_13(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg2 ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg3, %arg1 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_21(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %arg2, %A ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg3, %arg1 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_3(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, 
+ i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] + %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_4(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_41(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %C, %B ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_42(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %C, %B ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <8 x i16> @selb_v8i16_5(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { + %C = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1 > + %B = and <8 x i16> %A, %arg3 ; <<8 x i16>> [#uses=1] + %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] + ret <8 x i16> %D +} + +define <4 x i32> @selb_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { + %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] + %tmp5 = and <4 x i32> %arg2, %arg3 ; <<4 x i32>> [#uses=1] + %tmp6 = 
or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define <4 x i32> @selb_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { + %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] + %tmp5 = and <4 x i32> %arg2, %arg3 ; <<4 x i32>> [#uses=1] + %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define <4 x i32> @selb_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { + %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] + %tmp5 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] + %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define <4 x i32> @selb_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { + %tmp2 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] + %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp5 = and <4 x i32> %tmp3not, %arg1 ; <<4 x i32>> [#uses=1] + %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define <4 x i32> @selb_v4i32_5(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { + %tmp2 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] + %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp5 = and <4 x i32> %tmp3not, %arg1 ; <<4 x i32>> [#uses=1] + %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define i32 @selb_i32(i32 %arg1, i32 %arg2, i32 %arg3) { + %tmp1not = xor i32 %arg3, -1 ; [#uses=1] + %tmp3 = and i32 %tmp1not, %arg1 ; [#uses=1] + %tmp6 = and i32 %arg3, %arg2 ; [#uses=1] + %tmp7 = or i32 %tmp3, %tmp6 ; [#uses=1] + ret i32 %tmp7 +} + +define i16 @selb_i16(i16 signext %arg1, i16 signext %arg2, i16 signext %arg3) signext { + %tmp3 = and i16 %arg3, 
%arg1 ; [#uses=1] + %tmp4not = xor i16 %arg3, -1 ; [#uses=1] + %tmp6 = and i16 %tmp4not, %arg2 ; [#uses=1] + %retval1011 = or i16 %tmp3, %tmp6 ; [#uses=1] + ret i16 %retval1011 +} + +define i16 @selb_i16u(i16 zeroext %arg1, i16 zeroext %arg2, i16 zeroext %arg3) zeroext { + %tmp3 = and i16 %arg3, %arg1 ; [#uses=1] + %tmp4not = xor i16 %arg3, -1 ; [#uses=1] + %tmp6 = and i16 %tmp4not, %arg2 ; [#uses=1] + %retval1011 = or i16 %tmp3, %tmp6 ; [#uses=1] + ret i16 %retval1011 +} + +define i8 @selb_i8u(i8 zeroext %arg1, i8 zeroext %arg2, i8 zeroext %arg3) zeroext { + %tmp3 = and i8 %arg3, %arg1 ; [#uses=1] + %tmp4not = xor i8 %arg3, -1 ; [#uses=1] + %tmp6 = and i8 %tmp4not, %arg2 ; [#uses=1] + %retval1011 = or i8 %tmp3, %tmp6 ; [#uses=1] + ret i8 %retval1011 +} + +define i8 @selb_i8(i8 signext %arg1, i8 signext %arg2, i8 signext %arg3) signext { + %tmp3 = and i8 %arg3, %arg1 ; [#uses=1] + %tmp4not = xor i8 %arg3, -1 ; [#uses=1] + %tmp6 = and i8 %tmp4not, %arg2 ; [#uses=1] + %retval1011 = or i8 %tmp3, %tmp6 ; [#uses=1] + ret i8 %retval1011 +} diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll new file mode 100644 index 00000000000..162ca16776b --- /dev/null +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -0,0 +1,210 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep shlh %t1.s | count 84 +; RUN: grep shlhi %t1.s | count 51 +; RUN: grep shl %t1.s | count 168 +; RUN: grep shli %t1.s | count 51 +; RUN: grep xshw %t1.s | count 5 +; RUN: grep and %t1.s | count 5 + +; Vector shifts are not currently supported in gcc or llvm assembly. These are +; not tested. 
+ +; Shift left i16 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +; Shift left i16 with immediate: +define i16 @shlhi_i16_1(i16 %arg1) { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_2(i16 %arg1) { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_3(i16 %arg1) { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_4(i16 %arg1) { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define i16 @shlhi_i16_5(i16 signext %arg1) signext { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_6(i16 signext %arg1) signext { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_7(i16 signext %arg1) signext { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_8(i16 signext %arg1) signext { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext { + %A = shl i16 %arg1, 0 + ret i16 %A +} + 
+define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext { + %A = shl i16 0, %arg1 + ret i16 %A +} + +; Shift left i32 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) signext { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +; Shift left i32 with immediate: +define i32 @shli_i32_1(i32 %arg1) { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_2(i32 %arg1) { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_3(i32 %arg1) { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_4(i32 %arg1) { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define i32 @shli_i32_5(i32 signext %arg1) signext { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_6(i32 signext %arg1) signext { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_7(i32 signext %arg1) signext { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_8(i32 signext %arg1) signext { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define i32 @shli_i32_9(i32 zeroext %arg1) zeroext { 
+ %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_10(i32 zeroext %arg1) zeroext { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_11(i32 zeroext %arg1) zeroext { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { + %A = shl i32 0, %arg1 + ret i32 %A +} diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll new file mode 100644 index 00000000000..c7e719982d6 --- /dev/null +++ b/test/CodeGen/CellSPU/sp_farith.ll @@ -0,0 +1,88 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fa %t1.s | count 2 && +; RUN: grep fs %t1.s | count 2 && +; RUN: grep fm %t1.s | count 6 && +; RUN: grep fma %t1.s | count 2 && +; RUN: grep fms %t1.s | count 2 && +; RUN: grep fnms %t1.s | count 3 +; +; This file includes standard floating point arithmetic instructions +; NOTE fdiv is tested separately since it is a compound operation + +define float @fp_add(float %arg1, float %arg2) { + %A = add float %arg1, %arg2 ; [#uses=1] + ret float %A +} + +define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_sub(float %arg1, float %arg2) { + %A = sub float %arg1, %arg2 ; [#uses=1] + ret float %A +} + +define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul(float %arg1, float %arg2) { + %A = mul float %arg1, %arg2 ; [#uses=1] + ret float %A +} + +define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 ; [#uses=1] + %B = add float %A, 
%arg3 ; [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 ; [#uses=1] + %B = sub float %A, %arg3 ; [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +; Test the straightforward way of getting fnms +; c - a * b +define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 + %B = sub float %arg3, %A + ret float %B +} + +; Test another way of getting fnms +; - ( a *b -c ) = c - a * b +define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { + %A = mul float %arg1, %arg2 + %B = sub float %A, %arg3 + %C = sub float -0.0, %B + ret float %C +} + +define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = mul <4 x float> %arg1, %arg2 + %B = sub <4 x float> %A, %arg3 + %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B + ret <4 x float> %D +} -- 2.11.0