setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
+ // We can do bitwise operations on v2i64 vectors
+ setOperationAction(ISD::AND, MVT::v2i64, Legal);
+ setOperationAction(ISD::OR, MVT::v2i64, Legal);
+ setOperationAction(ISD::XOR, MVT::v2i64, Legal);
// It is legal to extload from v4i8 to v4i16 or v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
+ if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ return SDValue();
int64_t Cnt;
(v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
(v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+ def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
+ (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
(v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
(v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
(v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
(v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
+ def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
(v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
}
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
%2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
ret <4 x i32> %2
}
+
+define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: abs_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: vmov r12, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s3
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: rsbs.w lr, r12, #0
+; CHECK-NEXT: sbc.w r5, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r2, lr
+; CHECK-NEXT: lsrl r2, r5, #32
+; CHECK-NEXT: mov.w r5, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq r2, r3
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r4, r1, #0
+; CHECK-NEXT: mov r6, r4
+; CHECK-NEXT: sbc.w r7, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: lsrl r6, r7, #32
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ite eq
+; CHECK-NEXT: moveq r6, r3
+; CHECK-NEXT: movne r1, r4
+; CHECK-NEXT: vmov.32 q0[0], r1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 q0[1], r6
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq lr, r12
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r2
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; abs of i64 lanes has no vector lowering here: each lane is compared and
+; negated with scalar rsbs/sbc sequences, then reassembled with vmov.32.
+entry:
+ %0 = icmp slt <2 x i64> %s1, zeroinitializer
+ %1 = sub nsw <2 x i64> zeroinitializer, %s1
+ %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
+ ret <2 x i64> %2
+}
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: and_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+; Bitwise AND is lane-size agnostic, so v2i64 selects a single VAND.
+entry:
+ %0 = and <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: or_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: or_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: bx lr
+; Bitwise OR is lane-size agnostic, so v2i64 selects a single VORR.
+entry:
+ %0 = or <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: xor_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: xor_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: veor q0, q0, q1
+; CHECK-NEXT: bx lr
+; Bitwise XOR is lane-size agnostic, so v2i64 selects a single VEOR.
+entry:
+ %0 = xor <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
; CHECK-LABEL: v_mvn_i8:
; CHECK: @ %bb.0: @ %entry
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) {
+; CHECK-LABEL: v_mvn_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmvn q0, q0
+; CHECK-NEXT: bx lr
+; xor with all-ones is recognized as a bitwise NOT and selects VMVN.
+entry:
+ %0 = xor <2 x i64> %src, <i64 -1, i64 -1>
+ ret <2 x i64> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_bic_i8:
; CHECK: @ %bb.0: @ %entry
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_bic_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vbic q0, q1, q0
+; CHECK-NEXT: bx lr
+; src2 & ~src1 matches the VBIC (bit clear) pattern for v2i64.
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = and <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_or_i8:
; CHECK: @ %bb.0: @ %entry
%1 = or <4 x i32> %src2, %0
ret <4 x i32> %1
}
+
+define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_or_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorn q0, q1, q0
+; CHECK-NEXT: bx lr
+; src2 | ~src1 matches the VORN (or-not) pattern for v2i64.
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = or <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: udiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; No vector i64 divide: each lane is lowered to a __aeabi_uldivmod libcall.
+entry:
+ %out = udiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; No vector i64 divide: each lane is lowered to a __aeabi_ldivmod libcall.
+entry:
+ %out = sdiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: urem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; Remainder reuses __aeabi_uldivmod per lane, keeping the r2:r3 remainder.
+entry:
+ %out = urem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: srem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; Remainder reuses __aeabi_ldivmod per lane, keeping the r2:r3 remainder.
+entry:
+ %out = srem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+
+
define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {
; CHECK-LABEL: fdiv_f32:
%out = frem <8 x half> %in1, %in2
ret <8 x half> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: fdiv_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 has no vector support here: each lane calls the __aeabi_ddiv libcall.
+entry:
+ %out = fdiv <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: frem_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; frem scalarizes to one fmod libcall per double lane.
+entry:
+ %out = frem <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sqrt_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one sqrt libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
; CHECK-LABEL: cos_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
+; CHECK-LABEL: cos_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one cos libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
; CHECK-LABEL: sin_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sin_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one sin libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one exp libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp2_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one exp2 libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
; CHECK-LABEL: log_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one log libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
; CHECK-LABEL: log2_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one log2 libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
; CHECK-LABEL: log10_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log10_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one log10 libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: pow_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: pow_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+; Two-operand v2f64 intrinsic scalarizes to one pow libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: copysign_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: copysign_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r0, r1, d3
+; CHECK-NEXT: vmov r0, lr, d2
+; CHECK-NEXT: vmov r0, r3, d1
+; CHECK-NEXT: vmov r12, r2, d0
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: lsr.w r1, lr, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r3
+; CHECK-NEXT: vmov d0, r12, r2
+; CHECK-NEXT: pop {r7, pc}
+; copysign is expanded to scalar bit-insert (bfi) of src2's sign bit into
+; the high word of each lane of src1.
+entry:
+ %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
declare <8 x half> @llvm.log10.v8f16(<8 x half>)
declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
ret <4 x float> %0
}
+define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fneg_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vldr d0, .LCPI2_0
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov r4, r5, d0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI2_0:
+; CHECK-NEXT: .long 0 @ double -0
+; CHECK-NEXT: .long 2147483648
+; fsub-from-zero (negation with nsz) is expanded to per-lane __aeabi_dsub
+; libcalls against a constant-pool zero (printed as -0 by the assembler).
+entry:
+ %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fabs_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <4 x float> %0
}
+define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fabs_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldr d2, .LCPI5_0
+; CHECK-NEXT: vmov r12, r3, d0
+; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov r0, r2, d1
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r2
+; CHECK-NEXT: vmov d0, r12, r3
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI5_0:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
+; fabs is expanded to scalar bfi inserts of a zero sign bit (taken from a
+; constant-pool zero) into the high word of each lane.
+entry:
+ %0 = call nnan ninf nsz <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fceil_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one ceil libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ftrunc_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one trunc libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: frint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one rint libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fnearbyint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one nearbyint libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ffloor_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one floor libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fround_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+; v2f64 intrinsic scalarizes to one round libcall per lane.
+entry:
+ %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+; i64 compares are done in scalar code to build a lane mask, then the
+; select is performed with the vector vbic/vand/vorr sequence.
+entry:
+ %0 = icmp slt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umin_v16i8:
; CHECK: @ %bb.0: @ %entry
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+; Same scalar mask-building as smin_v2i64, but with unsigned (lo) conditions.
+entry:
+ %0 = icmp ult <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: smax_v16i8:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; No MVE v2i64 max: sgt is done as a signed subs/sbcs compare ('lt' with operands swapped) feeding a vbic/vand/vorr blend.
+ %0 = icmp sgt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umax_v16i8:
; CHECK: @ %bb.0: @ %entry
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; No MVE v2i64 max: ugt is an unsigned subs/sbcs compare ('lo' with operands swapped) feeding a vbic/vand/vorr blend.
+ %0 = icmp ugt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: maxnm_float32_t:
%0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
ret <8 x half> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: maxnm_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: vmov.32 q0[3], r4
+; CHECK-NEXT: vbic q1, q5, q0
+; CHECK-NEXT: vand q0, q4, q0
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, pc}
+entry:
+ ; The <2 x double> ogt compare expands to two __aeabi_dcmpgt libcalls; the select is a vbic/vand/vorr blend of the widened masks.
+ %cmp = fcmp fast ogt <2 x double> %src2, %src1
+ %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1
+ ret <2 x double> %0
+}
%0 = sub nsw <4 x i32> zeroinitializer, %s1
ret <4 x i32> %0
}
+
+define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: neg_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: sbc.w r0, r12, r0
+; CHECK-NEXT: vmov.32 q0[0], r2
+; CHECK-NEXT: vmov.32 q0[1], r3
+; CHECK-NEXT: vmov.32 q0[2], r1
+; CHECK-NEXT: vmov.32 q0[3], r0
+; CHECK-NEXT: bx lr
+entry:
+ ; Each i64 lane is negated with rsbs/sbc against r12 = 0, then the vector is rebuilt lane by lane.
+ %0 = sub nsw <2 x i64> zeroinitializer, %s1
+ ret <2 x i64> %0
+}
+
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[3], r0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Per-lane sign-extension: the low word is copied and the high word is asr #31 of it.
+ %0 = sext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: zext_v8i8_v8i16:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI7_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI7_0:
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+entry:
+ ; Zero-extension is a single vand with a constant-pool mask that clears the odd (high) 32-bit lanes.
+ %0 = zext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
; CHECK-LABEL: trunc_v8i16_v8i8:
%0 = trunc <4 x i32> %src to <4 x i8>
ret <4 x i8> %0
}
+
+define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
+; CHECK-LABEL: trunc_v2i64_v2i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ ; NOTE(review): folds to a plain return - the low words appear to stay in lanes 0/2 of q0, matching the <2 x i32> return layout; confirm against the calling convention.
+ %0 = trunc <2 x i64> %src to <2 x i32>
+ ret <2 x i32> %0
+}
+
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shl_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ ; Per-lane variable shift via the scalar-pair lsll with the amount in a core register (low word of each %src2 lane).
+ %0 = shl <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shru_qq_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shru_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov.32 q2[2], r0
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ ; lshr is lowered as lsll with a negated amount (a left shift by a negative count shifts right).
+ %0 = lshr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shrs_qq_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shrs_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ ; Per-lane variable arithmetic shift via the scalar-pair asrl.
+ %0 = ashr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qi_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shl_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Immediate shift: each 64-bit lane goes through lsll #4 on a GPR pair.
+ %0 = shl <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shru_qi_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shru_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Immediate shift: each 64-bit lane goes through lsrl #4 on a GPR pair.
+ %0 = lshr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shrs_qi_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shrs_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Immediate shift: each 64-bit lane goes through asrl #4 on a GPR pair.
+ %0 = ashr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shl_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Splatted scalar amount: only r0 (the low word of %src2) reaches lsll - NOTE(review): presumably relies on IR shifts >= 64 being poison; confirm.
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = shl <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shru_qr_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shru_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Splatted lshr: the amount is negated once (rsbs) and both lanes use lsll with the negative count to shift right.
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = lshr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shrs_qr_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shrs_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Splatted ashr: both lanes use asrl with the amount taken from r0 (low word of %src2).
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = ashr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qiv_int8_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI27_0
+; CHECK-NEXT: adr r0, .LCPI36_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u8 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .LCPI36_0:
; CHECK-NEXT: .byte 1 @ 0x1
; CHECK-NEXT: .byte 2 @ 0x2
; CHECK-NEXT: .byte 3 @ 0x3
define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shl_qiv_int16_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI28_0
+; CHECK-NEXT: adr r0, .LCPI37_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u16 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .LCPI37_0:
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 2 @ 0x2
; CHECK-NEXT: .short 3 @ 0x3
define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shl_qiv_int32_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI29_0
+; CHECK-NEXT: adr r0, .LCPI38_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u32 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 2 @ 0x2
; CHECK-NEXT: .long 3 @ 0x3
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle1_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ ; Identity shuffle folds away completely.
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle2_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane swap is done with four single-precision register moves.
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle3_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ ; The undef lane lets the whole shuffle fold to a no-op.
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle1_f32:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle1_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ ; Identity shuffle folds away completely.
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle2_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane swap is done with four single-precision register moves.
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle3_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ ; The undef lane lets the whole shuffle fold to a no-op.
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
; CHECK-LABEL: insert_i32:
ret <16 x i8> %res
}
+define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
+; CHECK-LABEL: insert_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: bx lr
+entry:
+ ; Insert into lane 0 as two 32-bit moves from r0/r1; lanes 2/3 stay undef.
+ %res = insertelement <2 x i64> undef, i64 %a, i32 0
+ ret <2 x i64> %res
+}
+
define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
; CHECK-LABEL: insert_f32:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %res
}
+define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
+; CHECK-LABEL: insert_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r6, r7, lr}
+; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: .setfp r7, sp, #8
+; CHECK-NEXT: add r7, sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #4
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: sub.w r4, r7, #8
+; CHECK-NEXT: vstr d0, [sp]
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r6, r7, pc}
+entry:
+ ; Currently goes via a 16-byte-aligned stack slot (bfc aligns sp) and a vldrw reload rather than a register move.
+ %res = insertelement <2 x double> undef, double %a, i32 0
+ ret <2 x double> %res
+}
+
define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-LABEL: scalar_to_vector_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: adr r1, .LCPI30_0
+; CHECK-NEXT: adr r1, .LCPI38_0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI30_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .long 7 @ 0x7
; CHECK-NEXT: .long 1 @ 0x1
ret i8 %res
}
+define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane 0 is just s0/s1 of the q register, moved to the r0/r1 return pair.
+ %res = extractelement <2 x i64> %a, i32 0
+ ret i64 %res
+}
+
+define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane 1 is s2/s3 of the q register, moved to the r0/r1 return pair.
+ %res = extractelement <2 x i64> %a, i32 1
+ ret i64 %res
+}
+
define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
; CHECK-LABEL: extract_f32_0:
; CHECK: @ %bb.0: @ %entry
%res = extractelement <8 x half> %a, i32 3
ret half %res
}
+
+define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane 0 already lives in d0; only a register-allocation kill annotation remains.
+ %res = extractelement <2 x double> %a, i32 0
+ ret double %res
+}
+
+define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ ; Lane 1 (s2/s3) is moved down into the d0 return slot.
+ %res = extractelement <2 x double> %a, i32 1
+ ret double %res
+}
+
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: add_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: adds.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: adc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s5
+; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: adcs r1, r3
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; Per-lane 64-bit add with adds/adc carry chains; the result vector is rebuilt lane by lane.
+ %0 = add nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: add_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; No vector f64 add on MVE: each double lane goes through an __aeabi_dadd soft-float libcall.
+ %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: sub_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: subs.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r3, s5
+; CHECK-NEXT: sbc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbc.w r1, r3, r1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; Per-lane subs/sbc borrow chains; note the IR computes %src2 - %src1, hence the operand order in the scalar code.
+ %0 = sub nsw <2 x i64> %src2, %src1
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: sub_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; Each double lane goes through an __aeabi_dsub libcall; the IR computes %src2 - %src1.
+ %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK: @ %bb.0: @ %entry
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: mul_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: vmov r2, s5
+; CHECK-NEXT: umull r12, r3, r1, r0
+; CHECK-NEXT: mla lr, r1, r2, r3
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: vmov r2, s7
+; CHECK-NEXT: umull r4, r5, r1, r3
+; CHECK-NEXT: mla r1, r1, r2, r5
+; CHECK-NEXT: vmov r2, s1
+; CHECK-NEXT: mla r0, r2, r0, lr
+; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: vmov.32 q0[0], r12
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: mla r1, r2, r3, r1
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+entry:
+ ; 64x64->64 per lane: umull produces the low product and two mla instructions add the cross terms into the high word.
+ %0 = mul nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
%0 = fmul nnan ninf nsz <4 x float> %src2, %src1
ret <4 x float> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: mul_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; Each double lane is multiplied via an __aeabi_dmul soft-float libcall.
+ %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
ret <4 x i32> %sum
}
+define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-FP-LABEL: vector_add_i64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov d1, r2, r3
+; CHECK-FP-NEXT: vmov d0, r0, r1
+; CHECK-FP-NEXT: add r0, sp, #8
+; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
+; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: vmov r0, s3
+; CHECK-FP-NEXT: vmov r3, s6
+; CHECK-FP-NEXT: vmov r2, s7
+; CHECK-FP-NEXT: adds.w lr, r1, r3
+; CHECK-FP-NEXT: vmov r3, s0
+; CHECK-FP-NEXT: vmov r1, s4
+; CHECK-FP-NEXT: adc.w r12, r0, r2
+; CHECK-FP-NEXT: vmov r2, s1
+; CHECK-FP-NEXT: vmov r0, s5
+; CHECK-FP-NEXT: adds r1, r1, r3
+; CHECK-FP-NEXT: vmov.32 q0[0], r1
+; CHECK-FP-NEXT: adcs r0, r2
+; CHECK-FP-NEXT: vmov.32 q0[1], r0
+; CHECK-FP-NEXT: vmov.32 q0[2], lr
+; CHECK-FP-NEXT: vmov.32 q0[3], r12
+; CHECK-FP-NEXT: vmov r0, r1, d0
+; CHECK-FP-NEXT: vmov r2, r3, d1
+; CHECK-FP-NEXT: pop {r7, pc}
+entry:
+ ; Non-vfpcc ABI: %lhs arrives in r0-r3, %rhs partly on the stack; the result is marshalled back into r0-r3.
+ %sum = add <2 x i64> %lhs, %rhs
+ ret <2 x i64> %sum
+}
+
define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP: @ %bb.0: @ %entry
%sum = fadd <4 x float> %lhs, %rhs
ret <4 x float> %sum
}
+
+define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-FP-LABEL: vector_add_f64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: .pad #4
+; CHECK-FP-NEXT: sub sp, #4
+; CHECK-FP-NEXT: .vsave {d8, d9}
+; CHECK-FP-NEXT: vpush {d8, d9}
+; CHECK-FP-NEXT: mov r5, r0
+; CHECK-FP-NEXT: add r0, sp, #40
+; CHECK-FP-NEXT: vldrw.u32 q4, [r0]
+; CHECK-FP-NEXT: mov r4, r2
+; CHECK-FP-NEXT: mov r6, r3
+; CHECK-FP-NEXT: mov r7, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: mov r0, r4
+; CHECK-FP-NEXT: mov r1, r6
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov r2, r3, d8
+; CHECK-FP-NEXT: vmov d9, r0, r1
+; CHECK-FP-NEXT: mov r0, r5
+; CHECK-FP-NEXT: mov r1, r7
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov d8, r0, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: vmov r0, r1, d8
+; CHECK-FP-NEXT: vpop {d8, d9}
+; CHECK-FP-NEXT: add sp, #4
+; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc}
+entry:
+ ; Non-vfpcc ABI: operands arrive in core registers and on the stack, then two __aeabi_dadd libcalls add the lanes.
+ %sum = fadd <2 x double> %lhs, %rhs
+ ret <2 x double> %sum
+}
%out = fptoui <8 x half> %src to <8 x i16>
ret <8 x i16> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_int64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; sitofp is done per lane via the __aeabi_l2d (signed i64 -> double) libcall.
+ %out = sitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_uint64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; uitofp is done per lane via the __aeabi_ul2d (unsigned i64 -> double) libcall.
+ %out = uitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_int64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; fptosi per lane via __aeabi_d2lz; the lsrl #32 extracts the high word of each i64 libcall result.
+ %out = fptosi <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_uint64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ ; fptoui per lane via __aeabi_d2ulz; lsrl #32 extracts the high word of each i64 result.
+ %out = fptoui <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
+; CHECK-LABEL: vdup_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], r0
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: bx lr
+entry:
+ ; An i64 splat is four 32-bit lane moves from the r0/r1 pair.
+ %0 = insertelement <2 x i64> undef, i64 %src, i32 0
+ %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
; CHECK-LABEL: vdup_f32_1:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
+; CHECK-LABEL: vdup_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vmov.f32 s2, s0
+; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: bx lr
+entry:
+ ; An f64 splat copies d0 (s0/s1) up into the second lane (s2/s3).
+ %0 = insertelement <2 x double> undef, double %src, i32 0
+ %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
+; CHECK-LABEL: vduplane_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
; CHECK-LABEL: vduplane_f32:
; CHECK: @ %bb.0: @ %entry
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x half> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
+; CHECK-LABEL: vduplane_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %out
+}
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
-define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) {
+define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() {
; CHECK-LABEL: mov_int8_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() {
; CHECK-LABEL: mov_int16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x1
ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() {
; CHECK-LABEL: mov_int16_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() {
; CHECK-LABEL: mov_int16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x100
ret <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {
; CHECK-LABEL: mov_int16_258:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI6_0
ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() {
; CHECK-LABEL: mov_int32_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1
ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() {
; CHECK-LABEL: mov_int32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x100
ret <4 x i32> <i32 256, i32 256, i32 256, i32 256>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() {
; CHECK-LABEL: mov_int32_65536:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x10000
ret <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() {
; CHECK-LABEL: mov_int32_16777216:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1000000
ret <4 x i32> <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {
; CHECK-LABEL: mov_int32_16777217:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI11_0
ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() {
; CHECK-LABEL: mov_int32_17919:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ff
ret <4 x i32> <i32 17919, i32 17919, i32 17919, i32 17919>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() {
; CHECK-LABEL: mov_int32_4587519:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ffff
ret <4 x i32> <i32 4587519, i32 4587519, i32 4587519, i32 4587519>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() {
; CHECK-LABEL: mov_int32_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() {
; CHECK-LABEL: mov_int32_4294901760:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xffff
ret <4 x i32> <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {
; CHECK-LABEL: mov_int32_4278190335:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI16_0
ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() {
; CHECK-LABEL: mov_int32_4278255615:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xff0000
ret <4 x i32> <i32 4278255615, i32 4278255615, i32 4278255615, i32 4278255615>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
-; CHECK-LABEL: mov_float_1:
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() {
+; CHECK-LABEL: mov_int64_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI18_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI18_0:
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+entry:
+ ret <2 x i64> <i64 1, i64 1>
+}
+
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() {
+; CHECK-LABEL: mov_int64_m1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i8 q0, #0xff
+; CHECK-NEXT: bx lr
+entry:
+ ret <2 x i64> <i64 -1, i64 -1>
+}
+
+define arm_aapcs_vfpcc <4 x float> @mov_float_1() {
+; CHECK-LABEL: mov_float_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI20_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
; CHECK-NEXT: .long 1065353216
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) {
+define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {
; CHECK-LABEL: mov_float_m3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI19_0
+; CHECK-NEXT: adr r0, .LCPI21_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI19_0:
+; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
; CHECK-NEXT: .long 3225419776
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_1() {
; CHECK-LABEL: mov_float16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x3c00
ret <8 x half> <half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() {
; CHECK-LABEL: mov_float16_m3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0xc200
entry:
ret <8 x half> <half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00>
}
+
+define arm_aapcs_vfpcc <2 x double> @mov_double_1() {
+; CHECK-LABEL: mov_double_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI24_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI24_0:
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+entry:
+ ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() {
; CHECK-LABEL: mov_int16_511:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe00
ret <8 x i16> <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() {
; CHECK-LABEL: mov_int16_65281:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe
ret <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() {
; CHECK-LABEL: mov_int32_m7:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x6
ret <4 x i32> <i32 -7, i32 -7, i32 -7, i32 -7>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() {
; CHECK-LABEL: mov_int32_m769:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x300
ret <4 x i32> <i32 -769, i32 -769, i32 -769, i32 -769>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() {
; CHECK-LABEL: mov_int32_m262145:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x40000
ret <4 x i32> <i32 -262145, i32 -262145, i32 -262145, i32 -262145>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() {
; CHECK-LABEL: mov_int32_m134217729:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x8000000
ret <4 x i32> <i32 -134217729, i32 -134217729, i32 -134217729, i32 -134217729>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() {
; CHECK-LABEL: mov_int32_4294902528:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xfcff
ret <4 x i32> <i32 4294902528, i32 4294902528, i32 4294902528, i32 4294902528>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
; CHECK-LABEL: mov_int32_4278386688:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI7_0