// VRNDSCALE/VROUND immediate encoding used throughout this file:
// 0x9=floor, 0xA=ceil, 0xB=trunc, 0x4=frint (round via MXCSR),
// 0xC=fnearbyint (MXCSR rounding with precision exceptions suppressed).
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
+// Fold full-width loads into the memory-operand (rmi) form.
+def : Pat<(v16f32 (ffloor (loadv16f32 addr:$src))),
+ (VRNDSCALEPSZrmi addr:$src, (i32 0x9))>;
+def : Pat<(v16f32 (fnearbyint (loadv16f32 addr:$src))),
+ (VRNDSCALEPSZrmi addr:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil (loadv16f32 addr:$src))),
+ (VRNDSCALEPSZrmi addr:$src, (i32 0xA))>;
+def : Pat<(v16f32 (frint (loadv16f32 addr:$src))),
+ (VRNDSCALEPSZrmi addr:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))),
+ (VRNDSCALEPSZrmi addr:$src, (i32 0xB))>;
+
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
// fnearbyint uses imm 0xC (MXCSR rounding, precision exceptions
// suppressed), matching the load-folding pattern below; 0x4 is frint's
// immediate and was wrong here (reg and mem forms must agree).
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
+
+// VRNDSCALEPD 512-bit load-folding patterns.
+def : Pat<(v8f64 (ffloor (loadv8f64 addr:$src))),
+ (VRNDSCALEPDZrmi addr:$src, (i32 0x9))>;
+def : Pat<(v8f64 (fnearbyint (loadv8f64 addr:$src))),
+ (VRNDSCALEPDZrmi addr:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil (loadv8f64 addr:$src))),
+ (VRNDSCALEPDZrmi addr:$src, (i32 0xA))>;
+def : Pat<(v8f64 (frint (loadv8f64 addr:$src))),
+ (VRNDSCALEPDZrmi addr:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))),
+ (VRNDSCALEPDZrmi addr:$src, (i32 0xB))>;
}
// 128/256-bit EVEX rounding patterns require the VLX extension.
let Predicates = [HasVLX] in {
def : Pat<(v4f32 (ftrunc VR128X:$src)),
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
+// VRNDSCALEPS 128-bit load-folding patterns.
+def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
+ (VRNDSCALEPSZ128rmi addr:$src, (i32 0x9))>;
+def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
+ (VRNDSCALEPSZ128rmi addr:$src, (i32 0xC))>;
+def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
+ (VRNDSCALEPSZ128rmi addr:$src, (i32 0xA))>;
+def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
+ (VRNDSCALEPSZ128rmi addr:$src, (i32 0x4))>;
+def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
+ (VRNDSCALEPSZ128rmi addr:$src, (i32 0xB))>;
+
def : Pat<(v2f64 (ffloor VR128X:$src)),
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
// Restore the truncated result line of the fnearbyint pattern; imm 0xC
// (MXCSR rounding, precision exceptions suppressed) matches the
// load-folding pattern below.
def : Pat<(v2f64 (fnearbyint VR128X:$src)),
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
def : Pat<(v2f64 (ftrunc VR128X:$src)),
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
+// VRNDSCALEPD 128-bit load-folding patterns.
+def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
+ (VRNDSCALEPDZ128rmi addr:$src, (i32 0x9))>;
+def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
+ (VRNDSCALEPDZ128rmi addr:$src, (i32 0xC))>;
+def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
+ (VRNDSCALEPDZ128rmi addr:$src, (i32 0xA))>;
+def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
+ (VRNDSCALEPDZ128rmi addr:$src, (i32 0x4))>;
+def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
+ (VRNDSCALEPDZ128rmi addr:$src, (i32 0xB))>;
+
def : Pat<(v8f32 (ffloor VR256X:$src)),
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
// Restore the truncated result line of the fnearbyint pattern; imm 0xC
// (MXCSR rounding, precision exceptions suppressed) matches the
// load-folding pattern below.
def : Pat<(v8f32 (fnearbyint VR256X:$src)),
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v8f32 (ftrunc VR256X:$src)),
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
+// VRNDSCALEPS 256-bit load-folding patterns.
+def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
+ (VRNDSCALEPSZ256rmi addr:$src, (i32 0x9))>;
+def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
+ (VRNDSCALEPSZ256rmi addr:$src, (i32 0xC))>;
+def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
+ (VRNDSCALEPSZ256rmi addr:$src, (i32 0xA))>;
+def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
+ (VRNDSCALEPSZ256rmi addr:$src, (i32 0x4))>;
+def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
+ (VRNDSCALEPSZ256rmi addr:$src, (i32 0xB))>;
+
def : Pat<(v4f64 (ffloor VR256X:$src)),
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
// fnearbyint uses imm 0xC (MXCSR rounding, precision exceptions
// suppressed), matching the load-folding pattern below; 0x4 is frint's
// immediate and was wrong here (reg and mem forms must agree).
def : Pat<(v4f64 (fnearbyint VR256X:$src)),
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v4f64 (ftrunc VR256X:$src)),
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
+
+// VRNDSCALEPD 256-bit load-folding patterns.
+def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
+ (VRNDSCALEPDZ256rmi addr:$src, (i32 0x9))>;
+def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
+ (VRNDSCALEPDZ256rmi addr:$src, (i32 0xC))>;
+def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
+ (VRNDSCALEPDZ256rmi addr:$src, (i32 0xA))>;
+def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
+ (VRNDSCALEPDZ256rmi addr:$src, (i32 0x4))>;
+def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
+ (VRNDSCALEPDZ256rmi addr:$src, (i32 0xB))>;
}
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
let Predicates = [UseAVX] in {
// Scalar AVX rounding patterns (VROUNDSS/VROUNDSD). The f64 patterns
// are being regrouped after the f32 ones -- no functional change.
// IMPLICIT_DEF fills the unused pass-through source operand.
def : Pat<(ffloor FR32:$src),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
- def : Pat<(f64 (ffloor FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
- def : Pat<(f64 (fnearbyint FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
- def : Pat<(f64 (fceil FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
- def : Pat<(f64 (frint FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
+
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
}
+// Scalar load folding is gated on OptForSize.
+// NOTE(review): presumably because the folded round keeps a dependency
+// on the (undef) destination register -- confirm before relying on it.
+let Predicates = [UseAVX, OptForSize] in {
+ def : Pat<(ffloor (loadf32 addr:$src)),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
+ def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil (loadf32 addr:$src))),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
+ def : Pat<(f32 (frint (loadf32 addr:$src))),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
+
+ def : Pat<(f64 (ffloor (loadf64 addr:$src))),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
+ def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
+ def : Pat<(f64 (fceil (loadf64 addr:$src))),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
+ def : Pat<(f64 (frint (loadf64 addr:$src))),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
+ def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
+}
+
// VEX-encoded packed rounds, used when AVX512VL is not available.
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x9))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0xB))>;
+ // VROUNDPS 128-bit load-folding patterns.
+ def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
+ (VROUNDPSm addr:$src, (i32 0x9))>;
+ def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
+ (VROUNDPSm addr:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
+ (VROUNDPSm addr:$src, (i32 0xA))>;
+ def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
+ (VROUNDPSm addr:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
+ (VROUNDPSm addr:$src, (i32 0xB))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x9))>;
// Restore the truncated result line of the fnearbyint pattern; imm 0xC
// (MXCSR rounding, precision exceptions suppressed) matches the
// load-folding pattern below.
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0xB))>;
+ // VROUNDPD 128-bit load-folding patterns.
+ def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
+ (VROUNDPDm addr:$src, (i32 0x9))>;
+ def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
+ (VROUNDPDm addr:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
+ (VROUNDPDm addr:$src, (i32 0xA))>;
+ def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
+ (VROUNDPDm addr:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
+ (VROUNDPDm addr:$src, (i32 0xB))>;
+
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDPSYr VR256:$src, (i32 0x9))>;
// Restore the truncated result line of the fnearbyint pattern; imm 0xC
// (MXCSR rounding, precision exceptions suppressed) matches the
// load-folding pattern below.
def : Pat<(v8f32 (fnearbyint VR256:$src)),
(VROUNDPSYr VR256:$src, (i32 0xC))>;
def : Pat<(v8f32 (ftrunc VR256:$src)),
(VROUNDPSYr VR256:$src, (i32 0xB))>;
+ // VROUNDPS 256-bit load-folding patterns.
+ def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
+ (VROUNDPSYm addr:$src, (i32 0x9))>;
+ def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
+ (VROUNDPSYm addr:$src, (i32 0xC))>;
+ def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
+ (VROUNDPSYm addr:$src, (i32 0xA))>;
+ def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
+ (VROUNDPSYm addr:$src, (i32 0x4))>;
+ def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
+ (VROUNDPSYm addr:$src, (i32 0xB))>;
+
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDPDYr VR256:$src, (i32 0x9))>;
// fnearbyint uses imm 0xC (MXCSR rounding, precision exceptions
// suppressed), matching the load-folding pattern below; 0x4 is frint's
// immediate and was wrong here (reg and mem forms must agree).
def : Pat<(v4f64 (fnearbyint VR256:$src)),
(VROUNDPDYr VR256:$src, (i32 0xC))>;
def : Pat<(v4f64 (ftrunc VR256:$src)),
(VROUNDPDYr VR256:$src, (i32 0xB))>;
+
+ // VROUNDPD 256-bit load-folding patterns.
+ def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
+ (VROUNDPDYm addr:$src, (i32 0x9))>;
+ def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
+ (VROUNDPDYm addr:$src, (i32 0xC))>;
+ def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
+ (VROUNDPDYm addr:$src, (i32 0xA))>;
+ def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
+ (VROUNDPDYm addr:$src, (i32 0x4))>;
+ def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
+ (VROUNDPDYm addr:$src, (i32 0xB))>;
}
let ExeDomain = SSEPackedSingle in
// Legacy-encoded SSE4.1 scalar rounds (ROUNDSS/ROUNDSD). The f64
// patterns are being regrouped after the f32 ones -- no functional
// change.
let Predicates = [UseSSE41] in {
def : Pat<(ffloor FR32:$src),
(ROUNDSSr FR32:$src, (i32 0x9))>;
- def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(ROUNDSSr FR32:$src, (i32 0xC))>;
- def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
(ROUNDSSr FR32:$src, (i32 0xA))>;
- def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(ROUNDSSr FR32:$src, (i32 0x4))>;
- def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
(ROUNDSSr FR32:$src, (i32 0xB))>;
+
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr FR64:$src, (i32 0x9))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr FR64:$src, (i32 0xC))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr FR64:$src, (i32 0xA))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr FR64:$src, (i32 0x4))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(ROUNDSDr FR64:$src, (i32 0xB))>;
+}
+// Scalar load folding is gated on OptForSize, mirroring the UseAVX
+// block above.
+let Predicates = [UseSSE41, OptForSize] in {
+ def : Pat<(ffloor (loadf32 addr:$src)),
+ (ROUNDSSm addr:$src, (i32 0x9))>;
+ def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
+ (ROUNDSSm addr:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil (loadf32 addr:$src))),
+ (ROUNDSSm addr:$src, (i32 0xA))>;
+ def : Pat<(f32 (frint (loadf32 addr:$src))),
+ (ROUNDSSm addr:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
+ (ROUNDSSm addr:$src, (i32 0xB))>;
+
+ def : Pat<(f64 (ffloor (loadf64 addr:$src))),
+ (ROUNDSDm addr:$src, (i32 0x9))>;
+ def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
+ (ROUNDSDm addr:$src, (i32 0xC))>;
+ def : Pat<(f64 (fceil (loadf64 addr:$src))),
+ (ROUNDSDm addr:$src, (i32 0xA))>;
+ def : Pat<(f64 (frint (loadf64 addr:$src))),
+ (ROUNDSDm addr:$src, (i32 0x4))>;
+ def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
+ (ROUNDSDm addr:$src, (i32 0xB))>;
+}
+
+let Predicates = [UseSSE41] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x9))>;
// Restore the truncated result line of the fnearbyint pattern; imm 0xC
// (MXCSR rounding, precision exceptions suppressed) matches the
// load-folding pattern below.
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0xB))>;
+ // ROUNDPS 128-bit load-folding patterns.
+ def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
+ (ROUNDPSm addr:$src, (i32 0x9))>;
+ def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
+ (ROUNDPSm addr:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
+ (ROUNDPSm addr:$src, (i32 0xA))>;
+ def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
+ (ROUNDPSm addr:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
+ (ROUNDPSm addr:$src, (i32 0xB))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x9))>;
// fnearbyint uses imm 0xC (MXCSR rounding, precision exceptions
// suppressed), matching the load-folding pattern below; 0x4 is frint's
// immediate and was wrong here (reg and mem forms must agree).
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0xB))>;
+
+ // ROUNDPD 128-bit load-folding patterns.
+ def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
+ (ROUNDPDm addr:$src, (i32 0x9))>;
+ def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
+ (ROUNDPDm addr:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
+ (ROUNDPDm addr:$src, (i32 0xA))>;
+ def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
+ (ROUNDPDm addr:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
+ (ROUNDPDm addr:$src, (i32 0xB))>;
}
//===----------------------------------------------------------------------===//
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
; CHECK-LABEL: sitofp00:
}
; sitofp <8 x i16> -> <8 x float>. The single CHECK prefix is split so
; AVX and AVX512 codegen can diverge: AVX widens in two xmm halves,
; AVX512 sign-extends straight to ymm.
define <8 x float> @sitofp02(<8 x i16> %a) {
-; CHECK-LABEL: sitofp02:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
-; CHECK-NEXT: retq
+; AVX-LABEL: sitofp02:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm1
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: sitofp02:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX512-NEXT: retq
%b = sitofp <8 x i16> %a to <8 x float>
ret <8 x float> %b
}
}
; fptrunc <8 x double> -> <8 x float>: AVX converts two ymm halves and
; reassembles; AVX512 uses one full-width zmm vcvtpd2ps.
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
-; CHECK-LABEL: fptrunc00:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
-; CHECK-NEXT: vcvtpd2ps %ymm1, %xmm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; AVX-LABEL: fptrunc00:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: fptrunc00:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtpd2ps %zmm0, %ymm0
+; AVX512-NEXT: retq
%a = fptrunc <8 x double> %b to <8 x float>
ret <8 x float> %a
}
}
declare float @llvm.floor.f32(float %p)
+; optsize enables folding the scalar load into vroundss (imm 9 = floor).
+define float @floor_f32_load(float* %aptr) optsize {
+; CHECK-LABEL: floor_f32_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %a = load float, float* %aptr
+ %res = call float @llvm.floor.f32(float %a)
+ ret float %res
+}
+
+; optsize enables folding the scalar load into vroundsd (imm 12 =
+; nearbyint: MXCSR rounding, precision exceptions suppressed).
+define double @nearbyint_f64_load(double* %aptr) optsize {
+; CHECK-LABEL: nearbyint_f64_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %a = load double, double* %aptr
+ %res = call double @llvm.nearbyint.f64(double %a)
+ ret double %res
+}