T8PD, VEX, Sched<[WriteCvtF2FLd]>;
}
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
TAPD, VEX, Sched<[WriteCvtF2F]>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteCvtF2FLd, WriteRMW] in
let Predicates = [HasF16C, NoVLX] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSrm addr:$src)>;
-}
-
-let Predicates = [HasF16C] in {
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
- def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
- (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
- addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
- (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
- addr:$dst),
- (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
- addr:$dst),
- (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(store (f64 (extractelt
+ (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (i64 (extractelt
+ (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+ (iPTR 0))), addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
+ (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
;
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
; X32-AVX512VL: # BB#0:
-; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
+; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
+; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
;
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
; X32-AVX512VL: # BB#0:
-; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
+; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
; X64-AVX512VL: # BB#0:
-; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
+; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X32-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; X32-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-AVX512VL-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X64-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; X64-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX512VL-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)