}
// Multiclass avx512_sqrt_scalar.
// Template arguments: opcode byte, mnemonic string, an X86VectorVTInfo record
// (`_`), an instruction-name suffix (SUFF), two SDNodes (OpNode, OpNodeRnd)
// and, as added by this change, an Intrinsic (Intr) that the selection
// patterns below match on.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
+ string SUFF, SDNode OpNode, SDNode OpNodeRnd,
+ Intrinsic Intr> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
(!cast<Instruction>(NAME#SUFF#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// Select Intr applied to a VR128X register to the register-form _Int
// instruction (NAME#SUFF#Zr_Int); $src is passed as both source operands.
+ def : Pat<(Intr VR128X:$src),
+ (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
+ VR128X:$src)>;
+}
+
// Load-folding patterns, guarded by HasAVX512 and OptForSize. The predicate
// list now comes from this enclosing `let` rather than from the per-pattern
// Requires<[HasAVX512, OptForSize]> that the removed line carried.
+let Predicates = [HasAVX512, OptForSize] in {
// Scalar OpNode of a load selects NAME#SUFF#Zm, with an IMPLICIT_DEF of the
// element type as the first operand and the address as the second.
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
- (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
+ (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
+
// Intr applied to scalar_to_vector of a scalar load selects
// NAME#SUFF#Zm_Int, with an IMPLICIT_DEF of the vector type as the first
// operand and the address as the second.
+ def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))),
+ (!cast<Instruction>(NAME#SUFF#Zm_Int)
+ (_.VT (IMPLICIT_DEF)), addr:$src2)>;
}
+
}
// Multiclass avx512_sqrt_scalar_all.
// Instantiates avx512_sqrt_scalar twice from one opcode/mnemonic pair:
//   SSZ - mnemonic suffix "ss", f32x_info, name suffix "SS"
//   SDZ - mnemonic suffix "sd", f64x_info, name suffix "SD" (also VEX_W)
// Both use fsqrt and X86fsqrtRnds as the SDNodes. This change additionally
// passes the matching intrinsic to each instantiation: int_x86_sse_sqrt_ss
// for SSZ and int_x86_sse2_sqrt_sd for SDZ.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
- X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS,
- NotMemoryFoldable;
+ X86fsqrtRnds, int_x86_sse_sqrt_ss>,
+ EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
- X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
+ X86fsqrtRnds, int_x86_sse2_sqrt_sd>,
+ EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
NotMemoryFoldable;
}
let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
- }
- let Predicates = [HasAVX] in {
def : Pat<(Intr VR128:$src),
(!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src,
VR128:$src)>;
}
- let Predicates = [HasAVX, OptForSize] in {
+ let Predicates = [target, OptForSize] in {
def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
- }
- let Predicates = [target, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
; SSE-NEXT: retl ## encoding: [0xc3]
;
-; VCHECK-LABEL: test_x86_sse_sqrt_ss:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse_sqrt_ss:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse_sqrt_ss:
+; SKX: ## BB#0:
+; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; SSE-NEXT: retl ## encoding: [0xc3]
;
-; VCHECK-LABEL: test_x86_sse2_sqrt_sd:
-; VCHECK: ## BB#0:
-; VCHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_sqrt_sd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; AVX2-NEXT: retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_sqrt_sd:
+; SKX: ## BB#0:
+; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; SKX-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
-; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %a0, align 16
%res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]