// cond.load: ; preds = %0
// %3 = getelementptr i32* %1, i32 0
// %4 = load i32* %3
-// %5 = insertelement <16 x i32> undef, i32 %4, i32 0
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
// br label %else
//
// else: ; preds = %0, %cond.load
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
if (isa<Constant>(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
VResult =
Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
OldBr->eraseFromParent();
PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
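+ // Create the phi that merges the vector with the newly loaded element (from the cond.load block) and the previous vector (from the path that skipped the load).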
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
; AVX-LABEL: loadv1:
; AVX: ## %bb.0:
; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: ## implicit-def: $xmm1
-; AVX-NEXT: je LBB0_1
-; AVX-NEXT: ## %bb.2: ## %else
-; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: jne LBB0_3
-; AVX-NEXT: LBB0_4: ## %else
-; AVX-NEXT: vmovaps %xmm1, %xmm0
-; AVX-NEXT: retq
-; AVX-NEXT: LBB0_1: ## %cond.load
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: je LBB0_4
-; AVX-NEXT: LBB0_3: ## %else
-; AVX-NEXT: vmovaps %xmm0, %xmm1
-; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: jne LBB0_2
+; AVX-NEXT: ## %bb.1: ## %cond.load
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: LBB0_2: ## %else
; AVX-NEXT: retq
;
-; AVX512F-LABEL: loadv1:
-; AVX512F: ## %bb.0:
-; AVX512F-NEXT: testq %rdi, %rdi
-; AVX512F-NEXT: ## implicit-def: $xmm1
-; AVX512F-NEXT: jne LBB0_2
-; AVX512F-NEXT: ## %bb.1: ## %cond.load
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512F-NEXT: LBB0_2: ## %else
-; AVX512F-NEXT: testq %rdi, %rdi
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: loadv1:
-; SKX: ## %bb.0:
-; SKX-NEXT: testq %rdi, %rdi
-; SKX-NEXT: ## implicit-def: $xmm1
-; SKX-NEXT: jne LBB0_2
-; SKX-NEXT: ## %bb.1: ## %cond.load
-; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; SKX-NEXT: LBB0_2: ## %else
-; SKX-NEXT: testq %rdi, %rdi
-; SKX-NEXT: sete %al
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; SKX-NEXT: retq
+; AVX512-LABEL: loadv1:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: jne LBB0_2
+; AVX512-NEXT: ## %bb.1: ## %cond.load
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512-NEXT: LBB0_2: ## %else
+; AVX512-NEXT: retq
%mask = icmp eq <1 x i64> %trigger, zeroinitializer
%res = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* %addr, i32 4, <1 x i1>%mask, <1 x double>%dst)
ret <1 x double> %res
; CHECK: cond.load:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP4]], i32 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP8]], i32 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
ret <2 x i64> %ret
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64>* %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
-; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP4]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP3]], i32 1
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret