From 7b343a215ecfcc7f13a4558ae07b15a67dc99045 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Fri, 13 Apr 2018 16:21:24 -0500 Subject: [PATCH] swr/rast: Fix x86 lowering 64-bit float handling - 64-bit cvt-to-float needs to be explicitly handled - gathers need the right parameter types to work with doubles Fixes draw-vertices piglit tests Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/jitter/builder_misc.h | 12 ++++++ .../rasterizer/jitter/functionpasses/lower_x86.cpp | 50 +++++++++++++++++++--- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index bd4be9ffe2a..a51aad05a06 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -55,6 +55,18 @@ Constant *CA(LLVMContext& ctx, ArrayRef constList) return ConstantDataArray::get(ctx, constList); } +template +Constant *CInc(uint32_t base, uint32_t count) +{ + std::vector vConsts; + + for(uint32_t i = 0; i < count; i++) { + vConsts.push_back(C((Ty)base)); + base++; + } + return ConstantVector::get(vConsts); +} + Constant *PRED(bool pred); Value *VIMMED1(int i); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index baf3ab5aa8d..eac0549c263 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -115,7 +115,7 @@ namespace SwrJit {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, - {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, + {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, @@ -166,10 +166,18 @@ namespace SwrJit // across all intrinsics, and will have to be rethought. Probably need something // similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed // intrinsic. - void GetRequestedWidthAndType(CallInst* pCallInst, TargetWidth* pWidth, Type** pTy) + void GetRequestedWidthAndType(CallInst* pCallInst, const StringRef intrinName, TargetWidth* pWidth, Type** pTy) { uint32_t vecWidth; Type* pVecTy = pCallInst->getType(); + + // Check for intrinsic specific types + // VCVTPD2PS type comes from src, not dst + if (intrinName.equals("meta.intrinsic.VCVTPD2PS")) + { + pVecTy = pCallInst->getOperand(0)->getType(); + } + if (!pVecTy->isVectorTy()) { for (auto& op : pCallInst->arg_operands()) @@ -231,7 +239,7 @@ namespace SwrJit auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()]; TargetWidth vecWidth; Type* pElemTy; - GetRequestedWidthAndType(pCallInst, &vecWidth, &pElemTy); + GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy); // Check if there is a native intrinsic for this instruction Intrinsic::ID id = intrinsic.intrin[vecWidth]; @@ -460,7 +468,9 @@ namespace SwrJit // Double pump 4-wide for 64bit elements if (vSrc->getType()->getVectorElementType() == B->mDoubleTy) { - auto v64Mask = B->S_EXT(pThis->VectorMask(vi1Mask), B->mInt64Ty); + auto v64Mask = pThis->VectorMask(vi1Mask); + v64Mask = B->S_EXT(v64Mask, + VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements())); v64Mask = B->BITCAST(v64Mask, vSrc->getType()); Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({ 0, 1, 2, 3 })); @@ -472,10 +482,15 @@ namespace SwrJit Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 0, 1, 2, 3 })); Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 4, 5, 6, 7 })); + src0 = B->BITCAST(src0, VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements())); + mask0 = B->BITCAST(mask0, VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements())); Value* gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale }); + src1 = B->BITCAST(src1, VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements())); + mask1 = B->BITCAST(mask1, VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements())); Value* gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale }); v32Gather = B->VSHUFFLE(gather0, gather1, B->C({ 0, 1, 2, 3, 4, 5, 6, 7 })); + v32Gather = B->BITCAST(v32Gather, vSrc->getType()); } else { @@ -603,11 +618,34 @@ namespace SwrJit SmallVector args; for (auto& arg : pCallInst->arg_operands()) { - args.push_back(arg.get()->getType()->isVectorTy() ? B->EXTRACT_16(arg.get(), i) : arg.get()); + auto argType = arg.get()->getType(); + if (argType->isVectorTy()) + { + uint32_t vecWidth = argType->getVectorNumElements(); + Value *lanes = B->CInc(i*vecWidth/2, vecWidth/2); + Value *argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes); + args.push_back(argToPush); + } + else + { + args.push_back(arg.get()); + } } result[i] = B->CALLA(pX86IntrinFunc, args); } - return cast(B->JOIN_16(result[0], result[1])); + uint32_t vecWidth; + if (result[0]->getType()->isVectorTy()) + { + assert(result[1]->getType()->isVectorTy()); + vecWidth = result[0]->getType()->getVectorNumElements() + + result[1]->getType()->getVectorNumElements(); + } + else + { + vecWidth = 2; + } + Value *lanes = B->CInc(0, vecWidth); + return cast(B->VSHUFFLE(result[0], result[1], lanes)); } } -- 2.11.0