From 28794cf1ac2eb38de1a430bad50177e430469ed3 Mon Sep 17 00:00:00 2001
From: Logan Chien
Date: Wed, 26 Sep 2018 18:58:03 +0800
Subject: [PATCH] Reactor: Fix add/sub sat generic code generation

This commit fixes saturated add/sub instructions in generic LLVM code
generation path.

Bug: b/115344057
Test: dEQP-GLES3.functional.texture.wrap.rgba8
Change-Id: Ie3e3b708565b3ad255804090e8a3ee5521f42982
Reviewed-on: https://swiftshader-review.googlesource.com/20928
Reviewed-by: Nicolas Capens
Reviewed-by: Chris Forbes
Tested-by: Chris Forbes
---
 src/Reactor/LLVMReactor.cpp | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index dd09cf7f7..6bc889ef1 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -180,32 +180,58 @@ namespace
 	}
 
 	// Packed add/sub saturatation
-	llvm::Value *lowerPSAT(llvm::Intrinsic::ID intrinsic, llvm::Value *x, llvm::Value *y)
+	llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
 	{
-		llvm::Function *func = llvm::Intrinsic::getDeclaration(
-			::module, intrinsic, {x->getType(), y->getType()});
-		llvm::Value *ret = ::builder->CreateCall(func, ARGS(x, y));
-		return ::builder->CreateExtractValue(ret, {0});
+		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
+		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
+
+		unsigned numBits = ty->getScalarSizeInBits();
+
+		llvm::Value *max, *min, *extX, *extY;
+		if (isSigned)
+		{
+			max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
+			min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
+			extX = ::builder->CreateSExt(x, extTy);
+			extY = ::builder->CreateSExt(y, extTy);
+		}
+		else
+		{
+			assert(numBits <= 64);
+			uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
+			max = llvm::ConstantInt::get(extTy, maxVal, false);
+			min = llvm::ConstantInt::get(extTy, 0, false);
+			extX = ::builder->CreateZExt(x, extTy);
+			extY = ::builder->CreateZExt(y, extTy);
+		}
+
+		llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
+		                         : ::builder->CreateSub(extX, extY);
+
+		res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
+		res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
+
+		return ::builder->CreateTrunc(res, ty);
 	}
 
 	llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
 	{
-		return lowerPSAT(llvm::Intrinsic::uadd_with_overflow, x, y);
+		return lowerPSAT(x, y, true, false);
 	}
 
 	llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
 	{
-		return lowerPSAT(llvm::Intrinsic::sadd_with_overflow, x, y);
+		return lowerPSAT(x, y, true, true);
 	}
 
 	llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
 	{
-		return lowerPSAT(llvm::Intrinsic::usub_with_overflow, x, y);
+		return lowerPSAT(x, y, false, false);
 	}
 
 	llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
 	{
-		return lowerPSAT(llvm::Intrinsic::ssub_with_overflow, x, y);
+		return lowerPSAT(x, y, false, true);
 	}
 
 	llvm::Value *lowerSQRT(llvm::Value *x)
-- 
2.11.0